In [1]:
# Key of the per-segment embedding list that the dataset reads its vectors from
# (segments are validated to carry both 'ivectors' and 'xvectors').
conf_vector = 'xvectors'
# NOTE(review): not referenced by any cell visible here; presumably the
# dimensionality of the selected embedding. Confirm before relying on it.
conf_vector_length = 128
# Number of leading segments per speaker that are averaged into that speaker's model.
conf_models_generation_length = 5
# Number of speaker-model slots in every generated permutation.
conf_models_container_length = 2
# When True, '0' placeholder entries are added to the pool the permutations are drawn from.
conf_permutations_include_zeros = False

In [2]:
import os
import json
from functools import reduce

# is_valid_segment [DONE]
def is_valid_segment(segment):
    """Tell whether a segment is usable.

    A segment is accepted when it lists exactly one speaker, exactly one
    i-vector and exactly one x-vector, and that single speaker is 'A' or 'B'.
    The fields are checked in that order and checking stops at the first
    failure.
    """
    for field in ('speakers', 'ivectors', 'xvectors'):
        if len(segment[field]) != 1:
            return False
    return segment['speakers'][0]['speaker_id'] in ['A', 'B']

# load_recordings_segments [DONE]
def load_recordings_segments(directory):
    """Load the valid segments of every recording stored in `directory`.

    Every regular file in `directory` is read as JSON lines (one segment per
    line). The recording id is the file name up to its first dot. Segments
    rejected by `is_valid_segment` are dropped.

    Args:
        directory: path of the directory holding one file per recording.

    Returns:
        dict mapping recording id -> list of valid segment dicts, with keys
        inserted in sorted file-name order.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the recording
    # order (and therefore any index built on top of it) reproducible.
    filenames = sorted(
        filename for filename in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, filename))
    )
    recordings_segments = {}
    recordings_length = len(filenames)
    for recordings_count, filename in enumerate(filenames, 1):
        recording_id = filename.split('.')[0]
        # `with` closes the handle even when a line fails to parse.
        with open(os.path.join(directory, filename), 'r') as file:
            # Blank lines (e.g. a trailing newline at end of file) are not segments.
            segments = [json.loads(line) for line in file if line.strip()]
        recordings_segments[recording_id] = list(filter(is_valid_segment, segments))
        print('Loading ' + directory + ' ' + str(recordings_count) + '/' + str(recordings_length), end = '\r')
    if filenames:
        # Terminate the '\r' progress line so the next print starts on a fresh line.
        print()
    return recordings_segments

# speakers_get_indexes [DONE]
def speakers_get_indexes(accumulator, speaker_tuple):
    """Reducer step that groups segment indexes by speaker.

    `speaker_tuple` is a `(speaker_id, index)` pair; the index is appended to
    the list stored under `speaker_id` in `accumulator`, creating that list on
    first sight. The same (mutated) accumulator is returned so the function can
    be passed to `functools.reduce`.
    """
    speaker_id, index = speaker_tuple
    accumulator.setdefault(speaker_id, []).append(index)
    return accumulator

# balance_segments [DONE]
def balance_segments(recordings_segments, minimum_speakers, minimum_speaker_length):
    """Keep only well-populated recordings and equalise their speakers' segment counts.

    A recording is kept when it has at least `minimum_speakers` distinct
    speakers and its least frequent speaker has at least
    `minimum_speaker_length` segments. In a kept recording every speaker is
    truncated to the first N of its segments, N being the count of the least
    frequent speaker; the surviving segments keep their original order.

    Args:
        recordings_segments: dict mapping recording id -> list of segments.
        minimum_speakers: minimum number of distinct speakers required.
        minimum_speaker_length: minimum number of segments required for the
            least frequent speaker.

    Returns:
        A new dict with the balanced segment lists of the kept recordings.
        A summary line "Recordings left: kept/total" is printed.
    """
    new_recordings_segments = {}
    for recording_id, recording_segments in recordings_segments.items():
        speakers_indexes = reduce(
            speakers_get_indexes,
            [(segment['speakers'][0]['speaker_id'], index) for index, segment in enumerate(recording_segments)],
            {},
        )
        # default=0 covers a recording with no segments at all, which used to
        # crash on indexing an empty list.
        speakers_lengths_min = min((len(indexes) for indexes in speakers_indexes.values()), default=0)
        if len(speakers_indexes) < minimum_speakers or speakers_lengths_min < minimum_speaker_length:
            continue
        # A set makes the membership test below constant-time.
        kept_indexes = set()
        for indexes in speakers_indexes.values():
            kept_indexes.update(indexes[:speakers_lengths_min])
        new_recordings_segments[recording_id] = [
            segment for index, segment in enumerate(recording_segments) if index in kept_indexes
        ]
    print('Recordings left: ' + str(len(new_recordings_segments)) + '/' + str(len(recordings_segments)))
    return new_recordings_segments

In [3]:
# Read the valid segments of every recording found in the callhome1 JSON directory.
callhome1_segments = load_recordings_segments(directory='../exp/callhome1/json')
# Keep recordings with at least 2 speakers whose least frequent speaker has
# at least 20 segments, and equalise the per-speaker segment counts.
callhome1_segments_cut = balance_segments(
    recordings_segments=callhome1_segments,
    minimum_speakers=2,
    minimum_speaker_length=20,
)

Recordings left: 172/249/json 249/249


In [4]:
from torch.utils.data import Dataset
import numpy as np
import itertools

class Recordings_dataset(Dataset):
    """Map-style dataset pairing each segment vector with permutations of speaker models.

    For every selected recording a model is built per speaker as the mean of
    that speaker's first `conf_models_generation_length` vectors. All ordered
    selections of `conf_models_container_length` speakers (optionally padded
    with '0' placeholders when `conf_permutations_include_zeros` is set) are
    enumerated, and every segment of the recording is combined with every such
    permutation. The dataset length is the sum, over recordings, of
    `len(segments) * permutations_length`.

    The configuration is read from the module-level `conf_*` names when the
    dataset is constructed.

    Each item is a tuple `(x, y, z)`:
        x: list `[segment_vector, model_slot_0, model_slot_1, ...]`; a '0'
           slot is filled with fresh uniform noise in [-0.1, 0.1).
        y: float array with 1.0 in the slots whose speaker is the segment's
           speaker and 0.0 elsewhere.
        z: float array of per-slot weights, `1 - count / sum(counts)`, where
           `count` is the slot speaker's number of segments (the largest
           speaker count of the recording for a '0' slot).
    """
    def __init__(self, recordings_segments, recordings_ids):
        """Index the requested recordings.

        Args:
            recordings_segments: dict mapping recording id -> list of segments.
            recordings_ids: a list of recording ids, or a single id.

        Raises:
            KeyError: if a requested id is missing from `recordings_segments`.
        """
        self.recordings_ids = recordings_ids if isinstance(recordings_ids, list) else [recordings_ids]
        self.recordings_segments = {
            recording_id: recordings_segments[recording_id] for recording_id in self.recordings_ids
        }
        self.mode = conf_vector
        self.models_generation_length = conf_models_generation_length
        self.models_container_length = conf_models_container_length
        self.permutations_include_zeros = conf_permutations_include_zeros
        self.recordings_data = {}
        # (first global index, last global index, recording id) per recording.
        self.recordings_map = []
        self.recordings_length = 0
        for recording_id in self.recordings_ids:
            recording_data = self._build_recording_data(self.recordings_segments[recording_id])
            self.recordings_data[recording_id] = recording_data
            self.recordings_map.append((self.recordings_length, self.recordings_length + recording_data['length'] - 1, recording_id))
            self.recordings_length += recording_data['length']
        # Sorted first-index column of recordings_map, kept for binary search in __getitem__.
        self._recordings_starts = np.asarray([recording_tuple[0] for recording_tuple in self.recordings_map], dtype=np.int64)

    def _build_recording_data(self, recording_segments):
        """Compute speaker indexes, speaker models and the permutation table of one recording."""
        speakers_indexes = reduce(
            speakers_get_indexes,
            [(segment['speakers'][0]['speaker_id'], index) for index, segment in enumerate(recording_segments)],
            {},
        )
        recording_data = {
            'speakers_indexes': speakers_indexes,
            # default=0 lets a recording without segments contribute zero items instead of crashing.
            'speakers_indexes_lengths_max': max((len(indexes) for indexes in speakers_indexes.values()), default=0),
            'speakers_models': {},
        }
        for speaker_id, speaker_indexes in speakers_indexes.items():
            speaker_vectors = [
                np.asarray(recording_segments[index][self.mode][0]['value'])
                for index in speaker_indexes[:self.models_generation_length]
            ]
            # One model per speaker, stored in a list.
            recording_data['speakers_models'][speaker_id] = [np.sum(speaker_vectors, 0) / len(speaker_vectors)]

        candidates = list(recording_data['speakers_models'].keys())
        if self.permutations_include_zeros:
            candidates = candidates + ['0' for i in range(self.models_container_length)]
        # set() removes the duplicates caused by the repeated '0' placeholders.
        recording_data['permutations'] = sorted(set(itertools.permutations(candidates, self.models_container_length)))

        # (first local index, last local index, permutation index) per permutation.
        recording_data['permutations_map'] = []
        recording_data['permutations_length'] = 0
        for index, permutation in enumerate(recording_data['permutations']):
            speakers_models_length = int(np.prod([len(recording_data['speakers_models'][speaker_id]) for speaker_id in permutation if speaker_id != '0']))
            recording_data['permutations_map'].append((recording_data['permutations_length'], recording_data['permutations_length'] + speakers_models_length - 1, index))
            recording_data['permutations_length'] += speakers_models_length
        recording_data['length'] = len(recording_segments) * recording_data['permutations_length']
        return recording_data

    def __len__(self):
        """Return the total number of (segment, permutation) items."""
        return self.recordings_length

    def __getitem__(self, idx):
        """Return the `(x, y, z)` item at global index `idx`.

        Raises:
            IndexError: if `idx` is outside `[0, len(self))`.
        """
        if not 0 <= idx < self.recordings_length:
            raise IndexError('index ' + str(idx) + ' is out of range for a dataset of length ' + str(self.recordings_length))
        # The last recording whose first index is <= idx owns the item; zero-length
        # recordings share their start with the next one and are skipped by side='right'.
        position = int(np.searchsorted(self._recordings_starts, idx, side='right')) - 1
        recording_start, _, recording_id = self.recordings_map[position]
        recording_idx = idx - recording_start
        recording_data = self.recordings_data[recording_id]

        # Items are laid out segment-major: all permutations of segment 0, then segment 1, ...
        segment_id, segment_idx = divmod(recording_idx, recording_data['permutations_length'])
        segment = self.recordings_segments[recording_id][segment_id]
        target_id = segment['speakers'][0]['speaker_id']
        vector = np.asarray(segment[self.mode][0]['value'])

        permutation_id = next(
            permutation_tuple[2] for permutation_tuple in recording_data['permutations_map']
            if permutation_tuple[0] <= segment_idx <= permutation_tuple[1]
        )
        permutation = recording_data['permutations'][permutation_id]

        models_container = [
            np.asarray(recording_data['speakers_models'][speaker_id][0]) if speaker_id != '0'
            else np.random.uniform(-0.1, 0.1, len(vector))
            for speaker_id in permutation
        ]
        models_weights = np.asarray([
            len(recording_data['speakers_indexes'][speaker_id]) if speaker_id != '0'
            else recording_data['speakers_indexes_lengths_max']
            for speaker_id in permutation
        ])
        models_weights = np.ones(len(models_weights)) - models_weights / np.sum(models_weights)

        x = [vector] + models_container
        y = np.asarray([speaker_id == target_id for speaker_id in permutation], dtype = float)
        z = models_weights

        return x, y, z

In [5]:
# Build the dataset over every recording that survived balancing.
recordings_dataset = Recordings_dataset(callhome1_segments_cut, list(callhome1_segments_cut))

In [108]:
import os
import subprocess
import re

def plda_score(ref_vector, test_vector):
    """Write two vectors as text ark files and run `./plda_score.sh`.

    `ref_vector` is written to '../exp/reference.1.ark' under the key
    'reference' and `test_vector` to '../exp/test.1.ark' under the key 'test',
    each as a single line `<key> [ v1 v2 ... ]`. The script is then executed
    with no arguments; how it consumes those files is not visible from here.

    Args:
        ref_vector: iterable of numbers (list or NumPy array).
        test_vector: iterable of numbers (list or NumPy array).

    Returns:
        None. The script's raw stdout (bytes) is printed on success.

    Raises:
        RuntimeError: if the script exits with a non-zero status (its stderr
            is printed first).
        OSError: if the ark files cannot be written or the script cannot be
            started.
    """
    ref_filepath = '../exp/reference.1.ark'
    test_filepath = '../exp/test.1.ark'

    def to_ark_vector(vector):
        # float() unwraps NumPy scalars: under NumPy >= 2 their repr is
        # 'np.float64(...)', which str(list(array)) would leak into the file.
        return '[ ' + ' '.join(repr(float(value)) for value in vector) + ' ]'

    with open(ref_filepath, 'w') as file:
        file.write('reference ' + to_ark_vector(ref_vector) + '\n')

    with open(test_filepath, 'w') as file:
        file.write('test ' + to_ark_vector(test_vector) + '\n')

    script = './plda_score.sh'
    # DEVNULL gives the child an immediate EOF on stdin, as the original
    # communicate() call without input did.
    process = subprocess.run([script], stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode == 0:
        print(process.stdout)
    else:
        print(process.stderr)
        # Raise instead of exit(): inside a notebook, exit() asks the kernel
        # to shut down rather than reporting the failure to the caller.
        raise RuntimeError('plda_score.sh failed with exit code ' + str(process.returncode))

In [109]:
#for input, target, weigth in recordings_dataset:
#    plda_score(input[1], input[2])
# Each dataset item is (x, y, z); x[0] is the segment vector and x[1], x[2]
# are the speaker models of the two permutation slots.
input, target, weigth = recordings_dataset[301]
plda_score(ref_vector=input[2], test_vector=input[1])

b"b'ivector-plda-scoring: error while loading shared libraries: libkaldi-ivector.so: cannot open shared object file: No such file or directory\\n'\n"
