In [1]:
import os
import json
from copy import copy
import numpy
import itertools

In [2]:
# Directory holding one JSON-lines segments file per recording
folder = 'exp/json'
# os.listdir returns entries in arbitrary order; sorting keeps the recording
# order (and every index layout derived from it) stable across runs and machines.
filenames = sorted(f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f)))
# recording id -> per-recording metadata, filled in by the loading loop
data = {}

def is_valid_segment(line):
    """Tell whether one JSON-lines record is a usable single-speaker segment.

    A segment is accepted only when it has exactly one entry in 'speakers',
    that speaker's 'speaker_id' is 'A' or 'B', and it has exactly one entry in
    each of 'ivectors' and 'xvectors'.

    Args:
        line: One line of a segments file, expected to hold a JSON object with
            'speakers', 'ivectors' and 'xvectors' lists.

    Returns:
        bool: True when every condition holds; False otherwise, including for
        blank lines.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a key needed by the checks is missing.
    """
    if not line.strip():
        # A blank line is not a segment; rejecting it here avoids a
        # JSONDecodeError that would abort loading of the whole file.
        return False
    segment = json.loads(line)
    speakers = segment['speakers']
    return (
        len(speakers) == 1
        and speakers[0]['speaker_id'] in ('A', 'B')
        and len(segment['ivectors']) == 1
        and len(segment['xvectors']) == 1
    )

for filename in filenames:
    # The recording id is the file name up to its first dot
    recording_id = filename.split('.')[0]
    filepath = os.path.join(folder, filename)
    # Removing multiple-speaker, extra-speaker and multiple-vector segments.
    # The context manager closes the file even if a line fails validation.
    with open(filepath) as file:
        indexes = [index for index, line in enumerate(file) if is_valid_segment(line)]
    # 'indexes' holds the line numbers (0-based) of the segments that were kept
    data[recording_id] = {'filepath': filepath, 'indexes': indexes}

In [3]:
# For every recording with at least two distinct speakers, build:
#   * 'oracle': per speaker and per oracle length, the mean of that speaker's
#     first `oracle_length` segment vectors, plus an all-zero '0' placeholder;
#   * 'permutations': the distinct orderings of speakers/placeholders over the slots;
#   * the flat index range the recording occupies in the final dataset.
oracle_lengths = [1, 2, 3, 4, 5]    # how many leading segments are averaged per oracle
models_container_size = 4           # number of speaker-model slots per sample
data_length = 0                     # running total of samples over the kept recordings
data_indexes = []                   # (first_index, last_index, recording_id), inclusive bounds
delete_recordings_ids = []          # recordings with fewer than two distinct speakers
for recording_id in data:
    recording = data[recording_id]
    # Set membership keeps the line filter linear in the file length
    valid_indexes = set(recording['indexes'])
    # The context manager closes the file; previously one handle per recording stayed open
    with open(recording['filepath']) as file:
        segments = [json.loads(line) for index, line in enumerate(file) if index in valid_indexes]
    # Sorted so the speaker order does not depend on string hash randomisation
    speakers_ids = sorted({segment['speakers'][0]['speaker_id'] for segment in segments})
    if len(speakers_ids) > 1:
        recording['oracle'] = {}
        for speaker_id in speakers_ids:
            speaker_segments = [segment for segment in segments if segment['speakers'][0]['speaker_id'] == speaker_id]
            speaker_ivectors = [segment['ivectors'][0]['value'] for segment in speaker_segments]
            speaker_xvectors = [segment['xvectors'][0]['value'] for segment in speaker_segments]
            recording['oracle'][speaker_id] = {}
            for oracle_length in oracle_lengths:
                # Mean over the first `oracle_length` vectors (fewer if the speaker has fewer segments)
                recording['oracle'][speaker_id][oracle_length] = {
                    'ivectors': [numpy.array(speaker_ivectors[:oracle_length]).mean(axis = 0)],
                    'xvectors': [numpy.array(speaker_xvectors[:oracle_length]).mean(axis = 0)],
                }
        # The '0' placeholder is an all-zero vector with the same sizes as the
        # first speaker's oracle vectors; it is inserted after the real speakers.
        reference_oracle = recording['oracle'][speakers_ids[0]]
        recording['oracle']['0'] = {
            oracle_length: {
                'ivectors': [numpy.zeros(len(reference_oracle[oracle_length]['ivectors'][0]))],
                'xvectors': [numpy.zeros(len(reference_oracle[oracle_length]['xvectors'][0]))],
            }
            for oracle_length in oracle_lengths
        }
        # Distinct slot assignments; the repeated '0' entries create duplicate
        # tuples, which the set removes. Sorting makes the order reproducible.
        permutations = sorted(set(itertools.permutations(speakers_ids + ['0'] * models_container_size, models_container_size)))
        recording['permutations'] = permutations
        recording['oracle_lengths'] = oracle_lengths
        # One sample per (segment, oracle length, permutation) combination
        length = len(segments) * len(oracle_lengths) * len(permutations)
        data_indexes.append((data_length, data_length + length - 1, recording_id))
        data_length += length
    else:
        delete_recordings_ids.append(recording_id)
    print(recording_id, end =  '\r')
# Removal happens after the loop because the dict cannot change size while iterated
for recording_id in delete_recordings_ids:
    del data[recording_id]

iaqh

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split

class SegmentsDataset(Dataset):
    """Dataset whose flat index encodes a (recording, segment, oracle length, permutation).

    Each item is a pair ``(x, y)``: ``x`` is the segment's vector followed by
    the oracle vectors of the permutation's slots, and ``y`` marks which slots
    hold the segment's own speaker.
    """

    def __init__(self, recordings_data, recordings_data_length, recordings_data_indexes, mode = 'ivectors'):
        """Store the precomputed recording metadata.

        Args:
            recordings_data: Mapping of recording id to a dict with the keys
                'filepath', 'indexes', 'oracle', 'permutations' and
                'oracle_lengths'.
            recordings_data_length: Total number of samples, returned by ``len``.
            recordings_data_indexes: Sequence of
                ``(first_index, last_index, recording_id)`` entries with
                inclusive bounds.
            mode: Key of the vector list read from segments and oracles
                (default 'ivectors').
        """
        self.recordings_data = recordings_data
        self.recordings_data_length = recordings_data_length
        self.recordings_data_indexes = recordings_data_indexes
        self.mode = mode

    def __len__(self):
        """Return the total number of samples."""
        return self.recordings_data_length

    def __getitem__(self, idx):
        """Build the sample stored at flat index ``idx``.

        Returns:
            tuple: ``(x, y)`` numpy arrays; ``x`` concatenates the segment
            vector with one oracle vector per permutation slot, ``y`` is a
            float array with 1.0 where the slot's speaker id equals the
            segment's speaker id.

        Raises:
            IndexError: If ``idx`` falls in no recording's range, or the
                recorded line is missing from the segments file.
        """
        # Stop at the first recording whose inclusive range contains idx
        for data_index in self.recordings_data_indexes:
            if data_index[0] <= idx <= data_index[1]:
                break
        else:
            raise IndexError('index {} is out of range'.format(idx))
        recording_data = self.recordings_data[data_index[2]]
        recording_index = idx - data_index[0] # Index relative to the recording data
        permutations = recording_data['permutations']
        oracle_lengths = recording_data['oracle_lengths']
        # Layout inside a recording: segment-major, then oracle length, then permutation
        segment_position, remainder = divmod(recording_index, len(oracle_lengths) * len(permutations))
        oracle_index, permutation_index = divmod(remainder, len(permutations))
        # Translate the position among kept segments into a line number of the file
        line_index = recording_data['indexes'][segment_position]
        # Read only up to the wanted line and close the file afterwards
        with open(recording_data['filepath']) as segments_file:
            line = next(itertools.islice(segments_file, line_index, None), None)
        if line is None:
            raise IndexError('line {} is missing from {}'.format(line_index, recording_data['filepath']))
        segment = json.loads(line)
        vector = numpy.array(segment[self.mode][0]['value'])
        permutation = permutations[permutation_index]
        oracle_length = oracle_lengths[oracle_index]
        slot_vectors = [recording_data['oracle'][speaker_id][oracle_length][self.mode][0] for speaker_id in permutation]
        x = numpy.concatenate([vector] + slot_vectors)
        segment_speaker_id = segment['speakers'][0]['speaker_id']
        y = numpy.asarray([speaker_id == segment_speaker_id for speaker_id in permutation], dtype = float)
        return x, y
    
# Seed torch's global RNG so the random split and the training shuffle repeat across runs
torch.manual_seed(0)

segments_dataset = SegmentsDataset(data, data_length, data_indexes)
print(len(segments_dataset))
# 70% of the samples are used for training; the remainder forms the test set
train_length = int(len(segments_dataset) * 0.7)
test_length = len(segments_dataset) - train_length
train_dataset, test_dataset = random_split(segments_dataset, [train_length, test_length])
train_dataloader = DataLoader(train_dataset, batch_size = 10, shuffle=True, num_workers = 4)
test_dataloader = DataLoader(test_dataset, batch_size = 1, shuffle=False, num_workers = 1)

3219720


In [12]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Eight-layer fully connected network with one sigmoid output per slot.

    The input width is ``vector_size * (container_size + 1)``, i.e. room for
    one vector plus ``container_size`` further vectors of the same size. The
    layers keep the attribute names ``fc0`` .. ``fc7``.

    Args:
        vector_size: Length of each individual vector (default 128).
        container_size: Number of slots, which is also the output width.
            Defaults to the notebook-level ``models_container_size``.
        hidden_size: Width of every hidden layer (default 512).
    """
    def __init__(self, vector_size = 128, container_size = None, hidden_size = 512):
        super().__init__()
        if container_size is None:
            # Looked up at call time so a plain Net() follows the notebook configuration
            container_size = models_container_size
        self.fc0 = nn.Linear(vector_size * (container_size + 1), hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, hidden_size)
        self.fc6 = nn.Linear(hidden_size, hidden_size)
        self.fc7 = nn.Linear(hidden_size, container_size)
    def forward(self, x):
        """Apply seven ReLU-activated layers, then a sigmoid-activated output layer."""
        hidden_layers = (self.fc0, self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        for layer in hidden_layers:
            x = F.relu(layer(x))
        return torch.sigmoid(self.fc7(x))
    
# Use the GPU when one is available; otherwise keep the network on the CPU
# instead of failing on machines without CUDA.
net = Net().to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

In [None]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr = 0.001)

# Binary cross-entropy applied to the network's sigmoid outputs.
# Other criteria previously listed here: nn.CrossEntropyLoss, nn.MSELoss.
criterion = nn.BCELoss()

# Send batches to whichever device the network's parameters are on
device = next(net.parameters()).device

epochs = 3
for epoch in range(epochs):
    for inputs, targets in train_dataloader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        net.zero_grad()
        output = net(inputs.float())
        loss = criterion(output, targets.float())
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        print(loss_value, end = '\r')
        # NOTE(review): this leaves only the current epoch's batch loop; the
        # next epoch still starts. Confirm whether training should stop entirely.
        if loss_value < 0.2:
            break
        

In [None]:
# Run the network over the test loader without tracking gradients and print
# each target next to the corresponding output.
device = next(net.parameters()).device
with torch.no_grad():
    for inputs, target in test_dataloader:
        # The batch must be on the same device as the network's parameters
        output = net(inputs.to(device).float())
        print('target:', target)
        print('output:', output)

In [None]:
# Scratch check of nn.CrossEntropyLoss: random scores for a small batch are
# compared against random integer class labels, and the scalar loss is printed.
batch_size, class_count = 3, 5
loss = nn.CrossEntropyLoss()
input = torch.randn(batch_size, class_count, requires_grad=True)
target = torch.empty(batch_size, dtype=torch.long).random_(class_count)
print(input, target, sep='\n')
output = loss(input, target)
print(output)