In [1]:
import sys
spath = '../'
sys.path.insert(0,spath)
from scripts.models import get_rttm_segments_features, sort_segments_by_speakers, Ivector
from scripts.notebook import \
get_best_speakers, \
limit_segments_speakers_names, \
limit_segments_speakers_length, \
balance_speakers_segments_length, \
get_speakers_segments_indexes, \
get_speakers_models, \
get_speakers_permutations, \
get_speakers_weights_2, \
Permutations

import itertools
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.animation as animation

### Loading data from drive

In [2]:
dimension = 128

dev_a_rttm     = '../augmented/callhome/callhome1/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/ref.rttm'
dev_a_segments = '../augmented/callhome/callhome1/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/segments'
dev_a_ivectors = '../augmented/callhome/callhome1/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/exp/make_ivectors/ivector.txt'
dev_a_files_segments = get_rttm_segments_features(dev_a_rttm, dev_a_segments, dev_a_ivectors)

dev_b_rttm     = '../augmented/callhome/callhome1/augmented_0/1.0_0.3_0.5/' + str(dimension) + '/ref.rttm'
dev_b_segments = '../augmented/callhome/callhome1/augmented_0/1.0_0.3_0.5/' + str(dimension) + '/segments'
dev_b_ivectors = '../augmented/callhome/callhome1/augmented_0/1.0_0.3_0.5/' + str(dimension) + '/exp/make_ivectors/ivector.txt'
dev_b_files_segments = get_rttm_segments_features(dev_b_rttm, dev_b_segments, dev_b_ivectors)

dev_c_rttm     = '../augmented/callhome/callhome1/augmented_0/0.5_0.3_0.5/' + str(dimension) + '/ref.rttm'
dev_c_segments = '../augmented/callhome/callhome1/augmented_0/0.5_0.3_0.5/' + str(dimension) + '/segments'
dev_c_ivectors = '../augmented/callhome/callhome1/augmented_0/0.5_0.3_0.5/' + str(dimension) + '/exp/make_ivectors/ivector.txt'
dev_c_files_segments = get_rttm_segments_features(dev_c_rttm, dev_c_segments, dev_c_ivectors)

### Filtering data

In [3]:
# Getting the two first speakers of each file
dev_a_files_speakers = get_best_speakers(dev_a_files_segments, 2)
dev_b_files_speakers = get_best_speakers(dev_b_files_segments, 2)
dev_c_files_speakers = get_best_speakers(dev_c_files_segments, 2)

# Removing single-speaker files
'''delete = [file_id for file_id, speakers_names in dev_a_files_speakers.items() if len(speakers_names) < 2]
for file_id in delete:
    del dev_a_files_speakers[file_id]
delete = [file_id for file_id, speakers_names in dev_b_files_speakers.items() if len(speakers_names) < 2]
for file_id in delete:
    del dev_b_files_speakers[file_id]
delete = [file_id for file_id, speakers_names in dev_c_files_speakers.items() if len(speakers_names) < 2]
for file_id in delete:
    del dev_c_files_speakers[file_id]'''

# Filtering segments that only contain speakers from the list
dev_a_files_segments_lim = limit_segments_speakers_names(dev_a_files_segments, dev_a_files_speakers, log = True)
dev_b_files_segments_lim = limit_segments_speakers_names(dev_b_files_segments, dev_b_files_speakers, log = True)
dev_c_files_segments_lim = limit_segments_speakers_names(dev_c_files_segments, dev_c_files_speakers, log = True)

# Filtering segments thath only contain one speaker
dev_a_files_segments_lim = limit_segments_speakers_length(dev_a_files_segments_lim, 1, log = True)
dev_b_files_segments_lim = limit_segments_speakers_length(dev_b_files_segments_lim, 1, log = True)
dev_c_files_segments_lim = limit_segments_speakers_length(dev_c_files_segments_lim, 1, log = True)

Kept 25942 of 35773: 0.725183797836357
Kept 38643 of 53526: 0.7219482120838471
Kept 115828 of 160980: 0.7195179525406883
Kept 17709 of 25942: 0.6826381928918356
Kept 26371 of 38643: 0.6824263126568848
Kept 79193 of 115828: 0.6837120558068861


In [4]:
from torch.utils.data import Dataset

def invert_speaker_weights(speakers_weights):
    weight_sum = sum(speakers_weights.values())
    weight_count = len(speakers_weights.values())
    speakers_weights_inverse = {}
    for speaker_name, weight in speakers_weights.items():
        speakers_weights_inverse[speaker_name] = (weight_sum - speakers_weights[speaker_name]) / ((weight_count - 1) * weight_sum)
    return speakers_weights_inverse

class Files_dataset(Dataset):
    def __init__(self,
                 files_segments,
                 models_generation_lengths = [3, 5, 7],
                 models_container_length = 2,
                 include_zeros = True,
                 include_overlaps = False,
                 feature = 'ivectors',
                 zeros_multiplier = 1):
        self.files_segments = files_segments
        self.models_generation_lengths = models_generation_lengths
        self.models_container_length = models_container_length
        self.include_zeros = include_zeros
        self.include_overlaps = include_overlaps
        self.feature = feature
        self.zeros_multiplier = zeros_multiplier
        self.speakers_segments_indexes = {}
        self.speakers_models = {}
        self.speakers_models_combinations_length = {}
        self.complete_turns = {}
        self.speakers_permutations = {}
        self.speakers_weights = {}
        self.lookup = []
        self.length = 0
        for file_id, segments in self.files_segments.items():
            self.speakers_segments_indexes[file_id] = get_speakers_segments_indexes(enumerate(segments))
            self.speakers_models[file_id] = get_speakers_models(segments,
                                                                self.speakers_segments_indexes[file_id],
                                                                self.models_generation_lengths)
            self.speakers_models_combinations_length[file_id] = len(self.models_generation_lengths) ** len(self.speakers_models[file_id].keys())            
            self.complete_turns[file_id] = 2
            
            length = self.complete_turns[file_id] * self.speakers_models_combinations_length[file_id] * len(segments)
            self.speakers_permutations[file_id] = Permutations(self.speakers_models[file_id].keys(),
                                                               length,
                                                               self.models_container_length,
                                                               self.include_zeros)
            speakers_weights = self.speakers_permutations[file_id].get_speakers_names_counts()
            if self.include_zeros and '0' in speakers_weights:
                speakers_weights['0'] *= (1 / self.zeros_multiplier)
            self.speakers_weights[file_id] = invert_speaker_weights(speakers_weights)

            self.lookup.append({ 'file_id': file_id, 'onset': self.length, 'end': self.length + length - 1 })
            self.length += length

    def __len__(self):
        return self.length

    def __getitem__(self, key):
        lookup = [value for value in self.lookup if value['onset'] <= key and key <= value['end']][0]
        file_id = lookup['file_id']
        index = key - lookup['onset']
        segments = self.files_segments[file_id]
        permutation = self.speakers_permutations[file_id][index]
        _, turn_index = divmod(index, self.speakers_models_combinations_length[file_id] * len(segments))
        
        model_index, segment_index = divmod(turn_index, len(segments))
        
        models = {}
        models_speakers_names = list(self.speakers_models[file_id].keys())
        remainder = model_index
        for speaker_name_index, speaker_name in enumerate(models_speakers_names):
            models_lengths = [len(self.speakers_models[file_id][speaker_name].keys()) for speaker_name in models_speakers_names[speaker_name_index + 1:]]
            if index != len(models_speakers_names) - 1:
                model_index, remainder = divmod(remainder, int(np.prod(models_lengths)))
            else:
                model_index = remainder
            models[speaker_name] = self.speakers_models[file_id][speaker_name][list(self.speakers_models[file_id][speaker_name].keys())[model_index]]

        models_container = [models[speaker_name] if speaker_name != '0' else { 'ivectors': [Ivector(np.random.uniform(-0.1, 0.1, dimension).astype(np.float32))] } for speaker_name in permutation]
        
        segment = segments[segment_index]
        segment_speakers_names = [speaker.get_name() for speaker in segment.get_speakers()]
        
        x = [embeddings[self.feature][0].get_value() for embeddings in models_container + [{ 'ivectors': segment.get_ivectors() }]]
        if self.include_overlaps:
            segment_speaker_name = ','.join(sorted(set(segment_speakers_names)))
            y = np.asarray([speaker_name == segment_speaker_name for speaker_name in permutation], dtype = np.float32)
        else:
            y = np.asarray([speaker_name in segment_speakers_names for speaker_name in permutation], dtype = np.float32) / len(segment_speakers_names)
        w =  np.asarray([self.speakers_weights[file_id][speaker_name] for speaker_name in permutation], dtype = np.float32)
        
        return x, y, w

In [5]:
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, b, M):
        super(Model, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv1d((M + 1), M ** 3, 3),
            nn.ReLU(),
            nn.Conv1d(M ** 3, M ** 2, 3),
            nn.ReLU(),
            nn.Conv1d(M ** 2, M, 3),
            nn.ReLU(),
        )
        self.fc1 = nn.Sequential(
            nn.Linear((b - 6) * M, M * 32),
            nn.ReLU(),
            nn.Linear(M * 32, M * 16),
            nn.ReLU(),
            nn.Linear(M * 16, M),
            nn.Sigmoid(),
        )
    def forward(self, x):
        x = torch.stack(x, 1)
        x = self.cnn1(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        return x

In [6]:
%matplotlib notebook
import matplotlib.pyplot as plt

class Train_graph:
    def __init__(self, training_loss = 1):
        self.step = 0
        self.training_losses_x = [self.step - 1, self.step]
        self.training_losses_y = [training_loss, training_loss]
        self.validation_losses_x = None
        self.validation_losses_y = None
        self.fig = plt.figure()
        self.ax = self.fig.add_subplot()
        self.training_line, = self.ax.plot(self.training_losses_x, self.training_losses_y, '--', label = 'Training')
        self.validation_line = None
        self.ax.set_ylim(0, training_loss)
        self.ax.set_xlabel('Epoch')
        self.ax.set_ylabel('Loss')
        self.ax.legend()
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()
    def draw(self, training_loss, validaton_loss = None):
        self.step += 1
        self.training_losses_x.append(self.step)
        self.training_losses_y.append(training_loss)
        self.training_line.set_xdata(self.training_losses_x)
        self.training_line.set_ydata(self.training_losses_y)
        if validation_loss is not None:
            if self.validation_line is None:
                self.validation_losses_x = [self.step - 1]
                self.validation_losses_y = [validation_loss]
                self.validation_line = self.ax.plot(self.validation_losses_x, self.validation_losses_y, label = 'Validation')[0]
                self.ax.legend()
            self.validation_losses_x.append(self.step)
            self.validation_losses_y.append(validation_loss)
            self.validation_line.set_xdata(self.validation_losses_x)
            self.validation_line.set_ydata(self.validation_losses_y)
        self.ax.set_xlim(0, self.step + 1)
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()

In [109]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import random

seed = 19970917 #25005233 19970917
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

zeros_multiplier = 1.0

device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

a_files_ids = list(dev_a_files_segments_lim.keys())
a_validation_files_ids = random.sample(a_files_ids, int(len(a_files_ids) * 0.2))
a_train_files_ids = [file_id for file_id in a_files_ids if file_id not in a_validation_files_ids]

a_train_files_segments = {}
b_train_files_segments = {}
c_train_files_segments = {}
for file_id in a_train_files_ids:
    a_train_files_segments[file_id] = dev_a_files_segments_lim[file_id]
    b_train_files_segments[file_id] = dev_b_files_segments_lim[file_id]
    c_train_files_segments[file_id] = dev_c_files_segments_lim[file_id]
a_validation_files_segments = {}
b_validation_files_segments = {}
c_validation_files_segments = {}
for file_id in a_validation_files_ids:
    a_validation_files_segments[file_id] = dev_a_files_segments_lim[file_id]
    b_validation_files_segments[file_id] = dev_b_files_segments_lim[file_id]
    c_validation_files_segments[file_id] = dev_c_files_segments_lim[file_id]

a_train_dataset = Files_dataset(a_train_files_segments, [3],zeros_multiplier = zeros_multiplier)
b_train_dataset = Files_dataset(b_train_files_segments, [5],zeros_multiplier = zeros_multiplier)
c_train_dataset = Files_dataset(c_train_files_segments, [18],zeros_multiplier = zeros_multiplier)
a_train_dataloader = DataLoader(a_train_dataset, batch_size = 32, shuffle = True, num_workers = 8)
b_train_dataloader = DataLoader(b_train_dataset, batch_size = 32, shuffle = True, num_workers = 8)
c_train_dataloader = DataLoader(c_train_dataset, batch_size = 32, shuffle = True, num_workers = 8)

a_validation_dataset = Files_dataset(a_validation_files_segments, [3],zeros_multiplier = zeros_multiplier)
b_validation_dataset = Files_dataset(b_validation_files_segments, [5],zeros_multiplier = zeros_multiplier)
c_validation_dataset = Files_dataset(c_validation_files_segments, [18],zeros_multiplier = zeros_multiplier)
a_validation_dataloader = DataLoader(a_validation_dataset, batch_size = 32, shuffle = True, num_workers = 8)
b_validation_dataloader = DataLoader(b_validation_dataset, batch_size = 32, shuffle = True, num_workers = 8)
c_validation_dataloader = DataLoader(c_validation_dataset, batch_size = 32, shuffle = True, num_workers = 8)

net = Model(dimension, 2).to(device, non_blocking = True)
optimizer = optim.Adam(net.parameters(), lr = 0.0001)

losses = []
with torch.no_grad():
    for dataloader in [a_train_dataloader]:
        for x, y, w in dataloader:
            x = [tensor.to(device, non_blocking = True).float() for tensor in x]
            y = y.to(device, non_blocking = True).float()
            w = w.to(device, non_blocking = True).float()
            criterion = nn.BCELoss(w)
            y_ = net(x)
            loss = criterion(y_, y)
            losses.append(loss.data)
train_loss = (np.sum(losses) / len(losses)).cpu()

train_graph = Train_graph(train_loss)

for train_dataloader, validation_dataloader in [(a_train_dataloader, a_validation_dataloader),
                                                (b_train_dataloader, b_validation_dataloader),
                                                (c_train_dataloader, c_validation_dataloader)]:
    for epoch in range(20):
        losses = []
        for x, y, w in train_dataloader:
            x = [tensor.to(device, non_blocking = True).float() for tensor in x]
            y = y.to(device, non_blocking = True).float()
            w = w.to(device, non_blocking = True).float()

            criterion = nn.BCELoss(w)
            net.zero_grad()
            y_ = net(x)
            loss = criterion(y_, y)
            loss.backward()
            optimizer.step()

            losses.append(loss.data)
        train_loss = np.sum(losses) / len(losses)
        
        validation_losses = []
        with torch.no_grad():
            for x, y, w in validation_dataloader:
                x = [tensor.to(device, non_blocking = True).float() for tensor in x]
                y = y.to(device, non_blocking = True).float()
                w = w.to(device, non_blocking = True).float()
                criterion = nn.BCELoss(w)
                y_ = net(x)
                loss = criterion(y_, y)
                
                validation_losses.append(loss)
        validation_loss = np.sum(validation_losses) / len(validation_losses)
        
        train_graph.draw(train_loss, validation_loss)
        print(train_loss, end = '\r')

<IPython.core.display.Javascript object>

tensor(0.0907, device='cuda:0')

In [58]:
a_eval_rttm     = '../augmented/callhome/callhome2/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/ref.rttm'
a_eval_segments = '../augmented/callhome/callhome2/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/segments'
a_eval_ivectors = '../augmented/callhome/callhome2/augmented_0/1.5_0.3_0.5/' + str(dimension) + '/exp/make_ivectors/ivector.txt'
a_eval_files_segments = get_rttm_segments_features(a_eval_rttm, a_eval_segments, a_eval_ivectors)

In [157]:
# Filtering segments thath only contain one speaker
a_eval_files_segments_lim = limit_segments_speakers_length(a_eval_files_segments, 1, log = True)

# Getting the two first speakers of each file
a_eval_files_speakers_names = get_best_speakers(a_eval_files_segments_lim, 2)

# Filtering segments that only contain speakers from the list
a_eval_files_segments_lim = limit_segments_speakers_names(a_eval_files_segments_lim, a_eval_files_speakers_names, log = True)

Kept 19828 of 33962: 0.5838289853365526
Kept 17773 of 19828: 0.8963586846883196


In [230]:
class File_dataset(Dataset):
    def __init__(self,
                 segments,
                 models_generation_length,
                 models_container_speakers_names,
                 models_container_length = None,
                 include_overlaps = False,
                 feature = 'ivectors'):
        self.segments = segments
        self.models_generation_length = models_generation_length
        self.models_container_speakers_names = models_container_speakers_names
        self.models_container_length = models_container_length if isinstance(models_container_length, int) else len(models_container_speakers_names)
        if len(self.models_container_speakers_names) < self.models_container_length:
            self.models_container_speakers_names += ['0' for index in range(self.models_container_length - len(self.models_container_speakers_names))]
        self.include_overlaps = include_overlaps
        self.feature = feature
        
        speakers_segments_indexes = get_speakers_segments_indexes(enumerate(segments))
        self.speakers_models = get_speakers_models(self.segments,
                                                   speakers_segments_indexes,
                                                   [self.models_generation_length])
        self.models_container = [self.speakers_models[speaker_name][self.models_generation_length] if speaker_name != '0' else { 'ivectors': [Ivector(np.random.uniform(-0.1, 0.1, dimension).astype(np.float32))] } for speaker_name in self.models_container_speakers_names]
        
    def __len__(self):
        return len(self.segments)
    
    def __getitem__(self, key):
        segment = self.segments[key]
        x = [embeddings[self.feature][0].get_value() for embeddings in self.models_container + [{ 'ivectors': segment.get_ivectors() }]]
        segment_speakers_names = [speaker.get_name() for speaker in segment.get_speakers()]
        if self.include_overlaps:
            segment_speaker_name = ','.join(sorted(set(segment_speakers_names)))
            y = np.asarray([speaker_name == segment_speaker_name for speaker_name in self.models_container_speakers_names], dtype = np.float32)
        else:
            y = np.asarray([speaker_name in segment_speakers_names for speaker_name in self.models_container_speakers_names], dtype = np.float32) / len(segment_speakers_names)
        return x, y

good = 0
bad = 0
for file_id, segments in a_eval_files_segments_lim.items():
    file_dataset = File_dataset(segments, 3, a_eval_files_speakers_names[file_id], 2)
    file_dataloader = DataLoader(file_dataset)
    with torch.no_grad():
        for x, y in file_dataloader:
            x = [tensor.to(device, non_blocking = True).float() for tensor in x]
            y = y.to(device, non_blocking = True).float()
            y_ = net(x)
            y_one_hot = torch.zeros(y_.shape).to(device, non_blocking = True).scatter(1, y_.argmax(1).unsqueeze(1), 1.0)
            if (y_one_hot.cpu().numpy()[0] == y.cpu().numpy()[0]).all():
                good += 1
            else:
                bad += 1
print('good:', good, str(good / (good + bad)) + '%', 'bad:', bad, str(bad / (good + bad)) + '%')
            

good: 14142 0.7957013447363979% bad: 3631 0.20429865526360208%
