# VGGVox

In [1]:
import pandas as pd
import os
import numpy as np
import re

In [2]:
if not os.path.exists('VGGVox-PyTorch'):
    ! git clone https://github.com/Derpimort/VGGVox-PyTorch.git
    if not os.path.exists('VGGVox-PyTorch/dataset'):
        os.system('mkdir dataset')
    os.system('rm -r VGGVox-PyTorch/data')
    os.system('cp -r dataset/ VGGVox-PyTorch/dataset/' )
    os.chdir('VGGVox-PyTorch')
    ! pip install -r requirements.txt
    
else:
    if not os.path.exists('VGGVox-PyTorch/dataset'):
        os.system('mkdir dataset')
    os.system(' cp -r dataset/ VGGVox-PyTorch/dataset/' )
    os.chdir('VGGVox-PyTorch')
    ! pip install -r requirements.txt

Collecting argparse
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


In [3]:
import torch
from torch.utils.data import Subset, Dataset, DataLoader
from tqdm.auto import tqdm
from vggm import VGGM
import argparse
from train import AudioDataset, accuracy, ppdf, LOCAL_DATA_DIR, MODEL_DIR

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import time
%matplotlib inline

In [4]:
# Pick the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Restore the trained weights onto the selected device.
checkpoint_path = MODEL_DIR + "VGGM300_BEST_140_81.99.pth"
state_dict = torch.load(checkpoint_path, map_location=device)

model = VGGM(1251)
model.load_state_dict(state_dict)
model.to(device)
# Inference mode; as the last expression this also displays the model.
model.eval()

### Forward hook that captures the output (activation) of a single layer
# Captured layer outputs, keyed by the name passed to get_activation().
activation = {}


def get_activation(name):
    """Return a forward hook that stores the hooked layer's output under ``name``.

    The returned callable takes the (module, inputs, output) arguments of a
    forward hook and writes ``output.detach()`` into the module-level
    ``activation`` dict, replacing any previous entry for ``name``.
    """
    def _capture(module, inputs, output):
        # Detach so the stored tensor is cut off from the autograd graph.
        activation.update({name: output.detach()})

    return _capture

In [None]:
# Root directory handed to AudioDataset. The setup cell chdir's into the cloned
# repository, so the working directory replaces the former hard-coded absolute
# user path. NOTE(review): assumes the notebook is started from the folder that
# contains VGGVox-PyTorch -- confirm.
DATA_DIR = os.getcwd()

df = pd.read_csv('./dataset/labels.csv')

Datasets = {"test": AudioDataset(df, DATA_DIR, is_train=False)}

Dataloaders = {i: DataLoader(Datasets[i], batch_size=1, shuffle=False, num_workers=2) for i in Datasets}

embedding_arr = []

# Register the hook exactly once. Registering it inside the loop stacks one
# more hook on fc7 per sample, and every one of them fires on each forward pass.
hook_handle = model.classifier.fc7.register_forward_hook(get_activation('fc7'))
try:
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for audio, _labels in Dataloaders['test']:
            # The forward pass is run only for its side effect on `activation`.
            model(audio.to(device))
            # batch_size=1, so flattening yields one 1-D embedding per sample.
            embedding_arr.append(activation['fc7'].cpu().numpy().reshape(-1))
finally:
    # Leave the model without stray hooks, even if extraction fails midway.
    hook_handle.remove()

df['Embeddings'] = pd.Series(embedding_arr)

In [None]:
# Preview only the first rows: each 'Embeddings' cell holds a whole vector, so
# rendering the full frame floods the output.
df.head()

## Take the first embedding of classes 0 and 1 as reference

In [None]:
speaker_t1, speaker_t2 = 0, 1

# Reset the prediction column first so the rows printed below already carry it.
df["Pred_Label"] = 0

# The first sample of each speaker serves as that speaker's reference.
reference_row_1 = df.loc[df['Label'] == speaker_t1].iloc[0]
reference_row_2 = df.loc[df['Label'] == speaker_t2].iloc[0]
embedding_1 = reference_row_1['Embeddings']
embedding_2 = reference_row_2['Embeddings']

# cosine_similarity expects 2-D inputs, hence the single-row reshapes.
reference_vec_1 = embedding_1.reshape(1, -1)
reference_vec_2 = embedding_2.reshape(1, -1)
similarity = cosine_similarity(reference_vec_1, reference_vec_2)
cosine_distance = 1 - similarity

print('Cosine similarity: ', similarity, ' cosine distance: ', cosine_distance)


print('\n\n-----------------Speaker: ', speaker_t1)
print(reference_row_1, '\n')


print('-----------------Speaker: ', speaker_t2)
print(reference_row_2)


In [None]:
# Nearest-reference classification: every sample receives the label of the
# reference embedding it is closest to in cosine distance.

# One vectorised call scores all samples against both references;
# column 0 is the distance to embedding_1, column 1 the distance to embedding_2.
all_embeddings = np.stack(df['Embeddings'].values)
reference_embeddings = np.stack([embedding_1, embedding_2])
cosine_distances = 1 - cosine_similarity(all_embeddings, reference_embeddings)

# Whole-column assignment instead of chained `df['Pred_Label'][i] = ...`, which
# pandas does not guarantee to write through (SettingWithCopy / Copy-on-Write).
# Ties go to speaker_t1.
df['Pred_Label'] = np.where(
    cosine_distances[:, 0] <= cosine_distances[:, 1], speaker_t1, speaker_t2
)

# The two reference samples (first row of each speaker, matching how
# embedding_1 / embedding_2 were picked) always keep their own label. Looking
# them up replaces the hard-coded row positions 0 and 50.
for speaker in (speaker_t1, speaker_t2):
    reference_index = df.index[df['Label'] == speaker][0]
    df.loc[reference_index, 'Pred_Label'] = speaker

# Compact preview instead of two printed lines per sample.
distance_preview = pd.DataFrame(
    cosine_distances,
    index=df.index,
    columns=['cosine_distance_sp1', 'cosine_distance_sp2'],
).assign(Pred_Label=df['Pred_Label'])
distance_preview.head(10)


In [None]:
# Confusion matrix of true vs. predicted speaker labels.
conf_matrix = confusion_matrix(y_true=df['Label'], y_pred=df['Pred_Label'])

# Draw the matrix as a lightly shaded grid with the count written in each cell.
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for (row, col), count in np.ndenumerate(conf_matrix):
    # Rows are the actual labels (y axis), columns the predictions (x axis).
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')

ax.set_xlabel('Predictions', fontsize=18)
ax.set_ylabel('Actuals', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
# Unpack the 2x2 confusion matrix; class 1 is treated as the positive class.
tn, fp = conf_matrix[0, 0], conf_matrix[0, 1]
fn, tp = conf_matrix[1, 0], conf_matrix[1, 1]
cm_results = {'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}

# Standard binary-classification metrics derived from the four counts.
test_acc = (tp + tn) / (tp + fp + tn + fn)
test_precision = tp / (tp + fp)
test_recall = tp / (tp + fn)
test_F1_Score = (2 * test_precision * test_recall) / (test_precision + test_recall)

for template, value in (
    ("Test accuracy: {}", test_acc),
    ("Test precision: {}", test_precision),
    ("Test recall: {}", test_recall),
    ("Test f1 score: {}", test_F1_Score),
):
    print(template.format(value))

### PCA

In [None]:
MAX_COMPONENTS = 100  # upper bound on the number of principal components

# One row per sample, one column per embedding dimension.
X = np.stack(df['Embeddings'].values)

# PCA cannot extract more components than min(n_samples, n_features); capping
# keeps the cell working on small datasets instead of raising a ValueError.
n_components = min(MAX_COMPONENTS, *X.shape)

# Standardise every dimension (zero mean, unit variance) before PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components = n_components)

pca.fit(X_scaled)

In [None]:
# Cumulative share of variance explained by the first k principal components.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# Entry 0 is the variance of ONE component, so the x axis must start at 1
# (plotting against the raw index shifts the curve left by one component).
component_counts = np.arange(1, len(cumulative_variance) + 1)

fig, ax = plt.subplots()
ax.plot(component_counts, cumulative_variance)
ax.set_xlabel('number of components')
ax.set_ylabel('cumulative explained variance');

In [None]:
# Scatter of the samples on the first two principal components, one colour per
# speaker label.
group = df['Label']
cdict = {0: 'red', 1: 'blue'}

# Project once and slice both components from the same result.
Z = pca.transform(X_scaled)
Z1 = Z[:, 0]
Z2 = Z[:, 1]

fig, ax = plt.subplots()
for g in np.unique(group):
    ix = np.where(group == g)
    ax.scatter(Z1[ix], Z2[ix], c = cdict[g], label = g, s = 50)

ax.set_xlabel("First Principal Component", fontsize=14)
ax.set_ylabel("Second Principal Component", fontsize=14)
ax.set_title("PCA projection of the fc7 embeddings", fontsize=14)
ax.legend();

In [None]:
# Embed the standardised vectors with t-SNE (fixed seed) and report wall time.
time_start = time.time()

tsne = TSNE(random_state=123)
X_tsne = tsne.fit_transform(X_scaled)

elapsed_seconds = time.time() - time_start
print('t-SNE done! Time elapsed: {} seconds'.format(elapsed_seconds))

In [None]:
# Sanity check on the dimensions of the t-SNE output
# (presumably (n_samples, 2), TSNE's default number of components -- confirm).
X_tsne.shape

In [None]:
# One colour per sample, taken from its true label; labels other than 0 and 1
# contribute no entry.
label_colors = {0: 'red', 1: 'blue'}
color = [label_colors[label] for label in df['Label'] if label in label_colors]

plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=color)

## Take the mean of the first 10 embeddings of classes 0 and 1 as reference

In [None]:
N_REFERENCE = 10  # leading samples per speaker averaged into its reference

# Reset the predictions left over from the previous experiment.
df["Pred_Label"] = 0

speaker_t1 = 0
speaker_t2 = 1

# Stack the per-sample vectors into a 2-D array and average over the sample
# axis. This is explicit about the axis and does not depend on how pandas
# reduces an object-dtype Series whose cells are arrays.
embedding_1 = np.stack(
    df[df['Label'] == speaker_t1].iloc[:N_REFERENCE]['Embeddings'].values
).mean(axis=0)
embedding_2 = np.stack(
    df[df['Label'] == speaker_t2].iloc[:N_REFERENCE]['Embeddings'].values
).mean(axis=0)


similarity = cosine_similarity(embedding_1.reshape(1,-1), embedding_2.reshape(1,-1))
cosine_distance = 1 - similarity

print('Cosine similarity: ', similarity, ' cosine distance: ', cosine_distance)


# The printed rows are the first sample of each speaker (for inspection only);
# the references themselves are the means computed above.
print('\n\n-----------------Speaker: ', speaker_t1)
print(df[df['Label']== speaker_t1].iloc[0],'\n')


print('-----------------Speaker: ', speaker_t2)
print(df[df['Label']== speaker_t2].iloc[0])


In [None]:
# Nearest-reference classification: every sample receives the label of the
# reference embedding it is closest to in cosine distance.

# One vectorised call scores all samples against both references;
# column 0 is the distance to embedding_1, column 1 the distance to embedding_2.
all_embeddings = np.stack(df['Embeddings'].values)
reference_embeddings = np.stack([embedding_1, embedding_2])
cosine_distances = 1 - cosine_similarity(all_embeddings, reference_embeddings)

# Whole-column assignment instead of chained `df['Pred_Label'][i] = ...`, which
# pandas does not guarantee to write through (SettingWithCopy / Copy-on-Write).
# Ties go to speaker_t1.
df['Pred_Label'] = np.where(
    cosine_distances[:, 0] <= cosine_distances[:, 1], speaker_t1, speaker_t2
)

# Compact preview instead of two printed lines per sample.
distance_preview = pd.DataFrame(
    cosine_distances,
    index=df.index,
    columns=['cosine_distance_sp1', 'cosine_distance_sp2'],
).assign(Pred_Label=df['Pred_Label'])
distance_preview.head(10)



In [None]:
# Confusion matrix of true vs. predicted speaker labels.
conf_matrix = confusion_matrix(y_true=df['Label'], y_pred=df['Pred_Label'])

# Draw the matrix as a lightly shaded grid with the count written in each cell.
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for (row, col), count in np.ndenumerate(conf_matrix):
    # Rows are the actual labels (y axis), columns the predictions (x axis).
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')

ax.set_xlabel('Predictions', fontsize=18)
ax.set_ylabel('Actuals', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
# Unpack the 2x2 confusion matrix; class 1 is treated as the positive class.
tn, fp = conf_matrix[0, 0], conf_matrix[0, 1]
fn, tp = conf_matrix[1, 0], conf_matrix[1, 1]
cm_results = {'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}

# Standard binary-classification metrics derived from the four counts.
test_acc = (tp + tn) / (tp + fp + tn + fn)
test_precision = tp / (tp + fp)
test_recall = tp / (tp + fn)
test_F1_Score = (2 * test_precision * test_recall) / (test_precision + test_recall)

for template, value in (
    ("Test accuracy: {}", test_acc),
    ("Test precision: {}", test_precision),
    ("Test recall: {}", test_recall),
    ("Test f1 score: {}", test_F1_Score),
):
    print(template.format(value))

### PCA

In [None]:
MAX_COMPONENTS = 100  # upper bound on the number of principal components

# One row per sample, one column per embedding dimension.
X = np.stack(df['Embeddings'].values)

# PCA cannot extract more components than min(n_samples, n_features); capping
# keeps the cell working on small datasets instead of raising a ValueError.
n_components = min(MAX_COMPONENTS, *X.shape)

# Standardise every dimension (zero mean, unit variance) before PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components = n_components)

pca.fit(X_scaled)

In [None]:
# Cumulative share of variance explained by the first k principal components.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# Entry 0 is the variance of ONE component, so the x axis must start at 1
# (plotting against the raw index shifts the curve left by one component).
component_counts = np.arange(1, len(cumulative_variance) + 1)

fig, ax = plt.subplots()
ax.plot(component_counts, cumulative_variance)
ax.set_xlabel('number of components')
ax.set_ylabel('cumulative explained variance');

In [None]:
# Scatter of the samples on the first two principal components, one colour per
# speaker label.
group = df['Label']
cdict = {0: 'red', 1: 'blue'}

# Project once and slice both components from the same result.
Z = pca.transform(X_scaled)
Z1 = Z[:, 0]
Z2 = Z[:, 1]

fig, ax = plt.subplots()
for g in np.unique(group):
    ix = np.where(group == g)
    ax.scatter(Z1[ix], Z2[ix], c = cdict[g], label = g, s = 50)

ax.set_xlabel("First Principal Component", fontsize=14)
ax.set_ylabel("Second Principal Component", fontsize=14)
ax.set_title("PCA projection of the fc7 embeddings", fontsize=14)
ax.legend();

In [None]:
# Embed the standardised vectors with t-SNE (fixed seed) and report wall time.
time_start = time.time()

tsne = TSNE(random_state=123)
X_tsne = tsne.fit_transform(X_scaled)

elapsed_seconds = time.time() - time_start
print('t-SNE done! Time elapsed: {} seconds'.format(elapsed_seconds))

In [None]:
# Sanity check on the dimensions of the t-SNE output
# (presumably (n_samples, 2), TSNE's default number of components -- confirm).
X_tsne.shape

In [None]:
# One colour per sample, taken from its true label; labels other than 0 and 1
# contribute no entry.
label_colors = {0: 'red', 1: 'blue'}
color = [label_colors[label] for label in df['Label'] if label in label_colors]

plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=color)