# Example:
* # EXTRACT EMBEDDINGS from TWO wav files using a pre-trained VGGVox model
* # Obtain the distance between them

## In this example we use a few audio files from two VoxCeleb speakers, id10270 and id10275

* ## See the previous VGGVox notebooks and also:

https://github.com/Derpimort/VGGVox-PyTorch

https://rzimmermann.com/coding/vggvox-pytorch


* ## Clone the GitHub repository: VGGVox-PyTorch

In [1]:
! git clone https://github.com/Derpimort/VGGVox-PyTorch.git

Cloning into 'VGGVox-PyTorch'...
remote: Enumerating objects: 8, done.
remote: Counting objects: 100% (8/8), done.
remote: Compressing objects: 100% (7/7), done.
remote: Total 101 (delta 1), reused 5 (delta 1), pack-reused 93
Receiving objects: 100% (101/101), 64.81 MiB | 10.25 MiB/s, done.
Resolving deltas: 100% (50/50), done.


In [2]:
ls ./VGGVox-PyTorch/

[0m[01;34mdata[0m/    [01;34mmodels[0m/        README.md         results.txt      test.wav  vggm.py
LICENSE  model_test.py  requirements.txt  signal_utils.py  train.py


## * Change the current directory to /content/VGGVox-PyTorch

In [1]:
cd /content/VGGVox-PyTorch


/content/VGGVox-PyTorch


# Install packages
# <font color=red>WARNING: once the packages are installed, you have to restart the runtime (virtual machine) — see the warning message</font>

In [4]:
# Use the %pip magic (not the `!pip` shell escape) so the packages are installed
# into the environment of the kernel that is actually running this notebook.
%pip install -r requirements.txt

Collecting argparse
  Downloading https://files.pythonhosted.org/packages/f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl
Installing collected packages: argparse
Successfully installed argparse-1.4.0


# Let's see what data we need

In [2]:
ls -al ./data/

total 5232
drwxr-xr-x 3 root root    4096 Dec 16 11:26 ./
drwxr-xr-x 6 root root    4096 Dec 16 11:30 ../
-rw-r--r-- 1 root root 4912512 Dec 16 11:26 iden_split.txt
-rw-r--r-- 1 root root  387503 Dec 16 11:26 val.pkl
-rw-r--r-- 1 root root   40782 Dec 16 11:26 vox1_meta.csv
drwxr-xr-x 4 root root    4096 Dec 16 11:27 wav/


# Create a wav directory inside data

In [3]:
cd ./data

/content/VGGVox-PyTorch/data


In [4]:
# -p makes the cell idempotent: no error when ./wav already exists (e.g. on a re-run).
! mkdir -p ./wav


# Mount our Google Drive and copy some wav files

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
ls /content/drive/'My Drive'/VGGVOX_PyTorch

id10270.zip                    VGGVox_Distance_Embedding.ipynb
id10275.zip                    VGGVox_Embeddings_EmoPCFBorja_wavDir_SVM.ipynb
[0m[01;34mloc1[0m/                          VGGVox_Embeddings_Simple.ipynb
[01;34mloc2[0m/                          VGGVox_Embeddings_wavDir.ipynb
MLLB_Keras_FontReco_CNN.ipynb  VGGVox_Embed_Female_Male_wavDir.ipynb
MLLB_OSA_Simple_PCA.ipynb      VGGVox_Embed_Female_Male_wavDir_SVM.ipynb
PFCBorjaGAPS.zip               wavEmo_females_males.zip
VGGVox_1.ipynb                 wav.zip


In [10]:
cp -rf /content/drive/'My Drive'/VGGVOX_PyTorch/loc1 /content/VGGVox-PyTorch/data/wav/.

In [11]:
cp -rf /content/drive/'My Drive'/VGGVOX_PyTorch/loc2 /content/VGGVox-PyTorch/data/wav/.

In [7]:
ls /content/VGGVox-PyTorch/data/wav

[0m[01;34mloc1[0m/  [01;34mloc2[0m/


In [8]:
ls /content/VGGVox-PyTorch/data/wav/loc1

00001.wav  00002.wav  00003.wav  00004.wav  00005.wav


In [9]:
ls /content/VGGVox-PyTorch/data/wav/loc2

00001.wav  00003.wav  00005.wav  00007.wav  00009.wav  00011.wav
00002.wav  00004.wav  00006.wav  00008.wav  00010.wav  00012.wav


* ## **IT IS IMPORTANT TO MOVE to the VGGVox-PyTorch directory**

In [10]:
cd /content/VGGVox-PyTorch

/content/VGGVox-PyTorch



---

# Load the pre-trained VGGVox model

In [11]:
import pandas as pd
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Subset, Dataset, DataLoader
from tqdm.auto import tqdm
from vggm import VGGM
import argparse
from train import AudioDataset, accuracy, ppdf, LOCAL_DATA_DIR, MODEL_DIR
from sklearn.metrics.pairwise import cosine_similarity


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Checkpoint file name appended to MODEL_DIR (imported from train).
# An alternative checkpoint tried earlier was "VGGMVAL_BEST_149_80.84.pth".
checkpoint_path = MODEL_DIR + "VGGM300_BEST_140_81.99.pth"

# NOTE(review): 1251 is presumably the number of speaker classes the checkpoint
# was trained with — confirm against vggm.py / the training setup.
model = VGGM(1251)
# map_location lets a checkpoint saved on another device load onto `device`.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model = model.to(device)
# Switch to evaluation mode: the network is only used for inference here.
model.eval()

### This is to extract an activation from one layer ...

# Registry of captured layer outputs, keyed by the name passed to get_activation().
activation = {}


def get_activation(name):
    """Return a forward hook that stores the hooked layer's output in `activation[name]`."""
    def _store_output(_module, _inputs, output):
        # Keep a detached copy so the stored value is cut off from the autograd graph.
        activation[name] = output.detach()
    return _store_output

# **Create a function to obtain the embeddings and the distance between them**

In [14]:
# Root folder of the wav files; the loc1/ and loc2/ sub-folders were copied here earlier in the notebook.
DATA_DIR = '/content/VGGVox-PyTorch/data/wav/'


def distance_embeddings(file1_wav, label1, file2_wav, label2, DATA_DIR, model):
  """Compare two wav files through their VGGVox `fc7` embeddings.

  Both files are passed through `model` one at a time; the output of
  `model.classifier.fc7` is captured with a forward hook and the two captured
  embeddings are compared with scikit-learn's `cosine_similarity`.

  NOTE: despite the function name, the returned value is a cosine *similarity*,
  not a distance: larger values mean the two embeddings are more alike.

  Relies on the notebook-level names `device`, `activation` and
  `get_activation` defined in the model-loading cell.

  Args:
    file1_wav: path of the first wav file relative to DATA_DIR,
      including its sub-directory (e.g. 'loc1/00001.wav').
    label1: integer label of the first file (AudioDataset needs an integer).
    file2_wav: path of the second wav file, same convention as file1_wav.
    label2: integer label of the second file.
    DATA_DIR: root directory handed to AudioDataset.
    model: network exposing `classifier.fc7`; expected to already be on `device`.

  Returns:
    The array returned by `cosine_similarity` for the two embeddings
    (a 1x1 array in the runs shown in this notebook).
  """
  # NOTE(review): the value 3 in the 'Set' column presumably marks the rows as
  # test-set entries for AudioDataset — confirm against train.py.
  df_F = pd.DataFrame([[3, file1_wav, label1], [3, file2_wav, label2]],
                      columns=['Set', 'Path', 'Label'])

  test_dataset = AudioDataset(df_F, DATA_DIR, is_train=False)
  # batch_size=1 and shuffle=False: one embedding per file, in the given order.
  test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

  embedding_arr = []

  # Register the hook once and always remove it afterwards; registering inside
  # the loop (as before) piled up a new hook on the model for every batch and
  # every call of this function.
  hook_handle = model.classifier.fc7.register_forward_hook(get_activation('fc7'))
  try:
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
      for audio, _ in test_loader:
        model(audio.to(device))  # the forward pass fills activation['fc7']
        embedding_arr.append(activation['fc7'].cpu().numpy())
  finally:
    hook_handle.remove()

  return cosine_similarity(embedding_arr[0], embedding_arr[1])

# If the wav files are from <font color = red>different</font> speakers the cosine similarity (the value returned by `distance_embeddings`) should be smaller

In [22]:
# One file per speaker: loc1 is labelled 10270, loc2 is labelled 10275.
distance_embeddings(
    file1_wav='loc1/00005.wav', label1=10270,
    file2_wav='loc2/00001.wav', label2=10275,
    DATA_DIR=DATA_DIR, model=model,
)

array([[0.76469314]], dtype=float32)

# If the wav files are from the <font color=red>same</font> speaker the cosine similarity (the value returned by `distance_embeddings`) should be close to one

In [23]:
# Two different files from the loc1 folder, both labelled 10270.
distance_embeddings(
    file1_wav='loc1/00005.wav', label1=10270,
    file2_wav='loc1/00001.wav', label2=10270,
    DATA_DIR=DATA_DIR, model=model,
)

array([[0.9378784]], dtype=float32)

* # NEXT: a simple notebook to extract embeddings for a whole directory of wav files

VGGVox_Embedding