### Installing SpeechBrain

In [None]:
!pip install speechbrain

Collecting speechbrain
  Downloading speechbrain-0.5.16-py3-none-any.whl (630 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m630.6/630.6 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting hyperpyyaml (from speechbrain)
  Downloading HyperPyYAML-1.2.2-py3-none-any.whl (16 kB)
Collecting sentencepiece (from speechbrain)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Collecting ruamel.yaml>=0.17.28 (from hyperpyyaml->speechbrain)
  Downloading ruamel.yaml-0.18.5-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.4/116.4 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml>=0.17.28->hyperpyyaml->speechbrain)
  Downloading ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014

### Reading the Gooogle Speech Commands Dataset

In [None]:
!pip install -q kaggle

In [None]:
!mkdir ~/.kaggle

In [None]:
!mv kaggle.json ~/.kaggle/

In [None]:
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle competitions download -c tensorflow-speech-recognition-challenge

Downloading tensorflow-speech-recognition-challenge.zip to /content
100% 3.50G/3.50G [00:34<00:00, 97.4MB/s]
100% 3.50G/3.50G [00:34<00:00, 110MB/s] 


In [None]:
!unzip -q tensorflow-speech-recognition-challenge.zip

In [None]:
!apt-get install -y p7zip-full
!7z x /content/train.7z

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
p7zip-full is already the newest version (16.02+dfsg-8).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.

7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.30GHz (306F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan /content/                   1 file, 1121103842 bytes (1070 MiB)

Extracting archive: /content/train.7z
--
Path = /content/train.7z
Type = 7z
Physical Size = 1121103842
Headers Size = 389133
Method = Delta LZMA2:24
Solid = +
Blocks = 2

  0%      0% 39 - train/audio/_background_noise_/dude_miaowing.wav                                                            0% 41 - train/aud

### Data Preparation

In [None]:
import os
import pandas as pd

data_dirs = ['/content/train/audio/go', '/content/train/audio/left', '/content/train/audio/right']

recordings_count = {}

for data_dir in data_dirs:
    individual_count = {}
    for filename in os.listdir(data_dir):
        if filename.endswith(".wav"):

            individual_id = filename.split('_')[0]
            individual_count[individual_id] = individual_count.get(individual_id, 0) + 1

    df = pd.DataFrame(list(individual_count.items()), columns=["Individual ID", "Number of Recordings"])
    df = df.sort_values(by="Number of Recordings", ascending=False)
    recordings_count[data_dir] = df

df_go = recordings_count['/content/train/audio/go']
df_left = recordings_count['/content/train/audio/left']
df_right = recordings_count['/content/train/audio/right']

print("DataFrame for 'go' directory:")
display(df_go)

print("\nDataFrame for 'left' directory:")
display(df_left)

print("\nDataFrame for 'right' directory:")
display(df_right)

DataFrame for 'go' directory:


Unnamed: 0,Individual ID,Number of Recordings
304,c1d39ce8,10
86,e7ea8b76,8
48,c120e80e,8
288,cb8f8307,8
38,7c1d8533,7
...,...,...
620,b36c27c2,1
621,a84dee7b,1
622,837f7378,1
623,126403d4,1



DataFrame for 'left' directory:


Unnamed: 0,Individual ID,Number of Recordings
274,cb8f8307,8
290,c1d39ce8,8
363,cb2929ce,7
149,28ce0c58,6
73,692a88e6,6
...,...,...
634,51f4d5b0,1
635,a3502f15,1
637,813b82a6,1
639,ccfd721c,1



DataFrame for 'right' directory:


Unnamed: 0,Individual ID,Number of Recordings
321,cb2929ce,9
425,9a7c1f83,8
297,c1d39ce8,8
463,cb8f8307,7
44,c120e80e,7
...,...,...
624,6071a214,1
625,31d68957,1
626,50928b05,1
627,4cb874bb,1


In [None]:
merged_df = pd.merge(df_go, df_left, on='Individual ID', how='outer', suffixes=('_df_go', '_df_left'))
merged_df = pd.merge(merged_df, df_right, on='Individual ID', how='outer')

merged_df = merged_df.fillna(0)
merged_df['Number of Recordings'] = merged_df['Number of Recordings_df_go'] + merged_df['Number of Recordings_df_left'] + merged_df['Number of Recordings']

merged_df = merged_df[['Individual ID', 'Number of Recordings']]

merged_df = merged_df.sort_values(by='Number of Recordings', ascending=False)
merged_df

Unnamed: 0,Individual ID,Number of Recordings
0,c1d39ce8,26.0
3,cb8f8307,23.0
6,cb2929ce,23.0
2,c120e80e,21.0
4,7c1d8533,19.0
...,...,...
745,d103dd6e,1.0
751,20d3f11f,1.0
1015,e7334395,1.0
1334,211ccd2e,1.0


### Taking recordings only from 50 speakers, since it's ideal for a household robot.

In [None]:
df = merged_df[:50]
df

Unnamed: 0,Individual ID,Number of Recordings
0,c1d39ce8,26.0
3,cb8f8307,23.0
6,cb2929ce,23.0
2,c120e80e,21.0
4,7c1d8533,19.0
5,28ce0c58,19.0
1,e7ea8b76,19.0
130,9a7c1f83,18.0
8,dbb40d24,17.0
70,d0faf7e4,17.0


In [None]:
# total number of recordings

print('Total number of recordings:', int(sum(df['Number of Recordings'])))

Total number of recordings: 806


### Shifting all the audios by these speakers into a separate directory.

In [None]:
import shutil

source_dirs = ['/content/train/audio/go', '/content/train/audio/left', '/content/train/audio/right']
target_dir = ['/content/Others']

for target_dir in target_dir:
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

counter = 0

for source_dir in source_dirs:
    for filename in os.listdir(source_dir):

        if filename.endswith(".wav"):
            file_individual_id = filename.split('_')[0]
            source_file = os.path.join(source_dir, filename)

            if file_individual_id in list(df['Individual ID']):
                counter+= 1
                target_file = os.path.join(target_dir, str(counter) + '_' + filename)
                shutil.copy(source_file, target_file)

In [None]:
dir_path = '/content/Others'
print('Total number of recordings:', len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))

Total number of recordings: 806


### Expanding Manasi audio recording dataset.

In [None]:
def copy_files(source_folder, num_copies):
    counter = 0
    for filename in os.listdir(source_folder):
        if filename.endswith(".wav"):
            source_path = os.path.join(source_folder, filename)

            for i in range(num_copies):
                random_name = f"{counter}.wav"
                counter+= 1
                destination_path = os.path.join(source_folder, random_name)
                shutil.copy2(source_path, destination_path)

    print(f"{num_copies} random copies of each WAV file created in '{source_folder}'.")

source_folder = "/content/Manasi"
num_copies = 27
copy_files(source_folder, num_copies)
print('Total number of recordings:', len([entry for entry in os.listdir(source_folder) if os.path.isfile(os.path.join(source_folder, entry))]))

27 random copies of each WAV file created in '/content/Manasi'.
Total number of recordings: 840


### Loading the SpeechBrain Model

In [None]:
import torchaudio
from speechbrain.pretrained import EncoderClassifier
classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")

hyperparams.yaml:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

embedding_model.ckpt:   0%|          | 0.00/83.3M [00:00<?, ?B/s]

mean_var_norm_emb.ckpt:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

classifier.ckpt:   0%|          | 0.00/5.53M [00:00<?, ?B/s]

label_encoder.txt:   0%|          | 0.00/129k [00:00<?, ?B/s]

### Creating Embeddings for all 'Manasi' & 'Others' Audio Files

In [None]:
def create_embeddings(source_dir):
  df = pd.DataFrame(columns=range(192))
  for file in os.listdir(source_dir):
    file_path = source_dir + '/' + file
    signal, fs = torchaudio.load(file_path)
    embeddings = classifier.encode_batch(signal)
    df = pd.concat([df, pd.DataFrame(embeddings[0])])
  return df

df_Others = create_embeddings('/content/Others')
Others_label_list = []
for i in range(len(df_Others)):
  Others_label_list.append('Others')
df_Others['label'] = Others_label_list

df_Manasi = create_embeddings('/content/Manasi')
Manasi_label_list = []
for i in range(len(df_Manasi)):
  Manasi_label_list.append('Manasi')
df_Manasi['label'] = Manasi_label_list

df = pd.concat([df_Others, df_Manasi])
display(df)
df.to_csv('speaker_embeddings.csv', index = False)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
0,1.188781,17.826025,29.743837,1.132418,-8.008463,9.594976,-29.848293,11.930159,-6.893939,34.704029,...,34.691238,38.219414,18.278406,1.545702,-23.225214,5.954052,12.148947,22.536795,18.404089,Others
0,21.707020,3.419267,31.416618,-84.044518,7.444968,-6.320386,3.244490,-26.952406,-13.728929,9.503801,...,14.642851,2.089398,3.390877,15.946771,15.910294,-11.697279,-7.393023,-33.532742,-20.465734,Others
0,4.979878,35.681538,6.289971,30.999308,-14.643521,3.067934,-8.209516,64.636337,16.456408,17.849964,...,-0.922462,2.950448,83.365479,28.307693,15.851522,47.825409,33.609451,32.861626,3.396942,Others
0,53.279789,-8.097881,33.759167,-2.624982,-12.148025,-17.556618,-11.175826,4.556599,15.412368,50.170025,...,1.469266,-3.921163,39.929855,-4.398089,7.558495,-1.753784,33.742870,-20.446404,14.832313,Others
0,-21.542282,-34.274128,-54.973614,-40.267109,3.219451,44.412140,27.380272,-19.670525,-2.460453,64.026077,...,27.556746,8.209702,0.627985,36.286274,-41.660221,59.717403,12.068033,25.669214,67.296898,Others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,58.308731,-10.397073,14.865171,6.461299,-6.064878,1.927005,1.670384,7.083302,-11.591825,-10.993269,...,60.271172,11.780120,16.643129,4.844342,22.817638,-1.164587,27.402000,13.587922,-7.796694,Manasi
0,28.772806,-1.919108,7.008942,27.679573,12.872961,-1.232876,-4.607184,32.450314,17.338671,-19.437466,...,60.926132,17.709055,11.775893,18.838877,0.896400,3.703606,22.772041,-5.973109,12.576173,Manasi
0,35.188187,7.279063,25.294664,9.007158,7.373981,12.630660,-10.253393,1.222779,2.137157,-14.739006,...,31.149916,37.544147,-2.633849,10.398765,23.277319,17.044540,26.742550,-0.692820,6.294021,Manasi
0,35.467445,5.723477,10.811752,17.891724,-1.837581,3.543923,13.526721,-0.422566,8.912366,-13.128389,...,54.354713,24.405313,17.112703,16.448229,9.242860,-2.808809,26.141262,14.492411,-4.258360,Manasi


### Classifying New Audio using Cosine Similarity

In [None]:
# first loading the data from csv
df = pd.read_csv('speaker_embeddings.csv')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
0,1.188781,17.826025,29.743837,1.132418,-8.008463,9.594976,-29.848293,11.930159,-6.893938,34.704030,...,34.691240,38.219414,18.278406,1.545702,-23.225214,5.954052,12.148947,22.536795,18.404089,Others
1,21.707020,3.419267,31.416618,-84.044520,7.444968,-6.320386,3.244490,-26.952406,-13.728929,9.503801,...,14.642851,2.089398,3.390877,15.946771,15.910294,-11.697279,-7.393023,-33.532740,-20.465734,Others
2,4.979878,35.681538,6.289971,30.999308,-14.643521,3.067934,-8.209516,64.636340,16.456408,17.849964,...,-0.922462,2.950448,83.365480,28.307693,15.851522,47.825410,33.609450,32.861626,3.396942,Others
3,53.279790,-8.097881,33.759167,-2.624982,-12.148025,-17.556618,-11.175826,4.556599,15.412368,50.170025,...,1.469266,-3.921163,39.929855,-4.398089,7.558495,-1.753784,33.742870,-20.446404,14.832313,Others
4,-21.542282,-34.274128,-54.973614,-40.267110,3.219451,44.412140,27.380272,-19.670525,-2.460453,64.026080,...,27.556746,8.209702,0.627985,36.286274,-41.660220,59.717403,12.068033,25.669214,67.296900,Others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,58.308730,-10.397073,14.865171,6.461299,-6.064877,1.927005,1.670384,7.083302,-11.591825,-10.993269,...,60.271170,11.780120,16.643130,4.844342,22.817638,-1.164587,27.402000,13.587922,-7.796694,Manasi
1642,28.772806,-1.919108,7.008942,27.679573,12.872961,-1.232876,-4.607184,32.450314,17.338670,-19.437466,...,60.926132,17.709055,11.775893,18.838877,0.896400,3.703606,22.772041,-5.973109,12.576173,Manasi
1643,35.188187,7.279063,25.294664,9.007158,7.373981,12.630660,-10.253393,1.222779,2.137157,-14.739006,...,31.149916,37.544147,-2.633849,10.398765,23.277319,17.044540,26.742550,-0.692820,6.294021,Manasi
1644,35.467445,5.723477,10.811752,17.891724,-1.837581,3.543923,13.526721,-0.422566,8.912366,-13.128389,...,54.354713,24.405313,17.112703,16.448229,9.242860,-2.808809,26.141262,14.492411,-4.258360,Manasi


In [None]:
from IPython.display import Audio
import librosa

# loading the audio file
signal, fs = torchaudio.load('/content/Manasi/179.wav')
Audio(signal, rate=fs)

In [None]:
# getting the embedding
embedding = classifier.encode_batch(signal)
embedding

tensor([[[ 2.8326e+01, -1.1602e+01,  9.9446e+00, -1.2526e+01, -4.0203e+00,
           7.8088e+00,  4.3269e+00,  1.6548e+01, -1.2226e+01, -4.4416e+00,
           6.6143e+00, -1.5990e+00,  1.0561e+01, -5.4851e+00,  2.4221e+01,
           1.0744e+01,  3.4838e+01,  7.5604e+00, -1.6458e+01, -6.2708e+00,
           1.5978e+01,  4.6237e+00, -2.4176e+01,  1.6328e+01,  1.2320e+01,
          -9.3171e-01, -3.9224e+01, -6.8576e+00,  2.4482e+01,  4.8925e+00,
           2.2807e+01, -1.2880e+01, -3.3527e+01, -2.1440e+01, -2.1571e+01,
          -1.5065e+01,  1.9346e+01,  1.5321e+01,  2.5613e+00,  2.8766e+01,
          -1.5316e+01,  2.5172e+00,  7.0820e+00, -1.1000e+01,  8.2962e+00,
           2.1481e+00, -4.8953e+00, -9.2675e-01,  2.9138e+01,  1.9273e+01,
          -2.1571e+01, -8.0294e+00, -8.6366e+00,  1.6370e+01,  2.5126e+00,
           2.5879e+00, -2.6356e+01,  2.8324e+01,  1.0642e+01,  2.8102e+00,
           1.8114e+01, -6.9189e+00, -2.2616e+01,  3.7054e+00, -1.6470e+01,
           2.6909e+01,  1

In [None]:
list(embedding[0][0].numpy())

[28.325623,
 -11.601798,
 9.944557,
 -12.526253,
 -4.0203238,
 7.808796,
 4.326913,
 16.547916,
 -12.225913,
 -4.44163,
 6.61432,
 -1.5989561,
 10.560854,
 -5.485078,
 24.220572,
 10.743987,
 34.838497,
 7.560401,
 -16.45784,
 -6.2708282,
 15.977772,
 4.623699,
 -24.176197,
 16.327602,
 12.319829,
 -0.9317121,
 -39.22376,
 -6.857602,
 24.48177,
 4.8925366,
 22.806854,
 -12.879665,
 -33.52738,
 -21.439707,
 -21.571024,
 -15.064748,
 19.346052,
 15.320957,
 2.5612676,
 28.765635,
 -15.315801,
 2.5171514,
 7.0819826,
 -11.000251,
 8.296191,
 2.1480558,
 -4.895261,
 -0.92674583,
 29.138016,
 19.272612,
 -21.571278,
 -8.029399,
 -8.636584,
 16.369574,
 2.5125625,
 2.5878828,
 -26.35555,
 28.323606,
 10.642162,
 2.8101969,
 18.114326,
 -6.9189425,
 -22.615736,
 3.7053585,
 -16.470394,
 26.908594,
 14.488457,
 -15.707494,
 -4.0053782,
 -18.583574,
 10.697835,
 46.263992,
 2.9717145,
 -18.07389,
 -9.560369,
 -14.810658,
 -32.36318,
 1.367281,
 6.7027693,
 0.62184376,
 19.42265,
 20.095926,
 12

In [None]:
import numpy as np
from numpy.linalg import norm

df = df.reset_index(drop=True)
cosine_sim = []

test_audio_embedding = (embedding[0][0].numpy())

for i in range(len(df)):
  i_th_embedding = np.array(list(df.iloc[i,:-1]))
  cosine_sim.append(np.dot(test_audio_embedding, i_th_embedding)/(norm(test_audio_embedding)*norm(i_th_embedding)))

max_cosine_sim_value_index = cosine_sim.index(max(cosine_sim))
print('Label for test audio:', list(df.iloc[:,-1])[max_cosine_sim_value_index])

Label for test audio: Manasi


### Checking Accuracy

In [None]:
# loading the dataset
df = pd.read_csv('speaker_embeddings.csv')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
0,1.188781,17.826025,29.743837,1.132418,-8.008463,9.594976,-29.848293,11.930159,-6.893938,34.704030,...,34.691240,38.219414,18.278406,1.545702,-23.225214,5.954052,12.148947,22.536795,18.404089,Others
1,21.707020,3.419267,31.416618,-84.044520,7.444968,-6.320386,3.244490,-26.952406,-13.728929,9.503801,...,14.642851,2.089398,3.390877,15.946771,15.910294,-11.697279,-7.393023,-33.532740,-20.465734,Others
2,4.979878,35.681538,6.289971,30.999308,-14.643521,3.067934,-8.209516,64.636340,16.456408,17.849964,...,-0.922462,2.950448,83.365480,28.307693,15.851522,47.825410,33.609450,32.861626,3.396942,Others
3,53.279790,-8.097881,33.759167,-2.624982,-12.148025,-17.556618,-11.175826,4.556599,15.412368,50.170025,...,1.469266,-3.921163,39.929855,-4.398089,7.558495,-1.753784,33.742870,-20.446404,14.832313,Others
4,-21.542282,-34.274128,-54.973614,-40.267110,3.219451,44.412140,27.380272,-19.670525,-2.460453,64.026080,...,27.556746,8.209702,0.627985,36.286274,-41.660220,59.717403,12.068033,25.669214,67.296900,Others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,58.308730,-10.397073,14.865171,6.461299,-6.064877,1.927005,1.670384,7.083302,-11.591825,-10.993269,...,60.271170,11.780120,16.643130,4.844342,22.817638,-1.164587,27.402000,13.587922,-7.796694,Manasi
1642,28.772806,-1.919108,7.008942,27.679573,12.872961,-1.232876,-4.607184,32.450314,17.338670,-19.437466,...,60.926132,17.709055,11.775893,18.838877,0.896400,3.703606,22.772041,-5.973109,12.576173,Manasi
1643,35.188187,7.279063,25.294664,9.007158,7.373981,12.630660,-10.253393,1.222779,2.137157,-14.739006,...,31.149916,37.544147,-2.633849,10.398765,23.277319,17.044540,26.742550,-0.692820,6.294021,Manasi
1644,35.467445,5.723477,10.811752,17.891724,-1.837581,3.543923,13.526721,-0.422566,8.912366,-13.128389,...,54.354713,24.405313,17.112703,16.448229,9.242860,-2.808809,26.141262,14.492411,-4.258360,Manasi


In [None]:
# shuffling the dataframe

from sklearn.utils import shuffle

df = shuffle(df)
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
0,49.776833,8.066255,27.134134,-20.257420,-25.927511,-59.754574,-11.704151,8.347429,17.053637,32.110332,...,20.963074,0.636733,71.741740,-5.964583,-24.724566,37.806280,32.134440,-17.710154,12.811234,Others
1,28.704441,-14.387841,23.363825,-12.755470,12.197923,-11.438946,2.427452,9.550383,-24.688637,-0.701411,...,15.275725,19.515460,15.120666,-14.722564,0.717720,11.667382,36.094864,-13.258420,24.348045,Manasi
2,35.000507,26.360340,7.410617,4.849377,3.645768,21.726187,-4.216229,22.669603,5.553197,-3.904913,...,56.099552,19.389635,-3.916201,5.106121,20.312822,10.940690,10.244753,-18.851843,-0.664223,Manasi
3,12.198272,-31.064690,-12.258200,8.498692,-6.904105,-17.671375,-22.579336,25.312922,11.660384,23.479870,...,22.098171,-19.515926,-3.720428,21.201677,16.817398,-37.897480,-18.827375,76.820755,25.147274,Others
4,35.000507,26.360340,7.410617,4.849377,3.645768,21.726187,-4.216229,22.669603,5.553197,-3.904913,...,56.099552,19.389635,-3.916201,5.106121,20.312822,10.940690,10.244753,-18.851843,-0.664223,Manasi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,37.422500,12.289909,5.709994,0.691654,-31.527693,63.537750,34.974120,13.246988,24.850773,49.771797,...,2.984805,-10.257984,-30.666601,3.978021,18.202358,35.589256,38.514885,-28.750490,-29.507132,Others
1642,54.620712,20.794580,2.406999,2.901893,16.946160,17.160610,4.978879,23.394592,16.037764,-6.373573,...,50.010560,33.509346,-2.983650,-11.721277,6.198156,9.764318,27.156021,-23.027365,14.144227,Manasi
1643,35.304718,11.014476,30.536829,-36.806590,-15.406444,-15.639824,44.997086,28.803902,34.060642,50.666264,...,27.133495,0.155817,1.389185,-8.823438,-8.490711,28.302357,0.656155,-19.337877,37.072266,Others
1644,-2.724951,36.380928,16.943040,-10.813402,-4.692611,-50.225006,-42.523243,-2.760701,0.868319,30.656422,...,40.753624,1.655927,10.295820,-26.669150,16.577324,-9.967655,43.897110,-13.649199,35.502100,Others


In [None]:
# splitting the dataset for training and testing
df_train = df[:1000]
display(df_train)
df_test = df[1000:]
display(df_test)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
0,49.776833,8.066255,27.134134,-20.257420,-25.927511,-59.754574,-11.704151,8.347429,17.053637,32.110332,...,20.963074,0.636733,71.741740,-5.964583,-24.724566,37.806280,32.134440,-17.710154,12.811234,Others
1,28.704441,-14.387841,23.363825,-12.755470,12.197923,-11.438946,2.427452,9.550383,-24.688637,-0.701411,...,15.275725,19.515460,15.120666,-14.722564,0.717720,11.667382,36.094864,-13.258420,24.348045,Manasi
2,35.000507,26.360340,7.410617,4.849377,3.645768,21.726187,-4.216229,22.669603,5.553197,-3.904913,...,56.099552,19.389635,-3.916201,5.106121,20.312822,10.940690,10.244753,-18.851843,-0.664223,Manasi
3,12.198272,-31.064690,-12.258200,8.498692,-6.904105,-17.671375,-22.579336,25.312922,11.660384,23.479870,...,22.098171,-19.515926,-3.720428,21.201677,16.817398,-37.897480,-18.827375,76.820755,25.147274,Others
4,35.000507,26.360340,7.410617,4.849377,3.645768,21.726187,-4.216229,22.669603,5.553197,-3.904913,...,56.099552,19.389635,-3.916201,5.106121,20.312822,10.940690,10.244753,-18.851843,-0.664223,Manasi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,42.657080,2.046956,3.481007,19.045560,5.982240,15.047331,12.260719,2.871956,23.258180,-16.600754,...,25.430616,29.342392,-4.415938,5.721099,-0.911945,1.538572,30.695581,5.144508,8.035970,Manasi
996,8.699756,-78.393880,48.919140,-5.210661,-9.133603,31.208551,0.509823,-17.482128,-6.778493,43.881367,...,41.988262,6.353457,9.268475,38.832390,-22.252200,0.029987,7.972170,-18.242474,0.163222,Others
997,28.772806,-1.919108,7.008942,27.679573,12.872961,-1.232876,-4.607184,32.450314,17.338670,-19.437466,...,60.926132,17.709055,11.775893,18.838877,0.896400,3.703606,22.772041,-5.973109,12.576173,Manasi
998,35.176178,-3.459134,9.382600,8.512279,-6.783773,3.092777,6.052514,12.555882,13.877194,-6.815785,...,45.327590,16.381770,11.485722,5.737587,13.860158,2.974653,26.851334,-5.488117,1.376792,Manasi


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,183,184,185,186,187,188,189,190,191,label
1000,-9.614812,-10.426547,4.799513,-7.512684,26.617022,2.481492,-32.932390,68.749500,49.527440,8.437414,...,-38.161390,-11.709952,-3.747497,-36.941530,-4.706162,26.607948,40.162070,16.413418,-37.168440,Others
1001,58.459637,-0.787830,10.382180,23.754723,11.287686,6.819263,12.818145,-1.008009,14.381887,-14.313101,...,35.448050,29.912980,-8.389632,2.194664,2.329855,9.617247,17.197231,-7.274568,29.673018,Manasi
1002,17.121735,-1.326003,-1.148823,-1.770567,-0.574132,12.678126,-19.141129,20.180878,-9.295573,-10.499725,...,16.796290,29.588930,2.093606,-21.001550,-9.298296,2.916578,26.127024,-6.389246,28.168243,Manasi
1003,-51.328182,-46.438717,-25.135298,0.624265,29.592428,-8.170038,-28.436193,-27.276375,-30.911144,-0.375627,...,51.094936,30.262915,17.077837,-9.044117,-1.353856,-28.637020,-30.774810,-47.072400,38.563800,Others
1004,35.281574,4.194872,7.580202,15.764833,13.898938,3.842769,4.181836,7.616991,-7.492573,-14.730691,...,56.269295,23.429337,-2.499738,1.594876,17.447283,2.742844,20.794142,8.812087,14.811654,Manasi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,37.422500,12.289909,5.709994,0.691654,-31.527693,63.537750,34.974120,13.246988,24.850773,49.771797,...,2.984805,-10.257984,-30.666601,3.978021,18.202358,35.589256,38.514885,-28.750490,-29.507132,Others
1642,54.620712,20.794580,2.406999,2.901893,16.946160,17.160610,4.978879,23.394592,16.037764,-6.373573,...,50.010560,33.509346,-2.983650,-11.721277,6.198156,9.764318,27.156021,-23.027365,14.144227,Manasi
1643,35.304718,11.014476,30.536829,-36.806590,-15.406444,-15.639824,44.997086,28.803902,34.060642,50.666264,...,27.133495,0.155817,1.389185,-8.823438,-8.490711,28.302357,0.656155,-19.337877,37.072266,Others
1644,-2.724951,36.380928,16.943040,-10.813402,-4.692611,-50.225006,-42.523243,-2.760701,0.868319,30.656422,...,40.753624,1.655927,10.295820,-26.669150,16.577324,-9.967655,43.897110,-13.649199,35.502100,Others


In [51]:
# testing on the testing set
df_test_without_labels = df_test.iloc[:,:-1]

label_predictions = []

for j in range(len(df_test_without_labels)):
  test_audio_embedding = list(df_test_without_labels.iloc[j,:])
  cosine_sim = []

  for i in range(len(df_train)):
    i_th_embedding = np.array(list(df_train.iloc[i,:-1]))
    cosine_sim.append(np.dot(test_audio_embedding, i_th_embedding)/(norm(test_audio_embedding)*norm(i_th_embedding)))

  max_cosine_sim_value_index = cosine_sim.index(max(cosine_sim))
  label_predictions.append(list(df_train.iloc[:,-1])[max_cosine_sim_value_index])

df_test['Predicted Label'] = label_predictions
df_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Predicted Label'] = label_predictions


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,184,185,186,187,188,189,190,191,label,Predicted Label
1000,-9.614812,-10.426547,4.799513,-7.512684,26.617022,2.481492,-32.932390,68.749500,49.527440,8.437414,...,-11.709952,-3.747497,-36.941530,-4.706162,26.607948,40.162070,16.413418,-37.168440,Others,Others
1001,58.459637,-0.787830,10.382180,23.754723,11.287686,6.819263,12.818145,-1.008009,14.381887,-14.313101,...,29.912980,-8.389632,2.194664,2.329855,9.617247,17.197231,-7.274568,29.673018,Manasi,Manasi
1002,17.121735,-1.326003,-1.148823,-1.770567,-0.574132,12.678126,-19.141129,20.180878,-9.295573,-10.499725,...,29.588930,2.093606,-21.001550,-9.298296,2.916578,26.127024,-6.389246,28.168243,Manasi,Manasi
1003,-51.328182,-46.438717,-25.135298,0.624265,29.592428,-8.170038,-28.436193,-27.276375,-30.911144,-0.375627,...,30.262915,17.077837,-9.044117,-1.353856,-28.637020,-30.774810,-47.072400,38.563800,Others,Others
1004,35.281574,4.194872,7.580202,15.764833,13.898938,3.842769,4.181836,7.616991,-7.492573,-14.730691,...,23.429337,-2.499738,1.594876,17.447283,2.742844,20.794142,8.812087,14.811654,Manasi,Manasi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,37.422500,12.289909,5.709994,0.691654,-31.527693,63.537750,34.974120,13.246988,24.850773,49.771797,...,-10.257984,-30.666601,3.978021,18.202358,35.589256,38.514885,-28.750490,-29.507132,Others,Others
1642,54.620712,20.794580,2.406999,2.901893,16.946160,17.160610,4.978879,23.394592,16.037764,-6.373573,...,33.509346,-2.983650,-11.721277,6.198156,9.764318,27.156021,-23.027365,14.144227,Manasi,Manasi
1643,35.304718,11.014476,30.536829,-36.806590,-15.406444,-15.639824,44.997086,28.803902,34.060642,50.666264,...,0.155817,1.389185,-8.823438,-8.490711,28.302357,0.656155,-19.337877,37.072266,Others,Others
1644,-2.724951,36.380928,16.943040,-10.813402,-4.692611,-50.225006,-42.523243,-2.760701,0.868319,30.656422,...,1.655927,10.295820,-26.669150,16.577324,-9.967655,43.897110,-13.649199,35.502100,Others,Others


In [52]:
# checking accuracy
correct_predictions = (df_test['label'] == df_test['Predicted Label']).sum()
total_predictions = len(df_test)
accuracy = correct_predictions / total_predictions

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 1.00
