# Imports

In [5]:
import torchaudio
from speechbrain.pretrained import EncoderClassifier
import os
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Loading the model

In [6]:
language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp", run_opts={"device":"cuda"})

We will also define the "classify" function fro convenience

In [7]:
def classify(path, language_id):
    signal, sr = torchaudio.load(path, channels_first=False)
    signal = language_id.audio_normalizer(signal, sr)
    prediction =  language_id.classify_batch(signal)
    pred_lang_name = prediction[3][0][:2]
    return pred_lang_name

# Testing and getting the data

Test on real data from /mnt/ssd/nanosemantics/cv-corpus-9.0-2022-04-27/

In [9]:

corp_dir = '/mnt/ssd/nanosemantics/cv-corpus-9.0-2022-04-27/'

y_test = []
y_pred = []

for lang_dir in os.listdir(corp_dir):
    if lang_dir == 'ky':
        continue
    
    lang_name = lang_dir[:2]
    full_path = corp_dir+'/'+lang_dir+'/clips'
    audio_files = os.listdir(full_path)
    
    if len(audio_files) < 1000:
        n = len(audio_files)
    else:
        n = 1000
    for i in range(n):
        y_test.append(lang_name)
                             
    for audio_file in audio_files[:n]:
        full_audio_file = full_path+'/'+audio_file   
        
        pred_lang_name = classify(full_audio_file, language_id)
        
        y_pred.append(pred_lang_name)
    
    print(lang_name, 'finished')

it finished
kk finished
hi finished
tr finished
be finished
de finished
ja finished
uk finished
pt finished
ru finished
id finished
uz finished
ar finished
bn finished
en finished
fr finished
zh finished
es finished


# Perform metric calculations

First we convert y_test and y_pred to numpy arrays

In [10]:
y_test = np.array(y_test)
y_pred = np.array(y_pred)

Accuracy score

In [11]:
acc_score = accuracy_score(y_test, y_pred)
print('Accuracy score:', acc_score)

Accuracy score: 0.7979444444444445


Confusion matrix

In [15]:
conf_mtx = confusion_matrix(y_test, y_pred, labels = ['it', 'kk', 'hi', 'tr', 'be', 'de', 'ja', 'uk', 'pt', 'ru', 'id', 'uz', 'ar', 'bn', 'en', 'fr', 'zh', 'es'])
print('Confusion matrix:')
print(conf_mtx)

Confusion matrix:
[[823   0   0   1   0   0   1   3   3   1   1   2   2   2   0   1   0   2]
 [  0 960   0   0   0   0   0   0   0  11   2   8   1   0   0   1   0   0]
 [  0   0 844   0   0   0   0   0   0   0   1   0   0   0   2   0   0   0]
 [  0  14   2 740   2   0   9   2   2   2   1  11   3   2   1   2   1   0]
 [  0   3   0   0 785   0   0  68   0 127   0   2   0   0   0   0   0   0]
 [  0   2   1   0   0 842   1   2   0   0   1   4   2   0   4   1   1   0]
 [  0   0   0   2   0   0 935   1   2   1   0   0   0   1   0   4   2   0]
 [  0   3   2   0  39   0   0 807   2  72   0   2   0   0   0   0   0   0]
 [  0   3   0   2   1   0   1   4 851   1   0   1   0   2   1   3   0   1]
 [  0   2   0   0 138   0   0  40   3 756   1   1   1   0   0   0   0   0]
 [  0   2   1   0   1   0   1   2   1   0 726   2   4   1   0   0   1   0]
 [  0  17   5   4   0   0   3   1   1   3   1 791   2   0   1   2   0   0]
 [  2   1   2   1   1   1   1   1   2   3   4   8 728   6   0   4   2   0]
 [  0  

Classification report

In [18]:
clsf_report = classification_report(y_test, y_pred, labels = ['it', 'kk', 'hi', 'tr', 'be', 'de', 'ja', 'uk', 'pt', 'ru', 'id', 'uz', 'ar', 'bn', 'en', 'fr', 'zh', 'es'])
print(clsf_report)

              precision    recall  f1-score   support

          it       0.99      0.82      0.90      1000
          kk       0.95      0.96      0.95      1000
          hi       0.96      0.84      0.90      1000
          tr       0.99      0.74      0.85      1000
          be       0.81      0.79      0.80      1000
          de       0.98      0.84      0.91      1000
          ja       0.97      0.94      0.95      1000
          uk       0.86      0.81      0.83      1000
          pt       0.96      0.85      0.90      1000
          ru       0.77      0.76      0.76      1000
          id       0.98      0.73      0.83      1000
          uz       0.94      0.79      0.86      1000
          ar       0.96      0.73      0.83      1000
          bn       0.98      0.91      0.94      1000
          en       0.98      0.64      0.77      1000
          fr       0.98      0.87      0.92      1000
          zh       0.99      0.85      0.92      1000
          es       0.99    

Saving all the resulting values

In [19]:
np.save('testing_results/accuracy_score.npy', acc_score)
np.save('testing_results/confusion_matrix.npy', conf_mtx)
np.save('testing_results/classification_report.npy', clsf_report)