In [1]:
import os
import librosa
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
from sklearn.mixture import GaussianMixture
#import python_speech_features as mfcc
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
from tqdm import tqdm
import joblib
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score

In [3]:
noise_folder = './16000_pcm_speeches/_background_noise_/doing_the_dishes.wav'

In [5]:
import librosa
import numpy as np

def add_noise_to_audio(audio_file, noise_file = noise_folder, scaling_factor=0.5, additive_factor=0.1):
    # Load the audio file
    audio_data, sr_audio = librosa.load(audio_file, mono=True)

    # Load the noise file
    noise_data, sr_noise = librosa.load(noise_file, mono=True)

    # Repeat or trim the audio data to match the length of the noise data
    if len(audio_data) < len(noise_data):
        repeat_factor = int(np.ceil(len(noise_data) / len(audio_data)))
        audio_data = np.tile(audio_data, repeat_factor)[:len(noise_data)]
    else:
        audio_data = audio_data[:len(noise_data)]

    # Combine the audio and noise signals with scaling
    scaled_audio = scaling_factor * audio_data + (1 - scaling_factor) * noise_data

    # Additive noise to the scaled audio
    audio_with_noise = scaled_audio + additive_factor * noise_data

    # Normalize the result
    audio_with_noise /= np.max(np.abs(audio_with_noise))

    return audio_with_noise, sr_audio

In [8]:
def return_features(audio):
    # y, sr = librosa.load(audio, sr=None)
    y, sr = add_noise_to_audio(audio)
    # y = denoise_and_remove_noise(audio,sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfccs)
    mfcc_delta2 = librosa.feature.delta(mfccs, order=2)
    #zero_c = librosa.zero_crossings(mfccs, pad=False)
    #spectral_c = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    #spectral_con = librosa.feature.spectral_contrast(y=y,sr=sr)
    features = np.vstack([mfccs, mfcc_delta, mfcc_delta2])
    #features_pca = pca.fit_transform(features)
    return (features.T)

In [9]:
class_label ={'Benjamin_Netanyau': 0, 'Jens_Stoltenberg': 1, 'Julia_Gillard': 2, 'Magaret_Tarcher': 3, 'Nelson_Mandela': 4}

In [10]:
path = '16000_pcm_speeches'

In [11]:
os.listdir(path)

['Benjamin_Netanyau',
 'Jens_Stoltenberg',
 'Julia_Gillard',
 'Magaret_Tarcher',
 'Nelson_Mandela',
 'other',
 '_background_noise_']

## Rename Files

In [12]:
for i in tqdm(class_label):
    folder_path = os.path.join(path,i)
    for file_name in os.listdir(folder_path):
        audio_file = os.path.join(folder_path,file_name)
        audio_name = i + '_' + file_name
        new_audio_name = os.path.join(folder_path,audio_name)
        #print(audio_file)
        os.rename(audio_file,new_audio_name)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:12<00:00,  2.52s/it]


In [13]:
file_list = []
class_list = []
for i in class_label:
    folder_path = os.path.join(path,i)
    for file_name in os.listdir(folder_path):
        file_list.append(file_name)
        class_list.append(i)
    

In [14]:
file_path='dataset.csv'
test_pred = pd.DataFrame({'Filename': file_list, 'TARGET': class_list})
test_pred.to_csv(file_path, mode='w', header=True, index=False)

In [15]:
len(file_list)

7501

In [16]:
data_set = pd.read_csv('dataset.csv')

In [17]:
data_set

Unnamed: 0,Filename,TARGET
0,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
1,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
2,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
3,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
4,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
...,...,...
7496,Nelson_Mandela_Nelson_Mandela_Nelson_Mandela_N...,Nelson_Mandela
7497,Nelson_Mandela_Nelson_Mandela_Nelson_Mandela_N...,Nelson_Mandela
7498,Nelson_Mandela_Nelson_Mandela_Nelson_Mandela_N...,Nelson_Mandela
7499,Nelson_Mandela_Nelson_Mandela_Nelson_Mandela_N...,Nelson_Mandela


In [18]:
data_set_train ,data_set_test = train_test_split(data_set,test_size=0.3,random_state=42)

In [19]:
data_set_train

Unnamed: 0,Filename,TARGET
4692,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
5263,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
3277,Julia_Gillard_Julia_Gillard_Julia_Gillard_Juli...,Julia_Gillard
6400,Nelson_Mandela_Nelson_Mandela_Nelson_Mandela_N...,Nelson_Mandela
2802,Jens_Stoltenberg_Jens_Stoltenberg_Jens_Stolten...,Jens_Stoltenberg
...,...,...
5191,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
5226,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
5390,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
860,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau


In [20]:
audio_features = []

for i in tqdm(class_label):
    mfcc_features = []
    folder_path = os.path.join(path,i)
    for j in data_set_train.index:
        if(data_set_train['TARGET'][j]==i):
            audio_file_name = data_set_train['Filename'][j]
            audio_file = os.path.join(folder_path,audio_file_name)
            #print(audio_file)
            features_mfcc = return_features(audio_file)
            for feature in features_mfcc:
                mfcc_features.append(feature)
    audio_features.append(mfcc_features)

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [24:58<00:00, 299.76s/it]


In [21]:
for i in range(5):
    print(len(audio_features[i]))

4190200
4313200
4407500
4391100
4223000


In [24]:
def train_gmm(data):
  gm = GaussianMixture(n_components=2, max_iter=80)
  gm.fit(data)
  return gm

In [25]:
gmm_model=[]
i=0
for temp in tqdm(audio_features):
  gmm_model.append(train_gmm(temp))
  #input('wait')
  #print(i)
  i=i+1

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [21:05<00:00, 253.13s/it]


In [26]:
for i in tqdm(range(5)):
    filename = 'gmm'+str(i)+'.joblib'
    joblib.dump(gmm_model[i],filename)
    print(i)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 66.58it/s]

0
1
2
3
4





In [27]:
saved_gmm=[]

for i in range(5):
    filename = 'gmm'+str(i)+'.joblib'
    load_gmm = joblib.load(filename)
    saved_gmm.append(load_gmm)

In [28]:
saved_gmm


[GaussianMixture(max_iter=80, n_components=2),
 GaussianMixture(max_iter=80, n_components=2),
 GaussianMixture(max_iter=80, n_components=2),
 GaussianMixture(max_iter=80, n_components=2),
 GaussianMixture(max_iter=80, n_components=2)]

In [29]:
def get_label(test_data,gmm_model):
  likelihood=[]
  test_mfcc = return_features(test_data)
  for class_model in gmm_model:
    likelihood.append(class_model.score(test_mfcc))
    #print(likelihood)
    # print(likelihood.index(max(likelihood)))
  return likelihood.index(max(likelihood))

In [30]:
label_final= []
for i in tqdm(data_set_test.index):
    audio_file = data_set_test['Filename'][i]
    folder_name = data_set_test['TARGET'][i]
    file_path = os.path.join(path,folder_name,audio_file)
    label_final.append(get_label(file_path,gmm_model))
    

100%|██████████████████████████████████████████████████████████████████████████████| 2251/2251 [13:26<00:00,  2.79it/s]


In [31]:
label_final

[1,
 1,
 2,
 3,
 3,
 4,
 1,
 2,
 1,
 2,
 0,
 3,
 2,
 4,
 2,
 4,
 0,
 0,
 0,
 2,
 4,
 0,
 4,
 1,
 2,
 1,
 4,
 1,
 4,
 1,
 4,
 1,
 0,
 4,
 1,
 4,
 4,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 4,
 3,
 2,
 4,
 1,
 2,
 2,
 3,
 1,
 2,
 4,
 2,
 2,
 3,
 1,
 3,
 4,
 0,
 2,
 4,
 1,
 4,
 1,
 2,
 2,
 1,
 0,
 1,
 1,
 2,
 1,
 1,
 4,
 3,
 3,
 1,
 2,
 4,
 1,
 2,
 1,
 1,
 0,
 1,
 4,
 3,
 2,
 3,
 1,
 4,
 3,
 0,
 1,
 3,
 1,
 1,
 1,
 4,
 4,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 4,
 0,
 1,
 2,
 0,
 3,
 2,
 2,
 4,
 4,
 4,
 2,
 4,
 1,
 4,
 3,
 4,
 4,
 1,
 3,
 1,
 1,
 1,
 3,
 0,
 1,
 4,
 2,
 3,
 1,
 4,
 1,
 0,
 4,
 3,
 2,
 2,
 4,
 0,
 1,
 2,
 1,
 1,
 4,
 1,
 4,
 1,
 0,
 2,
 1,
 0,
 2,
 2,
 4,
 2,
 2,
 3,
 1,
 1,
 1,
 4,
 1,
 4,
 3,
 1,
 3,
 2,
 0,
 2,
 1,
 0,
 2,
 3,
 2,
 4,
 2,
 1,
 1,
 2,
 1,
 2,
 4,
 1,
 3,
 1,
 4,
 2,
 1,
 3,
 4,
 1,
 1,
 2,
 2,
 1,
 4,
 4,
 1,
 1,
 2,
 4,
 3,
 1,
 1,
 2,
 2,
 2,
 1,
 0,
 0,
 3,
 2,
 2,
 0,
 3,
 2,
 0,
 4,
 2,
 0,
 4,
 1,
 2,
 4,
 0,
 4,
 1,
 0,
 3,
 1,
 3,
 4,
 4,
 1,
 0,


In [32]:
mapped_list = []
for i in label_final:
    for key,value in class_label.items():
        if(i==value):
            mapped_list.append(key)
        #if(i == j.value):
        #    mapped_list.append(j.key)

In [33]:
data_set_test

Unnamed: 0,Filename,TARGET
2310,Jens_Stoltenberg_Jens_Stoltenberg_Jens_Stolten...,Jens_Stoltenberg
3347,Julia_Gillard_Julia_Gillard_Julia_Gillard_Juli...,Julia_Gillard
3322,Julia_Gillard_Julia_Gillard_Julia_Gillard_Juli...,Julia_Gillard
4550,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
5049,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
...,...,...
1773,Jens_Stoltenberg_Jens_Stoltenberg_Jens_Stolten...,Jens_Stoltenberg
5476,Magaret_Tarcher_Magaret_Tarcher_Magaret_Tarche...,Magaret_Tarcher
1209,Benjamin_Netanyau_Benjamin_Netanyau_Benjamin_N...,Benjamin_Netanyau
3937,Julia_Gillard_Julia_Gillard_Julia_Gillard_Juli...,Julia_Gillard


In [34]:
count = Counter(mapped_list)

In [35]:
count

Counter({'Jens_Stoltenberg': 664,
         'Nelson_Mandela': 458,
         'Julia_Gillard': 424,
         'Benjamin_Netanyau': 364,
         'Magaret_Tarcher': 341})

In [36]:
accuracy = accuracy_score(data_set_test['TARGET'],mapped_list)

In [37]:
print(accuracy)

0.8587294535761884


In [None]:
#test_folder = '16000_pcm_speeches/test'

In [None]:
#os.listdir(test_folder)

In [None]:
# label_final=[]
# for file_test in tqdm(os.listdir(test_folder)):
#   eval_file_path = test_folder + '/' + file_test
#   #print(eval_file_path)
#   label_final.append(get_label(eval_file_path,saved_gmm))
#   #print(j)

In [None]:
#counter = Counter(label_final)

In [None]:
#counter

In [39]:
scores = []
for i in tqdm(data_set_test.index):
    audio_file = data_set_test['Filename'][i]
    folder_name = data_set_test['TARGET'][i]
    file_path = os.path.join(path, folder_name, audio_file)
    scores.append(get_label(file_path, gmm_model))

100%|██████████████████████████████████████████████████████████████████████████████| 2251/2251 [12:02<00:00,  3.12it/s]


In [43]:
from sklearn.metrics import roc_curve
from scipy.optimize import brentq
from scipy.interpolate import interp1d
def calculate_eer(true_labels, scores):
    fpr, tpr, thresholds = roc_curve(true_labels, scores, pos_label=1)
    eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
    return eer


true_labels = [class_label[class_name] for class_name in data_set_test['TARGET']]
eer = calculate_eer(true_labels, scores)
print(f'EER: {eer:.4f}')

EER: 0.7036
