In [86]:
# basic packages

import numpy as np
import librosa
import os
import pandas as pd
import pickle

In [55]:
# machine learning & preprocessing packages

from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [56]:
# function to create dataframe with the needed features for the header

def create_dataframe():
    # dataframe header
    header = 'filename rmse zero_crossing_rate'
    # insert the 21 mfcc feature column headers
    for i in range(1, 21):
        header += f' mfcc{i}'
    header += ' label'                                  # the label header
    header = header.split()                             # make the header string -> array
    features_df = pd.DataFrame(columns = header)        # create the dataframe
    return features_df

In [89]:
# function to extract features from folder of sounds and turn it into dataframe

def extractWavFeatures(soundFilesFolder):
    # create the dataframe with the needed headers
    features_df = create_dataframe()

    for filename in os.listdir(soundFilesFolder):
        number = f'{soundFilesFolder}/{filename}'
        audio, sr = librosa.load(number, mono=True, duration=3)

        # remove leading and trailing silence
        audio, index = librosa.effects.trim(audio)
        rmse = librosa.feature.rms(y=audio)
        zcr = librosa.feature.zero_crossing_rate(audio)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr)
        # dataframe row
        row_data = f'{filename} {np.mean(rmse)} {np.mean(zcr)}'

        # add mfcc features
        for e in mfcc:
            row_data += f' {np.mean(e)}'


        # add the labels for the dataframe
        if 'ibrahim' in filename:
            row_data += f' {1}'
        elif 'Amr' in filename:
            row_data += f' {2}'
        elif 'mariam' in filename:
            row_data += f' {3}'
        elif 'momen' in filename:
            row_data += f' {4}'
        else:
            row_data += f' {0}'

        # append the row to the dataframe
        features_df.loc[len(features_df)] = row_data.split()
    return features_df

In [84]:
team_df = extractWavFeatures("./mydata")

In [59]:
team_df

Unnamed: 0,filename,rmse,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,Amr1.wav,0.03124353289604187,0.09524574762658228,-398.46075439453125,138.5895538330078,3.0195839405059814,-12.14544677734375,-12.920365333557129,13.24248218536377,-16.564815521240234,...,0.2325524240732193,0.9407077431678772,0.8140118718147278,-4.634973049163818,5.3455023765563965,3.7176096439361572,-7.20994758605957,-3.473836660385132,0.6744386553764343,2
1,Amr10.wav,0.021408159285783768,0.08141243811881188,-434.5036315917969,152.6798095703125,12.030377388000488,0.9768251180648804,0.0641842857003212,11.69894790649414,-16.82280158996582,...,2.804825782775879,3.359466314315796,-1.487720251083374,2.9492483139038086,8.075194358825684,-1.483323335647583,0.37750381231307983,-0.8345574140548706,1.6290479898452759,2
2,Amr11.wav,0.052045322954654694,0.09086834016393443,-389.0693664550781,140.72108459472656,22.436880111694336,7.38924503326416,-14.420411109924316,10.467955589294434,-14.195365905761719,...,3.77705454826355,4.0779266357421875,-2.4903037548065186,1.615167260169983,2.439157247543335,3.506373405456543,-4.329512119293213,-0.24602235853672028,-3.07216215133667,2
3,Amr12.wav,0.03662358224391937,0.08905666977611941,-430.4794921875,148.5320281982422,19.754179000854492,2.6437618732452393,-15.569217681884766,9.518011093139648,-15.909305572509766,...,4.775101661682129,7.445225238800049,-6.382566452026367,0.664188802242279,6.244306564331055,0.35481828451156616,-3.0987207889556885,-0.8284878134727478,-3.9141740798950195,2
4,Amr13.wav,0.035578180104494095,0.08059939822635136,-429.6603088378906,149.5933380126953,16.58245849609375,-3.1334006786346436,-18.459609985351562,8.883918762207031,-14.992230415344238,...,7.0627827644348145,6.48460578918457,-6.427166938781738,0.4854406416416168,4.968094348907471,0.7781060934066772,-3.97288179397583,-0.9344853758811951,-2.878880262374878,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,momen_open5.wav,0.10217876732349396,0.08389014528508772,-257.34478759765625,140.4272918701172,13.611971855163574,14.937666893005371,-10.300552368164062,-0.3203711211681366,-20.966815948486328,...,0.14832720160484314,-3.722841501235962,-2.639975070953369,-0.2225346863269806,9.201364517211914,-11.765226364135742,4.404398441314697,2.242324113845825,-4.975485324859619,4
66,momen_open6.wav,0.060201484709978104,0.07716652199074074,-324.0458679199219,139.704833984375,24.981340408325195,18.18901824951172,-1.65377676486969,4.463167667388916,-12.434250831604004,...,2.4508330821990967,-2.8877012729644775,-3.1460652351379395,2.6340787410736084,3.854548215866089,-11.689922332763672,2.216581344604492,-2.1184122562408447,-5.689245223999023,4
67,momen_open7.wav,0.06575772911310196,0.07684405438311688,-328.0724182128906,143.20729064941406,21.987651824951172,17.92111587524414,-4.03651762008667,3.07590913772583,-12.331384658813477,...,0.8533156514167786,-8.088213920593262,-0.7085484862327576,3.1991419792175293,4.775851726531982,-8.51183032989502,4.142024993896484,-2.7163186073303223,-5.273591995239258,4
68,momen_open8.wav,0.07254287600517273,0.08229827880859375,-304.3789367675781,134.51904296875,25.865615844726562,13.691479682922363,-5.350910663604736,1.9408302307128906,-14.987211227416992,...,6.461264610290527,-6.673274040222168,-1.7020838260650635,2.877892017364502,6.008303642272949,-9.927830696105957,4.957016944885254,-3.318913221359253,-5.199673652648926,4


In [60]:
others_df = extractWavFeatures('./otherdata')

In [61]:
others_df

Unnamed: 0,filename,rmse,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,Abdalrahman_ali3.wav,0.1066137999296188,0.0693519467213114,-287.6629638671875,144.89564514160156,10.871818542480469,20.887985229492188,0.3699750304222107,-9.125370979309082,-6.464406490325928,...,-6.15941333770752,-6.608680725097656,2.4127354621887207,3.0699620246887207,-2.6879241466522217,-5.356696128845215,5.847270965576172,-4.211055278778076,2.4562854766845703,0
1,Abdalrahman_yasser7.wav,0.1103756055235862,0.0765333299512987,-244.219482421875,143.8993682861328,5.574076175689697,21.860469818115234,8.178303718566895,1.810903549194336,-12.31941032409668,...,1.0218335390090942,-2.7904012203216557,-6.042269706726074,-4.322595596313477,9.931966781616213,-12.113450050354004,-2.890146255493164,-4.532797336578369,-0.1253572702407837,0
2,Elsarta3.wav,0.0882738381624221,0.0725025850183823,-307.01202392578125,131.86398315429688,25.091894149780277,21.84569358825684,17.00538444519043,0.9868841767311096,0.7059024572372437,...,5.29824686050415,-12.500237464904783,7.309794902801514,1.8344430923461916,1.4465394020080566,-3.0290310382843018,4.693262100219727,-4.838291645050049,3.2337045669555664,0
3,Gufran_Mohamed5.wav,0.0890224277973175,0.1144120065789473,-301.59747314453125,114.06333923339844,4.114558219909668,22.731372833251957,-8.272941589355469,3.5818233489990234,-2.35291051864624,...,-1.7376694679260254,1.817986011505127,-3.8565433025360103,-4.763492584228516,-6.63831090927124,-5.775609493255615,-2.124946117401123,-4.153073310852051,5.735220909118652,0
4,Mayar_fayez8.wav,0.0842246040701866,0.1045174434267241,-308.1674499511719,118.34268188476562,-3.082785129547119,20.54697799682617,12.2445068359375,1.1642287969589231,0.0885040983557701,...,-3.380470037460327,-8.52939510345459,2.1461806297302246,5.399193286895752,-6.447416305541992,-5.478508472442627,4.634821891784668,-3.652671813964844,-2.5270371437072754,0
5,Maye_Khaled9.wav,0.0610729120671749,0.0800119173728813,-345.6620788574219,140.35247802734375,9.668944358825684,17.194801330566406,3.689471244812012,3.891180038452149,-4.054403305053711,...,4.6914567947387695,-7.468480110168457,13.591641426086426,-1.8055588006973269,-2.231125593185425,1.569752216339111,-3.772193670272827,-4.217522144317627,-5.23639440536499,0
6,Misara_Ahmed10.wav,0.0479998178780078,0.1147024972098214,-327.28912353515625,112.05503845214844,18.58304977416992,20.620424270629883,3.714186906814575,5.093952178955078,-12.470181465148926,...,4.943907260894775,-2.882629871368408,1.6429415941238403,0.5191766619682312,8.959257125854492,-2.7291717529296875,3.2844035625457764,-4.0250563621521,1.0311195850372314,0
7,Mohamed_Mostafa7.wav,0.082649827003479,0.0882655552455357,-320.76513671875,133.84722900390625,18.66616439819336,14.334344863891602,7.861462116241455,6.650166988372803,1.1358981132507324,...,0.5900747179985046,-3.669459104537964,-1.5224753618240356,2.20630955696106,2.99753212928772,0.3159008920192718,-0.4543010592460632,-3.243759155273437,1.4734705686569214,0
8,Moheb4.wav,0.099325492978096,0.1135764898255814,-245.3318328857422,118.10572052001952,-3.9917147159576416,18.25374794006348,14.0527925491333,-0.432139903306961,-9.787726402282717,...,2.8047966957092285,-5.718765735626221,-3.0440893173217773,0.3437050580978393,9.724272727966309,-7.581715583801269,-0.2839079201221466,-3.650105237960816,5.059080123901367,0
9,Naira_Youssif5.wav,0.0908483117818832,0.0911747101814516,-316.66754150390625,130.75445556640625,2.6571710109710693,4.074635028839111,-4.418753147125244,-4.892263889312744,-2.150657892227173,...,3.340596914291382,-8.426239013671875,4.838777542114258,-4.010745525360107,-6.416068077087402,-4.732669830322266,0.5335485935211182,-3.917380571365357,-2.6996681690216064,0


In [62]:
train_df = pd.concat([team_df,others_df])
# train_df = team_df

In [63]:
train_df

Unnamed: 0,filename,rmse,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,Amr1.wav,0.03124353289604187,0.09524574762658228,-398.46075439453125,138.5895538330078,3.0195839405059814,-12.14544677734375,-12.920365333557129,13.24248218536377,-16.564815521240234,...,0.2325524240732193,0.9407077431678772,0.8140118718147278,-4.634973049163818,5.3455023765563965,3.7176096439361572,-7.20994758605957,-3.473836660385132,0.6744386553764343,2
1,Amr10.wav,0.021408159285783768,0.08141243811881188,-434.5036315917969,152.6798095703125,12.030377388000488,0.9768251180648804,0.0641842857003212,11.69894790649414,-16.82280158996582,...,2.804825782775879,3.359466314315796,-1.487720251083374,2.9492483139038086,8.075194358825684,-1.483323335647583,0.37750381231307983,-0.8345574140548706,1.6290479898452759,2
2,Amr11.wav,0.052045322954654694,0.09086834016393443,-389.0693664550781,140.72108459472656,22.436880111694336,7.38924503326416,-14.420411109924316,10.467955589294434,-14.195365905761719,...,3.77705454826355,4.0779266357421875,-2.4903037548065186,1.615167260169983,2.439157247543335,3.506373405456543,-4.329512119293213,-0.24602235853672028,-3.07216215133667,2
3,Amr12.wav,0.03662358224391937,0.08905666977611941,-430.4794921875,148.5320281982422,19.754179000854492,2.6437618732452393,-15.569217681884766,9.518011093139648,-15.909305572509766,...,4.775101661682129,7.445225238800049,-6.382566452026367,0.664188802242279,6.244306564331055,0.35481828451156616,-3.0987207889556885,-0.8284878134727478,-3.9141740798950195,2
4,Amr13.wav,0.035578180104494095,0.08059939822635136,-429.6603088378906,149.5933380126953,16.58245849609375,-3.1334006786346436,-18.459609985351562,8.883918762207031,-14.992230415344238,...,7.0627827644348145,6.48460578918457,-6.427166938781738,0.4854406416416168,4.968094348907471,0.7781060934066772,-3.97288179397583,-0.9344853758811951,-2.878880262374878,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,Sama_Mostafa14.wav,0.09802084416151047,0.08207334321120689,-309.14495849609375,145.9494171142578,0.4912550747394562,0.4897775650024414,-1.1724460124969482,-11.389527320861816,-12.105578422546387,...,4.070107936859131,-4.355195045471191,0.548127293586731,4.8445820808410645,-2.4607954025268555,-6.609767436981201,2.2119452953338623,-8.701488494873047,-6.439525127410889,0
16,sara_amgad10.wav,0.05758098512887955,0.09900173611111111,-325.34326171875,144.0577850341797,-2.0796611309051514,11.415250778198242,9.431700706481934,4.165267467498779,-6.603484630584717,...,3.7682690620422363,0.14088685810565948,-4.578253746032715,1.0967668294906616,-0.04059265926480293,-8.100237846374512,-0.19826380908489227,-6.887458801269531,-1.2006165981292725,0
17,Shirouq7.wav,0.07740089297294617,0.08069349315068493,-275.23614501953125,122.78518676757812,16.025510787963867,25.77071762084961,4.772244453430176,-3.9702308177948,-11.76247787475586,...,-0.8033259510993958,-0.9933399558067322,7.550411701202393,-4.166332721710205,1.9667266607284546,-5.11379861831665,-5.6719560623168945,-3.9446661472320557,-3.22495174407959,0
18,Sohaila_Mohamed7.wav,0.1270662099123001,0.1521782309322034,-243.84642028808594,105.77610778808594,15.568729400634766,11.314079284667969,-2.7036733627319336,3.599822998046875,-14.891018867492676,...,2.8019936084747314,-3.50246262550354,4.430895805358887,1.3781228065490723,-0.36693599820137024,-1.8721753358840942,4.330615520477295,-5.755814552307129,-0.4267715513706207,0


In [64]:
# drop the unnecessary(label and filename) columns
X_Global = train_df.drop(columns=['label','filename'], axis=1)
# get the label in new df
Y_Global = train_df['label']

In [65]:
scaler = StandardScaler()
scaler.fit(X_Global)
standardized_data = scaler.transform(X_Global)

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X_Global, Y_Global, test_size=0.3, random_state=42,shuffle = True)

In [67]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    predicts = model.predict(X_test)
    print('Accuracy', title, ':', round(accuracy_score(y_test, predicts), 5), '\n')

In [68]:
# Naive Bayes
nb = GaussianNB()
model_assess(nb, "Naive Bayes")

# Stochastic Gradient Descent
sgd = SGDClassifier(max_iter=5000, random_state=0)
model_assess(sgd, "Stochastic Gradient Descent")

# KNN
knn = KNeighborsClassifier(n_neighbors=19)
model_assess(knn, "KNN")

# Decision trees
tree = DecisionTreeClassifier()
model_assess(tree, "Decision trees")

# Random Forest
rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")

# Support Vector Machine
svm = SVC(decision_function_shape="ovo")
model_assess(svm, "Support Vector Machine")

# Logistic Regression
lg = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
model_assess(lg, "Logistic Regression")

# Neural Nets
nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5000, 10), random_state=1)
model_assess(nn, "Neural Nets")

Accuracy Naive Bayes : 0.96296 

Accuracy Stochastic Gradient Descent : 0.66667 

Accuracy KNN : 0.81481 

Accuracy Decision trees : 0.88889 

Accuracy Random Forest : 1.0 

Accuracy Support Vector Machine : 0.37037 

Accuracy Logistic Regression : 0.96296 



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy Neural Nets : 0.74074 



In [69]:
# start using Random Forest Model

y = train_df['label'] # label
X = train_df.drop(columns = ['filename','label'], axis=1)   # data without label

cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)

# new data frame with the new scaled data.
X = pd.DataFrame(np_scaled, columns = cols)

In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,shuffle = True)

In [71]:
# create instance of the model
model = RandomForestClassifier(n_estimators=1000, max_depth=50, random_state=0)
model.fit(X_train, y_train)     # model fitting

predictions = model.predict(X_train)
X_train_prediction = model.predict(X_train)
accuracy_score(X_train_prediction, y_train)

X_test_prediction = model.predict(X_test)
new_score = accuracy_score(X_test_prediction, y_test)
print("accuracy is:")
accuracy_score(X_test_prediction, y_test)

accuracy is:


1.0

In [72]:
# naive bayes
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

predictions_nb = nb_model.predict(X_train)
X_train_prediction_nb = nb_model.predict(X_train)
accuracy_score(X_train_prediction_nb, y_train)

X_test_prediction_nb = nb_model.predict(X_test)
new_score = accuracy_score(X_test_prediction_nb, y_test)
print("accuracy is:")
accuracy_score(X_test_prediction_nb, y_test)

accuracy is:


0.9629629629629629

In [73]:
def predict(x):
    scaled = min_max_scaler.transform(x)
    return nb_model.predict(scaled)

In [74]:
# function for testing manually
def get_result(path):

    y, sr = librosa.load(path, mono=True, duration=3)

    # remove leading and trailing silence
    y, index = librosa.effects.trim(y)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    row_data = f'{np.mean(rmse)} {np.mean(zcr)}'
    for e in mfcc:
        row_data += f' {np.mean(e)}'

    print(predict([row_data.split()]))

In [75]:
get_result("C:/Users/I1bra/OneDrive/Documents/Sound Recordings/Recording (7).wav")
# Ibrahim

['1']




In [76]:
get_result("D:/My PC/Projects/DSP/Voice-Recognition-System/Model/data/Amr/Voice 014.wav")
# Amr

['2']




In [77]:
get_result("D:/My PC/Projects/DSP/Voice-Recognition-System/Model/data/Mariam_Wael_close/mariam_close14.wav")
# Mariam

['3']




In [78]:
get_result("D:/My PC/Projects/DSP/Voice-Recognition-System/Model/data/Naira_Youssif/Naira_Youssif3.wav")
# Naira

['0']




In [79]:
get_result("C:/Users/I1bra/Downloads/Music/WhatsApp Ptt 2022-12-07 at 15.05.52.wav")
# Stranger

['3']




In [80]:
get_result("C:/Users/I1bra/Downloads/Music/WhatsApp Ptt 2022-12-06 at 13.25.44.wav")
# Stranger

['3']




In [88]:
pickle.dump(nb_model, open('model.pkl', 'wb'))