In [1]:
import pandas as pd       
import os 
import math 
import numpy as np
import matplotlib.pyplot as plt  
import IPython.display as ipd  # To play sound in the notebook
import librosa
import librosa.display

In [3]:
# Read the speaker metadata table from disk and preview its first rows.
metadata = pd.read_csv(filepath_or_buffer='speakers_all.csv')
metadata.head(n=5)

Unnamed: 0,age,age_onset,birthplace,filename,native_language,sex,speakerid,country,file_missing?,Unnamed: 9,Unnamed: 10,Unnamed: 11
0,24.0,12.0,"koussi, senegal",balanta,balanta,male,788,senegal,True,,,
1,18.0,10.0,"buea, cameroon",cameroon,cameroon,male,1953,cameroon,True,,,
2,48.0,8.0,"hong, adamawa, nigeria",fulfulde,fulfulde,male,1037,nigeria,True,,,
3,42.0,42.0,"port-au-prince, haiti",haitian,haitian,male,1165,haiti,True,,,
4,40.0,35.0,"port-au-prince, haiti",haitian,haitian,male,1166,haiti,True,,,


In [4]:
# Keep only the rows whose 'file_missing?' flag is not True, then show the
# resulting table size.
has_recording = ~metadata['file_missing?'].eq(True)
metadata = metadata[has_recording]
metadata.shape

(2138, 12)

In [5]:
def features_extractor(file, n_mfcc=40):
    """Return a fixed-length MFCC summary vector for one audio file.

    The audio is loaded with librosa (``res_type='kaiser_fast'``), its MFCC
    matrix is computed, and the coefficients are averaged across the
    transposed matrix's first axis so every recording yields a vector of
    length ``n_mfcc``.

    Parameters
    ----------
    file : str or path-like
        Location of the audio file to load.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40, the value the
        notebook originally hard-coded).

    Returns
    -------
    numpy.ndarray
        1-D array with ``n_mfcc`` entries.
    """
    # Load from the argument itself; the previous body read a global named
    # `file_name`, so the result depended on whatever that global held.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    return mfccs_scaled_features

In [6]:
import numpy as np
from tqdm import tqdm

### Iterate through every audio file listed in the metadata and extract
### its Mel-Frequency Cepstral Coefficient (MFCC) feature vector,
### collecting [feature_vector, sex_label] pairs.
extracted_features = []
# iterrows() is a generator without a length, so tqdm needs total= to draw a
# progress bar with an ETA instead of only a running "N it" counter.
for index_num, row in tqdm(metadata.iterrows(), total=len(metadata)):
    file_name = 'recordings/recordings/' + str(row["filename"]) + '.mp3'
    final_class_labels = row["sex"]
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])

2138it [18:36,  1.92it/s]


In [7]:
# Tabulate the collected pairs: one row per recording, holding its feature
# vector and its class label, then preview the table.
feature_columns = ['feature', 'class']
extracted_features_df = pd.DataFrame(data=extracted_features, columns=feature_columns)
extracted_features_df.head()

Unnamed: 0,feature,class
0,"[-283.2144, 102.64268, 12.902011, 43.4498, 5.5...",female
1,"[-252.67426, 99.67917, 0.25114948, 48.571484, ...",male
2,"[-298.53323, 111.03142, 3.1933663, 43.25243, 1...",male
3,"[-250.61511, 117.96154, 8.878023, 51.569702, 6...",male
4,"[-329.30942, 110.29973, 10.1238165, 16.642046,...",male


In [8]:
# Persist the feature table to disk, leaving out the index column.
# NOTE(review): the 'feature' column holds arrays, which CSV stores as text.
extracted_features_df.to_csv(path_or_buf='MFCC_Extracted_features.csv', index=False)

In [9]:
# Split the table into the model inputs (X) and the raw class labels (y).
feature_rows = extracted_features_df['feature'].tolist()
class_rows = extracted_features_df['class'].tolist()
X = np.array(feature_rows)
y = np.array(class_rows)

In [10]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Encode from the label column of the feature table rather than from `y`
# itself: `y = f(y)` is self-referential, so re-running the cell would try to
# label-encode an array that is already one-hot encoded.
class_names = np.array(extracted_features_df['class'].tolist())

# Map the string labels to integer ids, then one-hot encode those ids.
labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(class_names))
y

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state pins the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [12]:
# Shapes of the four splits, in the order X_train, X_test, y_train, y_test.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((1710, 40), (428, 40), (1710, 2), (428, 2))

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [14]:
# Take the input width and the number of output classes from the training
# arrays instead of repeating literals, so the network stays in sync with the
# feature extraction and the label encoding.
input_shape = (X_train.shape[1],)
num_labels = y_train.shape[1]

# Fully connected classifier: three ReLU hidden layers, each followed by
# dropout, and a softmax output with one unit per class.
model = Sequential()
###first layer
model.add(Dense(100, input_shape=input_shape))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               4100      
_________________________________________________________________
activation (Activation)      (None, 100)               0         
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               20200     
_________________________________________________________________
activation_1 (Activation)    (None, 200)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               2

In [15]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
## Training my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 100
num_batch_size = 32

# Create the checkpoint directory up front; writing the HDF5 file can fail
# when the target folder does not exist yet.
checkpoint_path = 'saved models/Gender_Classifier_ANN.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the weights with the lowest val_loss so far.
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)
start = datetime.now()

# NOTE(review): the test split doubles as validation data here, so checkpoint
# selection sees the same samples that are later used to report accuracy.
model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.78812, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 1.78812 to 0.82553, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.82553 to 0.68440, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 0.68440 to 0.67512, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.67512
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.67512
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.67512
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.67512
Epoch 9/100

Epoch 00009: val_loss improved from 0.67512 to 0.66730, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 10/100

Epoch 00010: val_loss improved from 0.66730 to 0.66062, saving model to saved models\Gender_Classifier_ANN.hd


Epoch 00037: val_loss did not improve from 0.21488
Epoch 38/100

Epoch 00038: val_loss did not improve from 0.21488
Epoch 39/100

Epoch 00039: val_loss did not improve from 0.21488
Epoch 40/100

Epoch 00040: val_loss did not improve from 0.21488
Epoch 41/100

Epoch 00041: val_loss improved from 0.21488 to 0.21178, saving model to saved models\Gender_Classifier_ANN.hdf5
Epoch 42/100

Epoch 00042: val_loss did not improve from 0.21178
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.21178
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.21178
Epoch 45/100

Epoch 00045: val_loss did not improve from 0.21178
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.21178
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.21178
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.21178
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.21178
Epoch 50/100

Epoch 00050: val_loss did not improve from 0.21178
Epoch 51/100

Epoch 00051: val_loss did no


Epoch 00079: val_loss did not improve from 0.21178
Epoch 80/100

Epoch 00080: val_loss did not improve from 0.21178
Epoch 81/100

Epoch 00081: val_loss did not improve from 0.21178
Epoch 82/100

Epoch 00082: val_loss did not improve from 0.21178
Epoch 83/100

Epoch 00083: val_loss did not improve from 0.21178
Epoch 84/100

Epoch 00084: val_loss did not improve from 0.21178
Epoch 85/100

Epoch 00085: val_loss did not improve from 0.21178
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.21178
Epoch 87/100

Epoch 00087: val_loss did not improve from 0.21178
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.21178
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.21178
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.21178
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.21178
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.21178
Epoch 93/100

Epoch 00093: val_loss did not improve from 0.21178
Epoch 94/100

Epoch 00094: val_loss di

In [17]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy entry, which is what gets printed.
test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
print(test_accuracy[1])

0.927570104598999


In [18]:
# Sequential.predict_classes() was deprecated and later removed from tf.keras;
# taking the argmax over the softmax outputs yields the same class indices.
np.argmax(model.predict(X_test), axis=-1)



array([0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,

In [20]:
# Classify one recording end to end: load it, build the same 40-coefficient
# mean-MFCC vector used for training, and map the prediction to its label.
filename = "recordings/recordings/english381.mp3"
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast')
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

# The model consumes batches, so reshape the single vector to (1, n_features).
mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)

# Sequential.predict_classes() was deprecated and later removed from tf.keras;
# argmax over the softmax outputs gives the same class index.
predicted_label = np.argmax(model.predict(mfccs_scaled_features), axis=-1)

# Translate the integer class index back to the original string label.
prediction_class = labelencoder.inverse_transform(predicted_label)

predicted_label, prediction_class

(array([1], dtype=int64), array(['male'], dtype='<U6'))