In [1]:
import pandas as pd
import numpy as np
import os

In [54]:
# Take the entries under alexa/ (one per speaker) and keep at most the first 80.
# NOTE(review): os.listdir gives no ordering guarantee, so which 80 entries
# survive the slice may differ between machines - confirm this is acceptable.
speakers = os.listdir('alexa/')[:80]

In [55]:
# Index every audio file as a (filepath, speaker) row.
# The rows are gathered into a plain list first and the DataFrame is built once:
# growing a frame with DataFrame.append inside a loop copies the whole frame on
# every iteration, and the method no longer exists in pandas >= 2.0.
records = [
    {'filepath': 'alexa/{}/{}'.format(speaker, file), 'speaker': speaker}
    for speaker in speakers
    for file in os.listdir('alexa/{}/'.format(speaker))
]
# Passing columns= keeps the two expected columns even when no files were found.
df = pd.DataFrame(records, columns=['filepath', 'speaker'])
print(len(speakers))
df.head()  # last expression of the cell, so the preview is rendered as output

80


Unnamed: 0,filepath,speaker
0,alexa/anfcucvo/1.wav,anfcucvo
1,alexa/anfcucvo/2.wav,anfcucvo
2,alexa/anfcucvo/3.wav,anfcucvo
3,alexa/anfcucvo/4.wav,anfcucvo
4,alexa/anfcucvo/5.wav,anfcucvo


In [56]:
from sklearn.model_selection import train_test_split

In [57]:
# Hold out 25% of the rows as the test set.
# stratify=df['speaker'] spreads every speaker across both partitions, so no
# speaker ends up only in the test set and unseen during training.
# NOTE(review): no random_state is passed, so the split changes on every run.
train, test = train_test_split(
    df,
    test_size=0.25,
    stratify=df['speaker'],
)
train.head()

Unnamed: 0,filepath,speaker
11,alexa/bfeciyuh/3.wav,bfeciyuh
47,alexa/dsvhdyry/3.wav,dsvhdyry
187,alexa/lbapynyb/3.wav,lbapynyb
317,alexa/wnzlydvj/3.wav,wnzlydvj
158,alexa/kpkwyaut/4.wav,kpkwyaut


In [58]:
import librosa

In [60]:
def extract_features(filename):
    """Summarise one audio file as a list of 40 averaged MFCC values.

    The file is loaded with librosa (res_type='kaiser_fast'), 40 MFCCs are
    computed for it, and each coefficient is averaged so that a single
    number per coefficient remains.

    Args:
        filename: path of the audio file to load.

    Returns:
        A plain Python list holding the 40 per-coefficient means.
    """
    signal, rate = librosa.load(filename, res_type='kaiser_fast')

    # mfcc() gives one row per coefficient (n_mfcc=40 asks for 40 of them)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

    # after the transpose the coefficients are the columns, so averaging over
    # axis 0 collapses each coefficient to a single mean value
    coefficient_means = np.mean(coefficients.T, axis=0)

    return coefficient_means.tolist()

In [64]:
# test out the function on one of the files
# print(extract_features(train['filepath'][11]))


In [65]:
# run extract_features over every file path in the training partition
train_paths = train['filepath']
train_features = train_paths.apply(extract_features)


In [66]:
# same extraction for the test partition; like train_features, the result is a
# Series whose elements are the lists returned by extract_features
test_paths = test['filepath']
test_features = test_paths.apply(extract_features)
train_features.head()

11     [-597.6613159179688, 75.76461029052734, -2.416...
47     [-453.39544677734375, 80.40377807617188, 1.121...
187    [-425.58892822265625, 122.54962158203125, -30....
317    [-439.25750732421875, 150.1184539794922, -16.1...
158    [-453.8094482421875, 120.584716796875, -17.251...
Name: filepath, dtype: object

In [67]:
# preview the first few extracted test feature lists
test_features.head()

202    [-388.9583435058594, 90.2202377319336, -9.0481...
151    [-636.6281127929688, 79.7216796875, -25.271593...
293    [-399.783203125, 133.75177001953125, -23.45753...
268    [-567.4603881835938, 119.63312530517578, 35.07...
197    [-506.3456115722656, 95.40189361572266, 5.7141...
Name: filepath, dtype: object

In [68]:
# Sanity check: one feature vector is just a list with 40 elements.
# The element is picked by position (.iloc) rather than by the index label 114:
# train_features keeps the original row labels, and because the split is random
# a fixed label such as 114 may land in the test set, raising a KeyError here.
len(train_features.iloc[0])

40

In [69]:
# Separate the features (X) from the labels (Y = speaker name).
# Features and labels come from the same partition in the same row order,
# so X_train[i] belongs to Y_train[i] (and likewise for the test lists).
X_train, Y_train = train_features.tolist(), train['speaker'].tolist()
X_test, Y_test = test_features.tolist(), test['speaker'].tolist()

In [70]:
# X_train is now a list of lists; each inner list is one file's mfcc means.
# Show the first training sample together with its label.
print("Speaker: {}".format(Y_train[0]))
# the stray ": " that used to trail the placeholder has been removed
print("Features: {}".format(X_train[0]))

Speaker: bfeciyuh
Features: [-597.6613159179688, 75.76461029052734, -2.4160850048065186, 10.877646446228027, -7.995478630065918, 13.980165481567383, -13.169893264770508, 0.2100803703069687, 4.426782131195068, -5.444142818450928, 4.540070533752441, -6.761120796203613, -2.2482190132141113, 0.3148479759693146, 1.0842169523239136, -2.2032830715179443, -2.1875431537628174, -1.2984910011291504, -0.3666723370552063, 3.854919672012329, -2.4984068870544434, -2.5980982780456543, -0.0878404825925827, -3.405689001083374, -0.8611510396003723, 2.0397136211395264, -2.844575881958008, -2.659059524536133, -2.696981430053711, -2.453705072402954, -1.3845765590667725, -4.259166240692139, -2.073028326034546, -1.8004119396209717, -1.030044674873352, 1.810433268547058, 2.6391336917877197, 2.7678656578063965, 1.492274284362793, 1.3482919931411743]: 


In [71]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

In [72]:
# One-hot encode the speaker labels.
# The encoder is fitted on the training labels ONLY and then reused for the
# test labels. Re-fitting on the test labels would rebuild the name -> index
# mapping from whatever speakers happen to be in the test set, so the same
# speaker could get a different index (and the one-hot width could differ).
lb = LabelEncoder()
train_label_ids = lb.fit_transform(Y_train)
# transform() raises ValueError for a speaker never seen in training, which is
# preferable to silently mislabelling it.
test_label_ids = lb.transform(Y_test)

# Pin the width so both matrices have one column per known speaker even if the
# highest-numbered class is missing from one of the partitions.
n_classes = len(lb.classes_)
Y_train_encoded = to_categorical(train_label_ids, num_classes=n_classes)
Y_test_encoded = to_categorical(test_label_ids, num_classes=n_classes)

In [73]:
# show the one-hot row that encodes the first training label
first_encoded_label = Y_train_encoded[0]
print(first_encoded_label)

[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


In [74]:
# each sample is a list of 40 elements (the mfcc means)
second_sample = X_train[1]
print(len(second_sample))

40


In [75]:
from sklearn.preprocessing import StandardScaler

In [76]:
# container type of a single sample
sample_type = type(X_train[0])
print(sample_type)
# width of a one-hot row, i.e. the number of unique speakers
one_hot_width = len(Y_train_encoded[0])
print(one_hot_width)

<class 'list'>
80


In [77]:
# scaler used to standardise the mfcc features before they reach the network
ss = StandardScaler()


In [78]:
# Learn the scaling statistics from the training features only ...
X_train_scaled = ss.fit_transform(X_train)
# ... and apply those same statistics to the test features. Calling
# fit_transform here would re-fit the scaler on the test set, so the test data
# would be standardised differently from the data the model is trained on.
X_test_scaled = ss.transform(X_test)

In [79]:
# (number of training samples, number of features per sample)
X_train_scaled.shape

(254, 40)

In [83]:
# number of test samples (rows of the scaled test matrix)
X_test_scaled.shape[0]

85

In [80]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.callbacks import EarlyStopping

In [96]:
# Fully connected classifier: 40 mfcc means in, softmax over 80 outputs
# (one per speaker). Dropout gets stronger the deeper the layer.
model = Sequential([
    Dense(40, input_shape=(40,), activation='relu'),
    Dropout(0.1),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(80, activation='softmax'),
])

# no optimizer is named, so Keras falls back to its default one
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

# early stopping is currently disabled:
# early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='auto')


In [99]:
# Train for 20 epochs, holding back 10% of the training rows for validation.
# Not used at the moment: callbacks=[early_stop] and
# validation_data=(X_test_scaled, Y_test_encoded).
fit_options = dict(batch_size=256, epochs=20, validation_split=.1)
history = model.fit(X_train_scaled, Y_train_encoded, **fit_options)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
