In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
speakers = os.listdir('alexa/')

In [143]:
df = pd.DataFrame(columns=['filepath', 'speaker'])
for speaker in speakers:
    files = os.listdir('alexa/{}/'.format(speaker))
    for file in files:
        filepath = 'alexa/{}/{}'.format(speaker, file)
        df = df.append({'filepath':filepath, 'speaker':speaker}, ignore_index=True)
print(len(speakers))
df.head() # this is just a print statement

87


Unnamed: 0,filepath,speaker
0,alexa/anfcucvo/1.wav,anfcucvo
1,alexa/anfcucvo/2.wav,anfcucvo
2,alexa/anfcucvo/3.wav,anfcucvo
3,alexa/anfcucvo/4.wav,anfcucvo
4,alexa/anfcucvo/5.wav,anfcucvo


In [4]:
from sklearn.model_selection import train_test_split

In [5]:
train, test = train_test_split(df, test_size = 0.25, stratify = df['speaker'])
# the stratify parameter makes the function split data evenly over the speakers column
# this is so we dont get all files of the same speaker in the test set and not the training set
train.head()

Unnamed: 0,filepath,speaker
3,alexa/anfcucvo/4.wav,anfcucvo
266,alexa/toiwzwrl/4.wav,toiwzwrl
368,alexa/zzgleilo/4.wav,zzgleilo
225,alexa/onnnswlx/1.wav,onnnswlx
365,alexa/zzgleilo/1.wav,zzgleilo


In [25]:
import librosa

In [27]:
librosa.__version__

'0.8.0'

In [72]:
def extract_features(filename):
    
    X, sample_rate = librosa.load(filename, res_type='kaiser_fast')
    
    # librosa returns an array of 40 arrays, one for each mfcc
    # np.mean takes the mean of each array, so we will be left with an array of size 40
    # the n_mfcc=40 parameter means return 40 mfccs
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
      
    return mfccs.tolist()

In [73]:
# test out the function on one of the files
print(extract_features(train['filepath'][178]))


[-319.6217956542969, 148.2321319580078, 7.4388346672058105, 43.691959381103516, -12.536559104919434, 14.930658340454102, -13.931859970092773, -10.716937065124512, 2.3703205585479736, -2.962925910949707, 10.01125717163086, -3.053565502166748, 4.673168182373047, -0.6584608554840088, 1.6229711771011353, 2.242025136947632, -6.614349842071533, -4.553686618804932, -3.0322628021240234, -5.750484466552734, -8.927915573120117, -1.6755651235580444, -2.0000736713409424, 0.23309101164340973, 1.6361275911331177, 2.6695241928100586, 5.513359546661377, 2.7833445072174072, 2.9495532512664795, 1.6522233486175537, 1.3291791677474976, -1.1747522354125977, -0.5632534623146057, 0.29200637340545654, 0.3599144518375397, 2.6883511543273926, 0.42928919196128845, 1.7083054780960083, 0.8912426829338074, 0.21304486691951752]


In [74]:
# apply the extract features to every element in train and test
train_features = train['filepath'].apply(extract_features)


In [75]:
# train_features is now an array of arrays
test_features = test['filepath'].apply(extract_features)
train_features.head()

3      [-517.032470703125, 104.11851501464844, -3.678...
266    [-390.765869140625, 119.85955047607422, 19.727...
368    [-444.8863830566406, 103.65220642089844, -18.3...
225    [-425.0965576171875, 89.916015625, -7.29105377...
365    [-471.3912658691406, 100.19432067871094, -9.63...
Name: filepath, dtype: object

In [76]:
test_features.head()

168    [-487.532470703125, 105.15260314941406, 34.326...
326    [-391.30303955078125, 93.7576904296875, -26.33...
359    [-514.9548950195312, 133.59536743164062, -34.7...
201    [-410.9751281738281, 88.24861907958984, 4.1486...
47     [-453.39544677734375, 80.40377807617188, 1.121...
Name: filepath, dtype: object

In [88]:
len(train_features[142])
# just an array with 40 elements

40

In [101]:
# split into X and Y where X is the features and Y is the label (name of speaker)
# remember that each array is still in the same order as above 
# so each element in X_train corresponds to an element in Y_train at the same index
X_train = train_features.tolist()
X_test = test_features.tolist()
Y_train = train['speaker'].tolist()
Y_test = test['speaker'].tolist()

In [102]:
# now X_train is a 2d array, and each array is the long array of mfccs
print("Speaker: {}".format(Y_train[0]))
print("Features: {}: ".format(X_train[0]))

Speaker: anfcucvo
Features: [-517.032470703125, 104.11851501464844, -3.6789493560791016, 21.895641326904297, -17.589296340942383, 19.453096389770508, -0.5379565954208374, 10.556646347045898, 5.225306510925293, 1.1143144369125366, 12.795035362243652, -2.8566532135009766, 7.599920749664307, 4.759021759033203, 2.1658146381378174, -4.635499954223633, 2.867617607116699, 0.8376765847206116, -7.942584037780762, 2.16778826713562, -2.937751531600952, -3.4222733974456787, -0.035209257155656815, -1.471524953842163, -2.977267265319824, -3.166192054748535, -2.108940601348877, -1.968748927116394, -2.189997434616089, -2.1241023540496826, -3.226284980773926, -3.221952199935913, -3.2832448482513428, -2.4119808673858643, -0.7238619327545166, -2.454301118850708, -0.8603566884994507, -1.3303098678588867, -1.2850427627563477, -0.8773266673088074]: 


In [103]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

In [122]:
# hot encode y 
lb = LabelEncoder()
Y_train_encoded = to_categorical(lb.fit_transform(Y_train))
Y_test_encoded = to_categorical(lb.fit_transform(Y_test))

In [123]:
print(Y_train_encoded[0])

[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [124]:
print(len(X_train[1])) # array of 40 elements (mean of mfcc)

40


In [125]:
from sklearn.preprocessing import StandardScaler

In [126]:
print(type(X_train[0]))

<class 'list'>


In [127]:
ss = StandardScaler()


In [137]:
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.fit_transform(X_test)

In [138]:
X_train_scaled.shape

(276, 40)

In [139]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.callbacks import EarlyStopping

In [162]:
model = Sequential()

model.add(Dense(40, input_shape=(40,), activation = 'relu'))
model.add(Dropout(0.1))

model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.25))  

model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.5))    

model.add(Dense(87, activation = 'softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='auto')


In [163]:
history = model.fit(X_train_scaled, Y_train_encoded, batch_size=256, epochs=10, validation_data=(X_test_scaled, Y_test_encoded),callbacks=[early_stop])

Epoch 1/10

ValueError: in user code:

    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\engine\training.py:1224 test_function  *
        return step_function(self, iterator)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\engine\training.py:1215 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\engine\training.py:1208 run_step  **
        outputs = model.test_step(data)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\engine\training.py:1176 test_step
        self.compiled_loss(
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\losses.py:149 __call__
        losses = ag_call(y_true, y_pred)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\losses.py:1535 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\keras\backend.py:4687 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    C:\Users\alast\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 86) and (None, 87) are incompatible
