In [14]:
import pandas as pd
import os

In [16]:
# Keep only sub-directories of alexa/ so stray files (e.g. .DS_Store) do not
# break the per-speaker directory listing done later, and sort the names
# because os.listdir returns entries in arbitrary order.
speakers = sorted(
    entry for entry in os.listdir('alexa/')
    if os.path.isdir(os.path.join('alexa/', entry))
)

In [39]:
# Collect one record per audio file and build the DataFrame once.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, so growing the frame row-by-row was both fragile and quadratic.
records = []
for speaker in speakers:
    # sorted() makes the row order independent of the filesystem's listing order
    for file in sorted(os.listdir('alexa/{}/'.format(speaker))):
        records.append({'speaker': speaker,
                        'filepath': 'alexa/{}/{}'.format(speaker, file)})
# Passing columns= keeps the two-column layout even when no files are found.
df = pd.DataFrame(records, columns=['speaker', 'filepath'])
print(df)

      speaker              filepath
0    anfcucvo  alexa/anfcucvo/1.wav
1    anfcucvo  alexa/anfcucvo/2.wav
2    anfcucvo  alexa/anfcucvo/3.wav
3    anfcucvo  alexa/anfcucvo/4.wav
4    anfcucvo  alexa/anfcucvo/5.wav
..        ...                   ...
364  zmnsojmf  alexa/zmnsojmf/4.wav
365  zzgleilo  alexa/zzgleilo/1.wav
366  zzgleilo  alexa/zzgleilo/2.wav
367  zzgleilo  alexa/zzgleilo/3.wav
368  zzgleilo  alexa/zzgleilo/4.wav

[369 rows x 2 columns]


In [40]:
from sklearn.model_selection import train_test_split

In [65]:
# stratify keeps each speaker's share of files the same in train and test, so
# no speaker ends up with all of their files in only one of the two sets.
# random_state fixes the shuffle so the split is identical on every run.
train, test = train_test_split(
    df, test_size=0.25, stratify=df['speaker'], random_state=42
)
print(train)

      speaker              filepath
24   bqblxokh  alexa/bqblxokh/4.wav
227  onnnswlx  alexa/onnnswlx/3.wav
73   fsyeviyq  alexa/fsyeviyq/1.wav
248  rseqbffb  alexa/rseqbffb/2.wav
52   fbakxffn  alexa/fbakxffn/4.wav
..        ...                   ...
197  lperewmq  alexa/lperewmq/1.wav
161  krdssgoc  alexa/krdssgoc/3.wav
244  rnivxgyz  alexa/rnivxgyz/2.wav
360  zgmrhuwb  alexa/zgmrhuwb/4.wav
298  vlfafluc  alexa/vlfafluc/4.wav

[276 rows x 2 columns]


In [66]:
import librosa

In [67]:
def extract_features(filename):
    """Return the MFCC matrix for the audio file at ``filename``.

    The file is read with ``librosa.load`` (``res_type='kaiser_fast'``) and
    40 MFCCs are computed from the loaded signal, using the sample rate that
    ``librosa.load`` reports for it.
    """
    signal, sr = librosa.load(filename, res_type='kaiser_fast')
    return librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)

In [74]:
# Take the first training row by position: the index labels in `train` are
# whichever rows the split happened to select, so a hard-coded label such as
# 24 raises KeyError whenever that row is not in the training set.
print(extract_features(train['filepath'].iloc[0]))


[[-5.4602295e+02 -4.6497708e+02 -4.4563998e+02 ... -5.3794788e+02
  -5.3871210e+02 -5.5161633e+02]
 [ 6.6446342e+01  1.2070842e+02  1.2920062e+02 ...  7.0282883e+01
   7.1751129e+01  6.9234131e+01]
 [ 2.5904257e+01 -2.0075550e+00 -7.6244440e+00 ...  3.0608452e+01
   3.4209656e+01  3.9892284e+01]
 ...
 [ 4.5199814e+00  3.4188241e-01 -6.9050020e-01 ...  4.8367424e+00
   4.6415453e+00  5.2037086e+00]
 [ 5.6523366e+00  1.3504209e-01 -8.2652754e-01 ...  5.9185467e+00
   4.6401515e+00  5.3823233e+00]
 [ 2.6489525e+00 -9.6214569e-01 -7.7273321e-01 ...  4.7274547e+00
   3.8416429e+00  5.5333214e+00]]


In [69]:
# Compute the MFCC matrix for every training file; the result is a Series
# of arrays that keeps train's index.
train_features = train.loc[:, 'filepath'].apply(extract_features)
print(train_features)

24     [[-546.02295, -464.97708, -445.63998, -495.865...
227    [[-622.87915, -622.87915, -622.87915, -622.123...
73     [[-640.9671, -646.6486, -638.73047, -629.2242,...
248    [[-600.41797, -600.41797, -590.71405, -551.712...
52     [[-697.3323, -697.3323, -697.3323, -697.3323, ...
                             ...                        
197    [[-437.8819, -464.9391, -558.758, -572.0993, -...
161    [[-597.7482, -504.89355, -468.78607, -477.7529...
244    [[-826.1128, -745.0435, -716.5024, -714.4289, ...
360    [[-554.9979, -543.29236, -532.9461, -528.5909,...
298    [[-633.24756, -633.24756, -633.24756, -626.226...
Name: filepath, Length: 276, dtype: object
