# import modules

In [1]:
import librosa
import os
import pandas as pd
import librosa
import glob
import librosa.display
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Define targeting words

In [2]:
words = ['one','two','three','four','five','six','seven','eight','nine','zero']

# Generate DataFrame of filepath -> label

In [3]:
data = []

for word in words:
    for file in os.listdir('./words/sc/'+word):
        data.append([word+'/'+file,word])

labels = pd.DataFrame(data,columns=['filepath','label'])

In [4]:
labels

Unnamed: 0,filepath,label
0,one/988e2f9a_nohash_0.wav,one
1,one/8d4cdc60_nohash_1.wav,one
2,one/93ec8b84_nohash_0.wav,one
3,one/6272b231_nohash_1.wav,one
4,one/439c84f4_nohash_1.wav,one
...,...,...
23661,zero/a60a09cf_nohash_1.wav,zero
23662,zero/2c7c33e8_nohash_0.wav,zero
23663,zero/540d8427_nohash_0.wav,zero
23664,zero/173ae793_nohash_1.wav,zero


# Parse feautres

In [6]:
y, sr = librosa.load('./words/sc/'+'one/988e2f9a_nohash_0.wav')
ps = librosa.feature.melspectrogram(y=y,sr=sr)
ps.shape

(128, 44)

In [7]:
def parser(filepath):
#    label = row.label
   # function to load files and extract features
   file_name = os.path.join(os.path.abspath('./words/sc'), filepath)
   # print(filename)
   # handle exception to check if there isn't a file which is corrupted
   feature = None
   try:
      # here kaiser_fast is a technique used for faster extraction
      y, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
      # we extract mfcc feature from data
      ps = librosa.feature.melspectrogram(y=y, sr=sample_rate)
      print(filepath)
      if ps.shape[1]<32:
         return None
      else:
         return ps[0:128,0:32]
#       mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=60).T,axis=0) 
   except Exception as e:
      print("Error encountered while parsing file: ", file)
      return [None,None]
 
#    return [feature, label]

# THIS CELL RUNS LOOOOOONG

In [8]:
labels['feature'] = labels['filepath'].map(parser)

one/988e2f9a_nohash_0.wav
one/8d4cdc60_nohash_1.wav
one/93ec8b84_nohash_0.wav
one/6272b231_nohash_1.wav
one/439c84f4_nohash_1.wav
one/6a27a9bf_nohash_0.wav
one/b87bdb22_nohash_1.wav
one/97f4c236_nohash_2.wav
one/563aa4e6_nohash_2.wav
one/51055bda_nohash_4.wav
one/a8cb6dda_nohash_0.wav
one/5fe4a278_nohash_0.wav
one/e4a2cf79_nohash_0.wav
one/dabf67d9_nohash_0.wav
one/742d6431_nohash_3.wav
one/62581901_nohash_0.wav
one/708a9569_nohash_1.wav
one/bf8d5617_nohash_0.wav
one/229978fd_nohash_3.wav
one/65f2531f_nohash_1.wav
one/686d030b_nohash_1.wav
one/409c962a_nohash_1.wav
one/aef8dcf5_nohash_0.wav
one/8012c69d_nohash_3.wav
one/98582fee_nohash_1.wav
one/8134f43f_nohash_0.wav
one/ffd2ba2f_nohash_4.wav
one/dd086776_nohash_1.wav
one/65d844fc_nohash_0.wav
one/53eb0a88_nohash_0.wav
one/712e4d58_nohash_2.wav
one/e77d88fc_nohash_1.wav
one/1ba20be2_nohash_1.wav
one/a6d586b7_nohash_1.wav
one/b43c8f2f_nohash_1.wav
one/d750966e_nohash_1.wav
one/c0445658_nohash_4.wav
one/e1469561_nohash_1.wav
one/e3e49931

In [13]:
df = labels[labels.feature!= None]

In [15]:
labels

Unnamed: 0,filepath,label,feature
0,one/988e2f9a_nohash_0.wav,one,"[[3.538189e-05, 9.701841e-06, 9.662868e-07, 5...."
1,one/8d4cdc60_nohash_1.wav,one,"[[0.25821248, 0.26854718, 0.71036494, 0.320634..."
2,one/93ec8b84_nohash_0.wav,one,"[[0.00018324668, 0.0002281351, 0.00040254733, ..."
3,one/6272b231_nohash_1.wav,one,"[[0.00053598, 0.0012571685, 0.0005986494, 0.00..."
4,one/439c84f4_nohash_1.wav,one,"[[0.0056350594, 0.0019324248, 0.0010398972, 0...."
...,...,...,...
23661,zero/a60a09cf_nohash_1.wav,zero,"[[0.32521176, 0.18648, 0.07034619, 0.10869359,..."
23662,zero/2c7c33e8_nohash_0.wav,zero,"[[0.00015468769, 0.00010447703, 2.887968e-05, ..."
23663,zero/540d8427_nohash_0.wav,zero,"[[0.22501943, 0.056041177, 1.6744532e-06, 1.59..."
23664,zero/173ae793_nohash_1.wav,zero,"[[0.0008425674, 0.0013567662, 0.0010940629, 0...."


# Prepare Training set

In [57]:
df=pd.DataFrame(temp.to_list(),columns = ['feature','label'])
X = np.array(df.feature.tolist())
y = np.array(df.label.tolist())
lb = LabelEncoder()
y = np_utils.to_categorical(lb.fit_transform(y))

In [None]:
X.shape

In [59]:
y.shape

(23666, 10)

In [61]:
def make_Model(batch_size):
    input_shape = (128,32,1)
    num_labels = len(words)
    # build model
    model = Sequential()

    model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))

    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))

    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dropout(rate=0.5))

    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(rate=0.5))

    model.add(Dense(num_labels))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

def train_Model(split,batch_size=32):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=22)
    model = make_Model(batch_size)
    model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test))
    return model


In [15]:
model1 = train_Model(0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
train_Model(0.5)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.engine.sequential.Sequential at 0x1560ef7f0>

In [62]:
history = train_Model(0.2,32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [54]:
df.to_csv('./words/number_40_input.csv', index=False)