# import modules

In [1]:
import librosa
import os
import pandas as pd
import librosa
import glob
import librosa.display
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Define targeting words

In [231]:
words = ['dog','bird','cat']

# Generate DataFrame of filepath -> label

In [236]:
data = []

for word in words:
    for file in os.listdir('./words/sc/'+word):
        data.append([word+'/'+file,word])

labels = pd.DataFrame(data,columns=['filepath','label'])

In [237]:
labels

Unnamed: 0,filepath,label
0,dog/988e2f9a_nohash_0.wav,dog
1,dog/a7545b9f_nohash_1.wav,dog
2,dog/6a27a9bf_nohash_0.wav,dog
3,dog/a9abc695_nohash_1.wav,dog
4,dog/f428ca69_nohash_1.wav,dog
...,...,...
5205,cat/da2c5f1b_nohash_2.wav,cat
5206,cat/93f30cc4_nohash_1.wav,cat
5207,cat/c0c0d87d_nohash_0.wav,cat
5208,cat/f216055e_nohash_0.wav,cat


# Parse feautres

In [238]:
def parser(row):
   filepath = row.filepath
   label = row.label
   # function to load files and extract features
   file_name = os.path.join(os.path.abspath('./words/sc'), filepath)
   # print(filename)
   # handle exception to check if there isn't a file which is corrupted
   try:
      # here kaiser_fast is a technique used for faster extraction
      X, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
      # we extract mfcc feature from data
      mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
   except Exception as e:
      print("Error encountered while parsing file: ", file)
      return [None,None]
 
   feature = mfccs
 
   return [feature, label]

# THIS CELL RUNS LOOOOOONG

In [241]:
temp = labels.apply(parser, axis=1)

# Prepare Training set

In [242]:
df=pd.DataFrame(temp.to_list(),columns = ['feature','label'])
X = np.array(df.feature.tolist())
y = np.array(df.label.tolist())
lb = LabelEncoder()
y = np_utils.to_categorical(lb.fit_transform(y))

In [244]:
X.shape

(5210, 40)

In [245]:
y.shape

(5210, 3)

In [246]:
def make_Model():
    filter_size = 2
    num_labels = 3
    # build model
    model = Sequential()

    model.add(Dense(256, input_shape=(40,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_labels))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

def train_Model(split):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=22)
    model = make_Model()
    model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test))
    return model


In [247]:
model1 = train_Model(0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [250]:
train_Model(0.5)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.engine.sequential.Sequential at 0x15436b6a0>

In [249]:
df.to_csv('./words/input.csv', index=False)