#### 1. Imports

In [24]:
from configparser import ConfigParser
from tqdm import tqdm
import numpy as np
import librosa
import os

from sklearn.model_selection import train_test_split

from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Conv1D
from keras.layers import BatchNormalization, Reshape, LSTM
from keras.optimizers import Adam
from keras.utils import to_categorical

#### 2. Prepare data

In [25]:
# Read the notebook settings from config.ini (looked up in the working directory)
config = ConfigParser()
config.read('config.ini')

# Input / output locations; later cells prepend these strings to label names
in_data = config.get('section_path', 'in_path')
out_data = config.get('section_path', 'out_path')

# Label names: one whitespace-separated option, turned into a list
Keys = config.get('section_keys', 'Keys').split()

In [28]:
def wav2mfcc(file_path, mfcclen=65):
    """
    Load an audio file and return its MFCC matrix with a fixed number of frames.

    Arguments:
    file_path -- path of the audio file to read
    mfcclen -- number of frames (columns) in the returned matrix

    Returns:
    mfcc -- 2-D array whose second axis has exactly mfcclen entries; shorter
            recordings are zero-padded at the end, longer ones are truncated
    """
    # read file as mono at its native sampling rate (sr=None disables resampling)
    wave, sr = librosa.load(file_path, mono=True, sr=None)

    # compute the MFCCs with the rate the audio was actually loaded at instead
    # of a hard-coded 16000; the signal is passed by keyword (y=) because
    # recent librosa releases no longer accept it positionally
    mfcc = librosa.feature.mfcc(y=wave, sr=sr)

    # pad with zeros or truncate along the frame axis to get mfcclen frames
    frames = mfcc.shape[1]
    if frames < mfcclen:
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, mfcclen - frames)), mode='constant')
    else:
        mfcc = mfcc[:, :mfcclen]
    return mfcc

In [31]:
# Keep only the labels that have a folder of recordings. The list is filtered
# in one step (slice assignment keeps the same list object) because removing
# items from a list while looping over it skips the entry after each removal.
Keys[:] = [Key for Key in Keys if os.path.isdir(in_data + Key)]

# Compute the MFCC matrix of every recording and store one .npy file per label
for Key in Keys:
    # init mfcc
    mfcc_v = []
    wavfiles = [in_data + Key + '/' + wavfile for wavfile in os.listdir(in_data + Key)]
    for wavfile in tqdm(wavfiles, "label - '{}'".format(Key), total=len(wavfiles), unit="files"):
        mfcc_v.append(wav2mfcc(wavfile))
    # save it
    np.save(out_data + Key + '.npy', mfcc_v)

label - 'left': 100%|██████████| 207/207 [00:02<00:00, 93.99files/s]
label - 'right': 100%|██████████| 208/208 [00:02<00:00, 94.87files/s]
label - 'stop': 100%|██████████| 181/181 [00:01<00:00, 97.33files/s]
label - 'go': 100%|██████████| 189/189 [00:01<00:00, 96.18files/s]
label - 'noise': 100%|██████████| 81/81 [00:00<00:00, 94.74files/s]


In [32]:
# Keep only the labels that have a folder of recordings. Filtering in one step
# avoids removing items while iterating, which skips the entry that follows
# each removed one.
Keys[:] = [Key for Key in Keys if os.path.isdir(in_data + Key)]

Keys_Indx = np.arange(0, len(Keys))

# Build the feature array X and the label vector Y; the label of a sample is
# the position of its key in Keys. Parts are collected first and joined once.
mfcc_parts, label_parts = [], []
for Idx, Key in enumerate(Keys):
    print(Idx, Key)
    x = np.load(out_data + Key + '.npy')
    mfcc_parts.append(x)
    # float labels, matching the dtype np.zeros() produced for the first key
    label_parts.append(np.full(x.shape[0], fill_value=Idx, dtype=np.float64))

if not mfcc_parts:
    raise ValueError("no entry of Keys has a data folder under in_data; nothing to load")

X = np.concatenate(mfcc_parts, axis=0)
Y = np.concatenate(label_parts)
assert X.shape[0] == len(Y)

# Split into training (XT, YT) and validation (XV, YV) sets, about 90% / 10%
XT, XV, YT, YV = train_test_split(X, Y, test_size= (1 - 0.9), random_state=42, shuffle=True)

0 left
1 right
2 stop
3 go
4 noise


In [33]:
# create one-hot vectors for YT and YV; the width is pinned to the number of
# labels so both matrices always have len(Keys) columns, even if a split
# happens to contain no sample of the highest label index
YT_HOT = to_categorical(YT, num_classes=len(Keys))
YV_HOT = to_categorical(YV, num_classes=len(Keys))

#### 3. Model

In [34]:
def model(input_shape):
    """
    Build the (uncompiled) recurrent classifier in Keras.

    Argument:
    input_shape -- shape of one input sample, without the batch dimension

    Returns:
    network -- Keras Sequential instance: three stacked LSTM layers followed
               by a softmax Dense layer with one unit per entry of the
               notebook-level list Keys
    """
    stack = [
        # the first two LSTMs return the whole sequence so the next LSTM can read it
        LSTM(units=128, input_shape=input_shape, return_sequences=True),
        LSTM(units=128, return_sequences=True),
        # the last LSTM returns only its final output vector
        LSTM(units=20, return_sequences=False),
        Dense(units=len(Keys), activation='softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)
    return network

In [35]:
# Build the network for samples shaped like XT without the batch axis.
# NOTE: this rebinds the name `model` from the builder function to the built
# network, so re-running this cell alone fails until the cell that defines
# `model(input_shape)` has been run again.
model = model(input_shape = XT.shape[1:])

In [36]:
# print the layer-by-layer overview (output shapes and parameter counts)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 20, 128)           99328     
_________________________________________________________________
lstm_5 (LSTM)                (None, 20, 128)           131584    
_________________________________________________________________
lstm_6 (LSTM)                (None, 20)                11920     
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 105       
Total params: 242,937
Trainable params: 242,937
Non-trainable params: 0
_________________________________________________________________


#### 4. Training

In [37]:
# configure training: categorical cross-entropy loss, Adadelta optimizer with
# its default settings, accuracy reported as the metric
model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(),
                  metrics=['accuracy'])

In [38]:
# Two consecutive training runs of 100 epochs each on the same model object.
# NOTE(review): no callback is passed, so no intermediate weights are kept;
# consider the already imported ModelCheckpoint if the best epoch matters.
fit_options = dict(batch_size=64, epochs=100, verbose=2, validation_data=(XV, YV_HOT))
for t in range(2):
    model.fit(XT, YT_HOT, **fit_options)

Train on 779 samples, validate on 87 samples
Epoch 1/100
 - 3s - loss: 1.3446 - acc: 0.5250 - val_loss: 1.0624 - val_acc: 0.7126
Epoch 2/100
 - 1s - loss: 0.8545 - acc: 0.7599 - val_loss: 0.8734 - val_acc: 0.6437
Epoch 3/100
 - 1s - loss: 0.6710 - acc: 0.7843 - val_loss: 0.5861 - val_acc: 0.8046
Epoch 4/100
 - 1s - loss: 0.4993 - acc: 0.8460 - val_loss: 0.9861 - val_acc: 0.6897
Epoch 5/100
 - 1s - loss: 0.3212 - acc: 0.9127 - val_loss: 0.3661 - val_acc: 0.8736
Epoch 6/100
 - 1s - loss: 0.2675 - acc: 0.9191 - val_loss: 0.4381 - val_acc: 0.8391
Epoch 7/100
 - 1s - loss: 0.1566 - acc: 0.9615 - val_loss: 0.3583 - val_acc: 0.9080
Epoch 8/100
 - 1s - loss: 0.0879 - acc: 0.9807 - val_loss: 1.3441 - val_acc: 0.5632
Epoch 9/100
 - 1s - loss: 0.2093 - acc: 0.9435 - val_loss: 0.4208 - val_acc: 0.8621
Epoch 10/100
 - 1s - loss: 0.0919 - acc: 0.9820 - val_loss: 0.2805 - val_acc: 0.8966
Epoch 11/100
 - 1s - loss: 0.0348 - acc: 0.9974 - val_loss: 0.3480 - val_acc: 0.9080
Epoch 12/100
 - 1s - loss: 0.

Epoch 95/100
 - 1s - loss: 1.9947e-04 - acc: 1.0000 - val_loss: 0.6124 - val_acc: 0.8736
Epoch 96/100
 - 1s - loss: 1.9663e-04 - acc: 1.0000 - val_loss: 0.6118 - val_acc: 0.8736
Epoch 97/100
 - 1s - loss: 1.9398e-04 - acc: 1.0000 - val_loss: 0.6126 - val_acc: 0.8736
Epoch 98/100
 - 1s - loss: 1.9134e-04 - acc: 1.0000 - val_loss: 0.6133 - val_acc: 0.8736
Epoch 99/100
 - 1s - loss: 1.8878e-04 - acc: 1.0000 - val_loss: 0.6148 - val_acc: 0.8736
Epoch 100/100
 - 1s - loss: 1.8632e-04 - acc: 1.0000 - val_loss: 0.6143 - val_acc: 0.8736
Train on 779 samples, validate on 87 samples
Epoch 1/100
 - 1s - loss: 1.8391e-04 - acc: 1.0000 - val_loss: 0.6105 - val_acc: 0.8736
Epoch 2/100
 - 1s - loss: 1.8151e-04 - acc: 1.0000 - val_loss: 0.6125 - val_acc: 0.8736
Epoch 3/100
 - 1s - loss: 1.7933e-04 - acc: 1.0000 - val_loss: 0.6137 - val_acc: 0.8736
Epoch 4/100
 - 1s - loss: 1.7709e-04 - acc: 1.0000 - val_loss: 0.6134 - val_acc: 0.8736
Epoch 5/100
 - 1s - loss: 1.7491e-04 - acc: 1.0000 - val_loss: 0.614

Epoch 87/100
 - 1s - loss: 8.7199e-05 - acc: 1.0000 - val_loss: 0.6764 - val_acc: 0.8621
Epoch 88/100
 - 1s - loss: 8.6663e-05 - acc: 1.0000 - val_loss: 0.6759 - val_acc: 0.8621
Epoch 89/100
 - 1s - loss: 8.6144e-05 - acc: 1.0000 - val_loss: 0.6770 - val_acc: 0.8621
Epoch 90/100
 - 1s - loss: 8.5625e-05 - acc: 1.0000 - val_loss: 0.6791 - val_acc: 0.8621
Epoch 91/100
 - 1s - loss: 8.5114e-05 - acc: 1.0000 - val_loss: 0.6827 - val_acc: 0.8621
Epoch 92/100
 - 1s - loss: 8.4610e-05 - acc: 1.0000 - val_loss: 0.6832 - val_acc: 0.8621
Epoch 93/100
 - 1s - loss: 8.4114e-05 - acc: 1.0000 - val_loss: 0.6842 - val_acc: 0.8621
Epoch 94/100
 - 1s - loss: 8.3622e-05 - acc: 1.0000 - val_loss: 0.6847 - val_acc: 0.8621
Epoch 95/100
 - 1s - loss: 8.3136e-05 - acc: 1.0000 - val_loss: 0.6844 - val_acc: 0.8621
Epoch 96/100
 - 1s - loss: 8.2650e-05 - acc: 1.0000 - val_loss: 0.6853 - val_acc: 0.8621
Epoch 97/100
 - 1s - loss: 8.2178e-05 - acc: 1.0000 - val_loss: 0.6864 - val_acc: 0.8621
Epoch 98/100
 - 1s - 

In [39]:
# write the model's weights (via save_weights, not the full model) to this file
model.save_weights('Car_5Keys_weights.h5')