# Training
The purpose of this notebook is to train a Convolutional Recurrent Neural Network to learn the audio beep sequence emitted by a Miele washing machine at the end of a wash cycle.

The model configuration is based on the "Convolutional Recurrent Neural Networks for Small-Footprint Keyword Spotting" paper i.e.:

```
Per-channel normalized mel-spectrograms
        |
        V
Convolution layer
        |
        V
Recurrent layers
        |
        V
Fully-connected layer
        |
        v
    Softmax
```


In [27]:
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.models import Sequential

from keras.layers import Dense, Dropout, Activation, Flatten, Reshape
from keras.layers import Convolution2D, Convolution3D
from keras.layers.wrappers import TimeDistributed
from keras.layers.recurrent import GRU, LSTM
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger
from keras.utils import np_utils

from keras import backend as K

import tensorflow as tf

# NOTE(review): this rebinds `tf.python.control_flow_ops` to the top-level
# `tensorflow` module itself. Presumably a compatibility shim for a
# Keras/TensorFlow version mismatch -- confirm it is still required.
tf.python.control_flow_ops = tf

# ---- Fit parameters ---------------------------------------------------------
batch_size, nb_classes, nb_epoch = 64, 2, 12

# ---- Convolution layer settings ---------------------------------------------
nb_conv_filters = 32     # number of convolutional filters to use
pool_size = (2, 2)       # size of pooling area for max pooling
kernel_size = (5, 20)    # convolution kernel size
strides = (2, 8)         # convolution strides

# ---- Input geometry ---------------------------------------------------------
# Each sample is 40 channels x 351 timesteps x 1 image plane.
img_rows, img_cols = 40, 351
window_size = 100
input_shape = (img_rows, img_cols) + (1,)

# ---- Recurrent / fully-connected layer widths -------------------------------
nb_rnn_filters, nb_dense_filters = 32, 64

# ---- Reproducibility --------------------------------------------------------
# Seed NumPy's global random generator with a fixed value.
seed = 1337
np.random.seed(seed)

# Load Training and Test Data

In [28]:
import numpy as np
import os
# Directory holding the pre-computed mel-spectrogram arrays and their labels.
# Set the MEL_DATA_DIR environment variable to point at your own copy of the
# data; the fallback is the location used when this notebook was first run.
mel_directory = os.environ.get(
    'MEL_DATA_DIR',
    r'/Volumes/ThorsHammer/Data Science/data/audio-recognition/mel_3.5/')

# Recording 161225-002 supplies the training arrays, 161225-003 the test arrays.
trainX = np.load(os.path.join(mel_directory, "161225-002.wav-mel.npy"))
y_train = np.load(os.path.join(mel_directory, "161225-002-mel-labels.npy"))
testX = np.load(os.path.join(mel_directory, "161225-003.wav-mel.npy"))
y_test = np.load(os.path.join(mel_directory, "161225-003-mel-labels.npy"))

# Every sample needs exactly one label; fail here with a clear message rather
# than later inside model.fit().
assert trainX.shape[0] == y_train.shape[0], "train features/labels length mismatch"
assert testX.shape[0] == y_test.shape[0], "test features/labels length mismatch"

In [29]:
# Show the dimensions of each loaded array, one per line:
# train features, train labels, test features, test labels.
print('\n'.join(str(loaded.shape) for loaded in (trainX, y_train, testX, y_test)))

(4792, 40, 351)
(4792,)
(5506, 40, 351)
(5506,)


In [30]:
# Give every sample an explicit trailing axis of size 1 so the arrays have the
# (samples, img_rows, img_cols, 1) layout that matches `input_shape`.
x_train = np.reshape(trainX, (len(trainX), img_rows, img_cols, 1))
print(x_train.shape)
x_test = np.reshape(testX, (len(testX), img_rows, img_cols, 1))
print(x_test.shape)

(4792, 40, 351, 1)
(5506, 40, 351, 1)


In [31]:
# Display the per-sample input shape that will be handed to make_model().
input_shape

(40, 351, 1)

In [34]:
def make_model(ishape):
    """Build and compile the convolutional-recurrent binary classifier.

    Layers, in order: one strided 2-D convolution, a reshape that merges the
    convolution's two spatial axes into a single sequence axis, two stacked
    GRU layers, a fully-connected layer, and a single sigmoid output unit.

    Layer sizes are read from the notebook-level settings ``nb_conv_filters``,
    ``kernel_size``, ``strides``, ``nb_rnn_filters`` and ``nb_dense_filters``.

    Args:
        ishape: shape of one input sample in channels-last order,
            e.g. ``(40, 351, 1)``.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and the binary-accuracy metric.
    """
    model = Sequential()
    model.add(
        Convolution2D(
            nb_conv_filters,
            kernel_size=kernel_size,
            strides=strides,
            input_shape=ishape,
            kernel_initializer="he_normal",
            activation='relu',
            data_format='channels_last',
            padding='valid'
        )
    )

    # Merge the two spatial axes of the convolution output into one sequence
    # axis for the recurrent layers. The sizes are taken from the layer's real
    # output shape instead of a hard-coded (756, 32), which was only correct
    # for a (40, 351, 1) input with 32 filters.
    _, conv_rows, conv_cols, conv_filters = model.output_shape
    model.add(Reshape((conv_rows * conv_cols, conv_filters)))

    # First GRU emits the full sequence so the second GRU can consume it; the
    # second GRU returns only its final state.
    model.add(GRU(nb_rnn_filters, return_sequences=True))
    model.add(GRU(nb_rnn_filters))

    # NOTE(review): no activation is passed to this Dense layer, so it uses the
    # Keras default -- confirm that a purely linear layer is intended here.
    model.add(Dense(nb_dense_filters))

    # Binary classification head: one unit squashed through a sigmoid.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['binary_accuracy'])
    return model


model = make_model(input_shape)
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 18, 42, 32)        3232      
_________________________________________________________________
reshape_6 (Reshape)          (None, 756, 32)           0         
_________________________________________________________________
gru_7 (GRU)                  (None, 756, 32)           6240      
_________________________________________________________________
gru_8 (GRU)                  (None, 32)                6240      
_________________________________________________________________
dense_6 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
_________________________________________________________________
activation_3 (Activation)    (None, 1)                 0         
Total para

In [35]:
def generate_arrays_from_file(filename):
    """Endlessly yield ``(x, y)`` pairs parsed from the lines of a text file.

    The file is re-opened and read from the start each time its end is
    reached, so the generator never terminates on its own. Each line is
    converted by ``process_line``, which must be defined elsewhere and return
    an ``(x, y)`` pair.

    Args:
        filename: path of the text file to read, one sample per line.

    Yields:
        The ``(x, y)`` tuple produced by ``process_line`` for each line.

    Raises:
        ValueError: if a full pass over the file produces no samples; without
            this check an empty file would make the loop spin forever.
    """
    while True:
        produced_any = False
        # Open the file named by the argument (the original referenced an
        # undefined name `path`, which raised NameError on first use).
        with open(filename, 'r') as f:
            for line in f:
                # create Numpy arrays of input data
                # and labels, from each line in the file
                x, y = process_line(line)
                produced_any = True
                yield (x, y)
        if not produced_any:
            raise ValueError('no samples found in %r' % (filename,))

In [36]:
import os
import time

sample_length=40
model_type = 'CRNN'
data_type = 'images'

print('input_shape: ',input_shape)
print('X_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# The checkpoint files and the CSV log are written into these directories;
# create them up front so a fresh checkout does not fail on the first write.
for out_dir in ('./checkpoints', './logs'):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Helper: Save the model. The file name embeds the epoch number and the
# validation loss; only improvements are written (save_best_only=True).
checkpointer = ModelCheckpoint(
    filepath='./checkpoints/' + model_type + '-' + data_type + \
        '.{epoch:03d}-{val_loss:.3f}.hdf5',
    verbose=1,
    save_best_only=True)

# Helper: TensorBoard
tb = TensorBoard(log_dir='./logs')

# Helper: Stop when we stop learning.
early_stopper = EarlyStopping(patience=10)

# Helper: Save results. The timestamp keeps log files from separate runs apart.
timestamp = time.time()
csv_logger = CSVLogger('./logs/' + model_type + '-' + 'training-' + \
    str(timestamp) + '.log')

# Build a fresh model so re-running this cell does not continue training a
# previously fitted one.
model = make_model(input_shape)
model.summary()

# `epochs` is the Keras 2 name of this argument, matching the Keras 2 layer
# arguments used in make_model(); `nb_epoch` is the older spelling.
# The History object is kept so the training curves can be inspected later.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[checkpointer, tb, early_stopper, csv_logger]
)

('input_shape: ', (40, 351, 1))
('X_train shape:', (4792, 40, 351, 1))
(4792, 'train samples')
(5506, 'test samples')
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 18, 42, 32)        3232      
_________________________________________________________________
reshape_7 (Reshape)          (None, 756, 32)           0         
_________________________________________________________________
gru_9 (GRU)                  (None, 756, 32)           6240      
_________________________________________________________________
gru_10 (GRU)                 (None, 32)                6240      
_________________________________________________________________
dense_8 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 65        
________________________

<keras.callbacks.History at 0x11b1dcc90>

In [37]:
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Loss / metric values on the test arrays (stored for inspection, not printed).
scores = model.evaluate(x_test, y_test, verbose=0)

# Predicted probabilities and the corresponding predicted class labels.
prob_nn = model.predict_proba(x_test, verbose=0)
y_pred = model.predict_classes(x_test)

# Probability-based quality measures.
auc = metrics.roc_auc_score(y_true=y_test, y_score=prob_nn)
logloss = metrics.log_loss(y_true=y_test, y_pred=prob_nn)

print('auc: {:0.2f}'.format(auc))
print('logloss: {:0.2f}'.format(logloss))
print('confusion matrix')
# Last expression of the cell, so the matrix is shown as the cell output.
confusion_matrix(y_true=y_test, y_pred=y_pred)

auc: 0.19
logloss: 0.09
confusion matrix


array([[5406,    0],
       [ 100,    0]])

In [None]:
%%time
# NOTE(review): this cell has no execution count, and `X` and `response` are
# not defined in any cell visible in this notebook -- presumably left over from
# an earlier experiment; confirm before running.
# NOTE(review): the shapes built below have two dimensions per sample, whereas
# the other cells pass make_model() a (rows, cols, 1) shape -- verify that
# make_model() accepts them.
# Running this cell rebinds the notebook-level `input_shape` and `model`.
# The input shape for the model needs to be defined depending on the active backend
if K.image_dim_ordering() == 'th': # Theano
    X_all = X.values.reshape(X.shape[0], 1, sample_length)
    input_shape = (1, sample_length)
else: # TensorFlow
    X_all = X.values.reshape(X.shape[0], sample_length, 1)
    input_shape = (sample_length, 1)

model = make_model(input_shape)
# Fit on the reshaped data; no validation data or callbacks are passed here.
history = model.fit(np.array(X_all), response, batch_size=batch_size, nb_epoch=nb_epoch, verbose=1)

# Save the trained model to a file in the current working directory.
model.save('keyword_spotting_washing_mc.h5')

In [63]:
# Display the test labels as the cell output.
y_test

array([ 0.,  0.,  0., ...,  0.,  0.,  0.], dtype=float32)