# Keras model
****
The model from the preliminary CNN notebook can also be built with Keras. Keras will enable us to drop the unknown class, and instead let the model decide on a 'none-of-the-above' label.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from glob import glob
import tensorflow as tf
import pandas as pd
import matplotlib.image as img
import numpy as np
from sklearn.metrics import confusion_matrix
import time
#from imblearn.under_sampling import RandomUnderSampler
from sklearn.utils import resample
from datetime import timedelta
import math
import re
import os
from PIL import Image

import keras
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


## Setting up constants
****
First we will set up our constants and filepaths

In [2]:
# Convolutional Layer 1.
filter_size1 = 7          # Convolution filters are 7x7 pixels.
num_filters1 = 56         # There are 56 of these filters.

# Convolutional Layer 2.
filter_size2 = 7          # Convolution filters are 7x7 pixels.
num_filters2 = 112        # There are 112 of these filters.

# Fully-connected layers.
fc_size = 512             # Number of neurons in the first fully-connected layer.
fc_size_2 = 256           # Number of neurons in the second fully-connected layer.

# The number of pixels in each dimension of an image.
img_height = 128 # 161 for spectrogram, 128 for mfcc
img_width = 32 # 99 for spec, 32 for mfcc

# The images are stored in one-dimensional arrays of this length.
img_size_flat = img_height * img_width

# Tuple with height and width of images used to reshape arrays.
# Derived from the two values above so that switching between mfcc and
# spectrogram input only requires editing img_height / img_width.
img_shape = (img_height, img_width)

# Number of classes: one per entry of POSSIBLE_LABELS
# (ten command words plus 'silence' and 'unknown').
num_classes = 12

# Number of colour channels for the images: 1 channel for gray-scale.
num_channels = 1

In [3]:
# Target labels in their canonical order; the position in this list is the class id.
POSSIBLE_LABELS = 'yes no up down left right on off stop go silence unknown'.split()
# Forward lookup: class id -> label name.
id2name = dict(enumerate(POSSIBLE_LABELS))
# Reverse lookup: label name -> class id.
name2id = {label: idx for idx, label in id2name.items()}
len(id2name)

12

## Loading data
****
As in the previous model, we first load the file names into a DataFrame.

In [4]:
def load_data(data_dir):
    """Index the MFCC training files and split them into train / validation frames.

    Files are discovered with the glob ``<data_dir>/mfcc/train/*/*``. The label
    is the name of the directory holding the file and the user id is the part
    of the file name before the first underscore. A file goes to the validation
    frame when its user id appears in ``<data_dir>/train/validation_list.txt``;
    every other file goes to the training frame.

    Directory names that are not in ``POSSIBLE_LABELS`` are mapped to
    ``'unknown'``; ``'_background_noise_'`` is mapped to ``'silence'``.

    Parameters
    ----------
    data_dir : str
        Root directory containing ``mfcc/train/<label>/<file>`` and
        ``train/validation_list.txt``.

    Returns
    -------
    (train_df, valid_df) : tuple of pandas.DataFrame
        Both frames have the columns ``label``, ``label_id``, ``label_vec``
        (one-hot vector of length ``len(id2name)``), ``user_id`` and
        ``file_name``.
    """
    # Just a simple regexp for paths with three groups:
    # prefix, label, user_id
    # Raw string: "\/" and "\w" are invalid escapes in a normal string literal
    # and trigger a warning on recent Python versions. The pattern is unchanged.
    pattern = re.compile(r"(.+\/)?(\w+)\/([^_]+)_") # for file types add '.+(type)'
    all_files = glob(os.path.join(data_dir, 'mfcc/train/*/*')) # for file types at (type)
                                                            # file path csv for 1sec exactly, and csv_full
                                                            # for all file
                                                            # for mfcc use mfcc/

    with open(os.path.join(data_dir, 'train/validation_list.txt'), 'r') as fin:
        # Drop the last three characters of every line, as the original did;
        # only the user id in front of the first '_' is used below.
        validation_files = [x[:-3] for x in fin.readlines()] # for file types at "+'(type)'" to [:-3]

    # User ids whose clips belong to the validation split.
    valset = set()
    for entry in validation_files:
        r = pattern.match(entry)
        if r:
            valset.add(r.group(3))

    possible = set(POSSIBLE_LABELS)
    # Build the identity matrix once; its rows are the one-hot label vectors.
    one_hot = np.eye(len(id2name))

    train, val = [], []
    for entry in all_files:
        r = pattern.match(entry)
        if r:
            label, uid = r.group(2), r.group(3)
            if label == '_background_noise_':
                label = 'silence'
            if label not in possible:
                label = 'unknown'

            label_id = name2id[label]
            # Copy so every sample owns an independent 1-D vector instead of a
            # view that keeps a whole matrix alive.
            label_vec = one_hot[label_id].copy()

            sample = (label, label_id, label_vec, uid, entry)
            if uid in valset:
                val.append(sample)
            else:
                train.append(sample)

    print('There are {} train and {} val samples'.format(len(train), len(val)))

    columns_list = ['label', 'label_id', 'label_vec', 'user_id', 'file_name']

    train_df = pd.DataFrame(train, columns = columns_list)
    valid_df = pd.DataFrame(val, columns = columns_list)

    return train_df, valid_df

In [5]:
# Build the train / validation file indexes. data_dir is the empty string, so
# the 'mfcc/train/*/*' and 'train/validation_list.txt' paths joined inside
# load_data are resolved relative to the current working directory.
train_df, valid_df = load_data('')
# Subset of column names; not referenced by the cells visible in this part of the notebook.
imp_cols = ['label','label_vec','user_id','file_name']
# Preview the first validation rows.
valid_df.head()

There are 57923 train and 6798 val samples


Unnamed: 0,label,label_id,label_vec,user_id,file_name
0,right,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",9190045a,mfcc/train/right/9190045a_nohash_0
1,right,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",a6d586b7,mfcc/train/right/a6d586b7_nohash_4
2,right,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",dca2797e,mfcc/train/right/dca2797e_nohash_4
3,right,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",538e1856,mfcc/train/right/538e1856_nohash_0
4,right,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",a6d586b7,mfcc/train/right/a6d586b7_nohash_3


### Next we will reduce the unknown class, the same way as before.

In [6]:
# Id of the catch-all 'unknown' class (11 with the current POSSIBLE_LABELS).
unknown_id = name2id['unknown']
is_unknown = train_df['label_id'] == unknown_id

df_maj = train_df[is_unknown]      # over-represented 'unknown' samples
df_rest = train_df[~is_unknown]    # every other class, kept untouched

# Mean number of samples per remaining class, truncated to an integer.
# Uses the builtin int(): the np.int alias was removed from NumPy and raises
# AttributeError on current versions.
avg_size = int(df_rest['label_id'].value_counts().mean())
columns_list = ['label_id', 'label', 'label_vec', 'user_id', 'file_name']

df_majority_downsampled = resample(df_maj,
                                   replace=False,    # sample without replacement
                                   # never ask for more rows than exist, which
                                   # would raise when sampling without replacement
                                   n_samples=min(avg_size, len(df_maj)),
                                   random_state=5)   # fixed seed for reproducibility

train_DS = pd.concat([df_majority_downsampled, df_rest])
train_DS.head()

Unnamed: 0,label,label_id,label_vec,user_id,file_name
38923,unknown,11,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1625acd8,mfcc/train/marvin/1625acd8_nohash_1
53094,unknown,11,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",9aa5439d,mfcc/train/five/9aa5439d_nohash_2
19083,unknown,11,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",60472d26,mfcc/train/nine/60472d26_nohash_1
10120,unknown,11,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44dad20e,mfcc/train/happy/44dad20e_nohash_1
57029,unknown,11,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",937b433e,mfcc/train/four/937b433e_nohash_0


In [7]:
# Samples per class id after down-sampling 'unknown', to check the class balance.
train_DS['label_id'].value_counts()

8     2134
0     2116
2     2115
9     2112
5     2111
11    2110
6     2110
4     2106
1     2105
7     2101
3     2095
Name: label_id, dtype: int64

# Getting Ready for Keras
****
Using the template from https://github.com/spmallick/learnopencv/blob/master/KerasCNN-CIFAR/keras-cnn-cifar10.ipynb,
I want to make sure that everything fits together correctly.

In [8]:
# Distinct label names that actually occur in the down-sampled training frame.
# NOTE(review): this counts only the labels present in train_DS. The one-hot
# 'label_vec' column is built with np.eye(len(id2name)) in load_data, so when a
# label from POSSIBLE_LABELS has no training files nClasses is smaller than the
# one-hot width. Confirm before using nClasses to size the output layer.
classes = train_DS['label'].unique()
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

Total number of outputs :  11
Output classes :  ['unknown' 'right' 'go' 'no' 'left' 'stop' 'up' 'down' 'yes' 'on' 'off']


In [9]:
# Find the shape of input images and create the variable input_shape
nRows, nCols, nDims = img_height, img_width, num_channels
input_shape = (nRows, nCols, nDims)


def _load_features(file_names):
    """Read one feature vector per file and return a 4-D float32 array.

    Each file is parsed as a header-less CSV and its first column is used as
    the sample, exactly as the original comprehension did. The stacked samples
    are reshaped to ``(n_samples, nRows, nCols, nDims)``, which is the layout
    the Conv2D input layer is declared with.

    NOTE(review): assumes every file holds nRows * nCols * nDims values in
    row-major image order. Confirm against the MFCC export; a mismatch raises
    ValueError in the reshape instead of failing later inside ``fit``.
    """
    rows = [pd.read_csv(path, sep=',', header=None).T.values[0] for path in file_names]
    features = np.array(rows, dtype='float32')
    return features.reshape(-1, nRows, nCols, nDims)


def _stack_labels(label_vectors):
    """Stack a column of one-hot vectors into an (n_samples, n_labels) array."""
    return np.array(list(label_vectors))


# Training arrays come from the down-sampled frame. The original read the
# labels from an undefined name (batch_df), which fails on a fresh kernel and
# could not be guaranteed to line up with train_data.
train_data = _load_features(train_DS['file_name'])
train_labels_one_hot = _stack_labels(train_DS['label_vec'])

# The validation frame is used as the test set.
test_data = _load_features(valid_df['file_name'])
test_labels_one_hot = _stack_labels(valid_df['label_vec'])

KeyboardInterrupt: 

# Keras Model
****
Now we will build our Keras model using the same architecture as the TensorFlow one. With TensorFlow, we found we were able to exceed 78% accuracy.

In [18]:
def createModel(n_outputs=None):
    """Build the (uncompiled) CNN classifier.

    Architecture: two Conv2D + 2x2 max-pooling stages, flatten, dropout, two
    dense ReLU layers, dropout and a softmax output layer. Layer sizes come
    from the notebook constants (filter_size1/2, num_filters1/2, fc_size,
    fc_size_2) and the input layout from ``input_shape``.

    Parameters
    ----------
    n_outputs : int, optional
        Width of the softmax layer. Defaults to ``num_classes`` so that it
        matches the one-hot label vectors, which have one entry per label in
        POSSIBLE_LABELS. The original used ``nClasses``, the number of labels
        present in the training frame; that is smaller when a label has no
        training samples and makes ``fit`` fail on a shape mismatch.

    Returns
    -------
    keras.models.Sequential
        The model, not yet compiled.
    """
    if n_outputs is None:
        n_outputs = num_classes

    model = Sequential()
    # First convolution stage: 'same' padding keeps the spatial size before pooling.
    model.add(Conv2D(num_filters1, (filter_size1, filter_size1),
                     padding='same', activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolution stage (default 'valid' padding).
    model.add(Conv2D(num_filters2, (filter_size2, filter_size2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dropout(0.5))

    # Classifier head.
    model.add(Dense(fc_size, activation='relu'))
    model.add(Dense(fc_size_2, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_outputs, activation='softmax'))

    return model

## Training the model

In [None]:
model1 = createModel()
batch_size = 50
epochs = 1000      # upper bound only; early stopping normally ends training sooner
patience = 10      # epochs without val_loss improvement before training stops
model1.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

model1.summary()

############

# Stop once the validation loss stops improving instead of always running all
# `epochs` passes, which wastes compute and overfits the training set.
early_stop = EarlyStopping(monitor='val_loss', patience=patience)

history = model1.fit(train_data, train_labels_one_hot, batch_size=batch_size, epochs=epochs, verbose=1,
                     validation_data=(test_data, test_labels_one_hot),
                     callbacks=[early_stop])
# Final loss and accuracy on the validation set (displayed as the cell output).
model1.evaluate(test_data, test_labels_one_hot)

In [None]:
# Training (red) and validation (blue) loss per epoch, drawn on an explicit axes.
loss_fig, loss_ax = plt.subplots(figsize=[8, 6])
loss_ax.plot(history.history['loss'], 'r', linewidth=3.0)
loss_ax.plot(history.history['val_loss'], 'b', linewidth=3.0)
loss_ax.legend(['Training loss', 'Validation Loss'], fontsize=18)
loss_ax.set_xlabel('Epochs ', fontsize=16)
loss_ax.set_ylabel('Loss', fontsize=16)
loss_ax.set_title('Loss Curves', fontsize=16)

In [None]:
# Keras reports the 'accuracy' metric under the key 'acc' in older releases and
# under 'accuracy' in newer ones; use whichever key this run's history has.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Training (red) and validation (blue) accuracy per epoch.
plt.figure(figsize=[8,6])
plt.plot(history.history[acc_key],'r',linewidth=3.0)
plt.plot(history.history['val_' + acc_key],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy Curves',fontsize=16)