(1) Importing dependency

In [1]:
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, Flatten,\
                         Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.utils import shuffle

import pandas as pd
import librosa
from tqdm import tqdm_notebook as tqdm
import os

import numpy as np
np.random.seed(13)
import random
random.seed(13)

import matplotlib.pyplot as plt
% matplotlib inline

Using TensorFlow backend.
UsageError: Line magic function `%` not found.


(2) Get Data

In [5]:
base_dir = 'data/'
data_dir = base_dir+'data_speech_commands_v0.02/'
 
train_txt = base_dir+'train_16words.txt'
val_txt = base_dir+'validation_16words.txt'
test_txt = base_dir+'test_16words.txt'

In [2]:
class_list = ['zero', 'one', 'two', 'three', 'four', 
              'five', 'six', 'seven', 'eight', 'nine', 
              'bed', 'bird', 'tree', 'cat', 'house',
              'dog']

class_dict = {_class:i for i, _class in enumerate(class_list)}

In [3]:
def load_wavs(filenames):
    first_wav, _ = librosa.load(filenames[0])
    wav_shape = first_wav.shape[0]
    x_data = [librosa.util.fix_length(librosa.load(filename)[0], wav_shape) 
              for filename in tqdm(filenames)]
    return np.asarray(x_data)
    
### If you have lack of memory, Use this
#     wav, _ = librosa.load(filenames[0])
#     wavs = np.zeros( (len(filenames), wav.shape[0]) )
#     for i, filename in enumerate(filenames):
#         wavs[i] = librosa.load(filename)[0][:]
#     return wavs

def make_x_data(filenames):
    x_data = load_wavs(filenames)
    x_data = np.reshape(x_data, (*x_data.shape, 1))
    return x_data
    
def extract_class_from_filename(filename):
    dirname = os.path.dirname(filename)
    class_name = dirname.split('/')[-1]
    return class_name

def make_y_data(filenames, y_dict):
    return np.asarray([to_categorical(y_dict[extract_class_from_filename(filename)], len(class_list)) 
                       for filename in filenames])

def make_xy_data(filenames, y_dict):
    x_data = make_x_data(filenames)
    y_data = make_y_data(filenames, y_dict)
    return x_data, y_data

In [6]:
print('Make train data.......')
with open(train_txt, 'r') as f:
    train_filename_list = [data_dir+line for line in f.read().splitlines()][:100]
x_train, y_train = make_xy_data(train_filename_list, class_dict)
x_train, y_train = shuffle(x_train, y_train)

print('Make validation data.......')
with open(val_txt, 'r') as f:
    val_filename_list = [data_dir+line for line in f.read().splitlines()][:30]
x_val, y_val = make_xy_data(val_filename_list, class_dict)
x_val, y_val = shuffle(x_train, y_train)

print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

Make train data.......


HBox(children=(IntProgress(value=0), HTML(value='')))


Make validation data.......


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))


(100, 22050, 1) (100, 16) (100, 22050, 1) (100, 16)


(3) Create a sequential model

In [12]:
model=Sequential()

# Layer 1
model.add(Conv1D (kernel_size=3, filters=128, strides=3, padding='valid',
                  kernel_initializer='he_uniform', input_shape=x_train.shape[1:]))                  
model.add(BatchNormalization())
model.add(Activation('relu'))

# Layer 2
model.add(Conv1D (kernel_size=3, filters=128, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 3
model.add(Conv1D (kernel_size=3, filters=128, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 4
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 5
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 6
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 7
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 8
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 9
model.add(Conv1D (kernel_size=3, filters=256, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 10
model.add(Conv1D (kernel_size=3, filters=512, padding='same', kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=3, strides=3))

# Layer 11
model.add(Dropout(0.5))
model.add(Flatten())

# Layer 12
model.add(Dense(10))
model.add(Activation('softmax'))
model.summary()

ValueError: Negative dimension size caused by subtracting 3 from 1 for 'max_pooling1d_34/MaxPool' (op: 'MaxPool') with input shapes: [?,1,1,512].

(4) Compile 

In [None]:
model.compile(loss='categorical_crossentropy', 
              optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True),
              metrics=['accuracy'])

(5) Train

In [None]:
model_path = '/users/lww/data/checkpoint/SampleCNN_based_check_point/'
os.makedirs(model_path, exist_ok=True)
model_filename = model_path+'{epoch:02d}-{val_loss:.4f}.hdf5'
checkpointer = ModelCheckpoint(filepath = model_filename, monitor = "val_loss", verbose=1, save_best_only=True)

early_stopping = EarlyStopping(monitor='val_loss', patience=100)

In [None]:
hist = model.fit(x_train, y_train, batch_size=64, epochs=10000, validation_data=[x_val, y_val], 
                 shuffle=True, callbacks = [checkpointer, early_stopping])

In [None]:
fig, loss_ax = plt.subplots()
loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(loc='upper left')

plt.show()

(6) Test

In [None]:
print('Make test data.......')
with open(test_txt, 'r') as f:
    test_filename_list = [data_dir+line for line in f.read().splitlines()]
x_test, y_test = make_xy_data(test_filename_list, class_dict)
x_test, y_test = shuffle(x_test, y_test)

In [None]:
model = load_model(model_path+'85-0.0714.hdf5')

In [None]:
[loss, accuracy] = model.evaluate(x_test, y_test)
print('Loss:', loss, 'Accuracy:', accuracy)

In [None]:
pred = model.predict(x_test)
test_f1_score = f1_score(y_test, pred > 0.5)
print('F1 Score:', test_f1_score)

In [None]:
%timeit -n 5 -r 5 model.predict(x_test)