In [1]:
import librosa as lr
import librosa.display
import numpy as np
from sklearn.model_selection import train_test_split
from preprocess import *
import keras
from keras import regularizers
from keras.models import Sequential
from keras.layers import BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Root folder of the training audio. The functions below list this folder
# with os.listdir and treat every entry name as one class label.
DATA_PATH = "./train/audio/"

In [3]:
def to_mfcc(file_path, max_len=11):
    """Load an audio file and return an MFCC matrix with a fixed number of frames.

    The signal is loaded as mono at the file's native sampling rate, reduced
    to every third sample, converted to MFCCs, and finally zero-padded or
    truncated along the frame axis so that exactly ``max_len`` columns remain.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        max_len: Number of MFCC frames (columns) in the returned matrix.

    Returns:
        A 2-D ``numpy.ndarray`` with ``max_len`` columns. The number of rows is
        whatever ``librosa.feature.mfcc`` produces with its default settings.
    """
    # sr=None keeps the file's native rate; that rate is not used below.
    y, _native_sr = librosa.load(file_path, mono=True, sr=None)

    # Keep every third sample (plain slicing, no low-pass filter beforehand).
    y = y[::3]

    # The signal must be passed by keyword: recent librosa releases reject a
    # positional ``y``, while older releases accept the keyword form as well.
    # NOTE(review): sr stays hard-coded to 16000 even though the signal was
    # just decimated by 3. It is kept so that the features stay identical to
    # the ones any already-saved .npy files / trained models were built from.
    mfcc = librosa.feature.mfcc(y=y, sr=16000)

    n_frames = mfcc.shape[1]
    if max_len > n_frames:
        # Too short: append zero-valued frames on the right.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        # Long enough: keep only the first max_len frames.
        mfcc = mfcc[:, :max_len]

    return mfcc

In [4]:
import os
from tqdm import tqdm
def save_data(path=DATA_PATH):
    """Compute MFCCs for every audio file under ``path`` and cache them per label.

    Every entry of ``path`` is treated as a label folder. All files inside it
    are converted with ``to_mfcc`` and the resulting list is written to
    ``<label>.npy`` in the current working directory.

    Args:
        path: Folder that contains one sub-folder per label. A trailing
            separator is optional because paths are joined with
            ``os.path.join``.
    """
    for name in os.listdir(path):
        label_dir = os.path.join(path, name)
        # Build full file paths with the same join used for the listing, so
        # both agree regardless of whether ``path`` ends with a separator.
        files = [os.path.join(label_dir, file) for file in os.listdir(label_dir)]

        mfcc_list = [
            to_mfcc(file)
            for file in tqdm(files, "Saving vectors of label - '{}'".format(name))
        ]
        np.save(name + '.npy', mfcc_list)

In [5]:
def get_train_test(split_ratio=0.6, random_state=42):
    """Load the cached per-label MFCC arrays and split them into train/test sets.

    Labels are the entries of ``os.listdir(DATA_PATH)``. For each label the
    file ``<label>.npy`` is read from the current working directory, and its
    samples receive the label's position in that listing as their class index.
    ``get_labels`` maps indices back to names using the same listing.

    Args:
        split_ratio: Fraction of the samples that goes into the training set;
            ``1 - split_ratio`` is passed to ``train_test_split`` as test_size.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The ``train_test_split`` result: ``X_train, X_test, y_train, y_test``.

    Raises:
        ValueError: If ``DATA_PATH`` contains no entries.
    """
    labels = os.listdir(DATA_PATH)
    if not labels:
        raise ValueError("no label folders found in {!r}".format(DATA_PATH))

    # Collect the pieces first and stack once, instead of re-copying the
    # growing arrays for every label.
    features = []
    targets = []
    for index, label in enumerate(labels):
        vectors = np.load(label + '.npy')
        features.append(vectors)
        # float64 targets, matching what np.zeros / np.append produced before.
        targets.append(np.full(vectors.shape[0], index, dtype=np.float64))

    X = np.vstack(features)
    y = np.concatenate(targets)

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size=(1 - split_ratio), random_state=random_state, shuffle=True)

In [14]:
# Regenerate the per-label .npy caches, then load and split them.
save_data()
X_train, X_test, y_train, y_test = get_train_test()

# Append a trailing channel axis: (n_samples, 20, 11) -> (n_samples, 20, 11, 1).
X_train, X_test = (
    split.reshape(split.shape[0], 20, 11, 1) for split in (X_train, X_test)
)

# One-hot encode the integer class indices for categorical cross-entropy.
y_train_hot, y_test_hot = (to_categorical(targets) for targets in (y_train, y_test))

Saving vectors of label - 'bed': 100%|██████████| 1713/1713 [00:13<00:00, 125.93it/s]
Saving vectors of label - 'cat': 100%|██████████| 1733/1733 [00:13<00:00, 127.55it/s]
Saving vectors of label - 'dog': 100%|██████████| 1746/1746 [00:13<00:00, 128.04it/s]
Saving vectors of label - 'happy': 100%|██████████| 1742/1742 [00:13<00:00, 128.27it/s]
Saving vectors of label - 'house': 100%|██████████| 1750/1750 [00:13<00:00, 126.72it/s]


In [10]:
# Load and split the cached .npy data again (no MFCC recomputation here).
X_train, X_test, y_train, y_test = get_train_test()

# Give every sample an explicit single-channel axis.
X_train = X_train.reshape((len(X_train), 20, 11, 1))
X_test = X_test.reshape((len(X_test), 20, 11, 1))

# One-hot versions of the class indices.
y_train_hot, y_test_hot = to_categorical(y_train), to_categorical(y_test)

In [11]:
def Model_v2(input_shape=(20, 11, 1), num_classes=5):
    """Build and compile the convolutional classifier.

    Layout: three stages, each made of two L2-regularised 2x2 convolutions
    (every convolution followed by batch normalisation), a 2x2 max-pooling
    and a dropout layer (rates 0.2, 0.3, 0.4). The stages use 32, 64 and 128
    filters with 'elu', 'relu' and 'elu' activations respectively. The head is
    Flatten -> Dense(512, relu) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one input sample. The default matches the
            ``(20, 11, 1)`` arrays produced by the data-preparation cells.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy,
        the Adadelta optimizer and the accuracy metric.
    """
    weight_decay = 1e-4
    model = Sequential()

    def conv_bn(filters, activation, padding='same', **extra):
        """Append one regularised 2x2 convolution plus batch normalisation."""
        model.add(Conv2D(filters=filters, kernel_size=(2, 2), padding=padding,
                         activation=activation,
                         kernel_regularizer=regularizers.l2(weight_decay),
                         **extra))
        model.add(BatchNormalization())

    # Stage 1. The second convolution uses 'valid' padding (the Keras default,
    # which the original call relied on by omitting the argument), so it trims
    # one row and one column.
    conv_bn(32, 'elu', input_shape=input_shape)
    conv_bn(32, 'elu', padding='valid')
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    # Stage 2.
    conv_bn(64, 'relu')
    conv_bn(64, 'relu')
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    # Stage 3.
    conv_bn(128, 'elu')
    conv_bn(128, 'elu')
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.4))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Adadelta is the optimizer the model has always been compiled with; the
    # previously constructed RMSprop instance was never passed to compile().
    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])

    return model

In [12]:
def predict(filepath, model):
    """Return the label name that ``model`` predicts for one audio file.

    Args:
        filepath: Path of the audio file; it is converted with ``to_mfcc``.
        model: Object whose ``predict`` method scores a ``(1, 20, 11, 1)`` batch.

    Returns:
        The entry of the label list from ``get_labels()`` at the index of the
        highest score.
    """
    features = to_mfcc(filepath)
    # Single-sample batch with an explicit channel axis.
    batch = features.reshape(1, 20, 11, 1)
    label_names = get_labels()[0]
    scores = model.predict(batch)
    return label_names[np.argmax(scores)]

In [16]:
# Build a fresh network and print its layer table.
model = Model_v2()
model.summary()

# Train for 50 epochs; the test split doubles as validation data.
history = model.fit(
    X_train,
    y_train_hot,
    batch_size=128,
    epochs=50,
    verbose=1,
    validation_data=(X_test, y_test_hot),
)

# Final expression of the cell, so its return value is displayed as output.
model.evaluate(X_test, y_test_hot)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 20, 11, 32)        160       
_________________________________________________________________
batch_normalization_19 (Batc (None, 20, 11, 32)        128       
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 19, 10, 32)        4128      
_________________________________________________________________
batch_normalization_20 (Batc (None, 19, 10, 32)        128       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 9, 5, 32)          0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 9, 5, 32)          0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 9, 5, 64)          8256      
__________

[0.2829515112582197, 0.9441565918593003]

In [18]:
def get_labels(path=DATA_PATH):
    """List the label names found in ``path`` together with their encodings.

    Args:
        path: Folder whose entries (as returned by ``os.listdir``) are the labels.

    Returns:
        A 3-tuple ``(names, indices, one_hot)``: the directory entries, their
        positions ``0 .. len(names) - 1`` as a NumPy array, and the result of
        ``to_categorical`` applied to those positions.
    """
    names = os.listdir(path)
    indices = np.arange(len(names))
    one_hot = to_categorical(indices)
    return names, indices, one_hot

In [33]:
# Classify a single audio file with the trained model and print the predicted label.
print(predict('./1.wav', model=model))

bed
