In [44]:
def add_white_noise(y, rate=0.002):
    """Data augmentation: add Gaussian white noise to a signal.

    Parameters
    ----------
    y : array-like
        Audio samples. Any shape is accepted; noise is drawn independently
        for every element.
    rate : float, optional
        Scale factor applied to the standard-normal noise (default 0.002).

    Returns
    -------
    np.ndarray
        ``y`` plus ``rate`` times standard-normal noise, same shape as ``y``.
    """
    y = np.asarray(y)
    # Draw noise with the full shape of ``y``. Sizing it by ``len(y)`` only
    # covers the first axis and fails to broadcast for multi-channel input.
    return y + rate * np.random.randn(*y.shape)

# data augmentation: shift sound in timeframe
def shift_sound(y, rate=2):
    """Circularly shift a signal by ``len(y) // rate`` positions.

    The shift is done with ``np.roll``, so elements pushed past the end
    re-enter at the start and the number of samples is unchanged.

    Parameters
    ----------
    y : array-like
        Audio samples.
    rate : int or float, optional
        Divisor of the signal length that determines the shift; the default
        of 2 rolls the signal by half its length.

    Returns
    -------
    np.ndarray
        The rolled signal.
    """
    offset = int(len(y) // rate)
    return np.roll(y, offset)

# data augmentation: stretch sound
def stretch_sound(y, rate=1.1):
    """Data augmentation: time-stretch a signal while keeping its length.

    The signal is passed through ``librosa.effects.time_stretch`` and the
    result is forced back to the original number of samples: longer output
    is truncated, shorter output is zero-padded at the end.

    Parameters
    ----------
    y : array-like
        Audio samples.
    rate : float, optional
        Stretch factor forwarded to ``librosa.effects.time_stretch``
        (default 1.1).

    Returns
    -------
    array-like
        Stretched signal with exactly ``len(y)`` samples.
    """
    input_length = len(y)
    # ``rate`` is keyword-only in librosa >= 0.10; passing it by keyword also
    # works on older releases, whereas a positional argument raises TypeError
    # on current ones.
    stretched = librosa.effects.time_stretch(y, rate=rate)
    if len(stretched) > input_length:
        return stretched[:input_length]
    # Here ``len(stretched) <= input_length``, so the pad width is never negative.
    return np.pad(stretched, (0, input_length - len(stretched)), "constant")

In [66]:
import librosa
import numpy as np
import os
import glob
from keras.utils import np_utils


def GetWAVfiles(folder_name):
    """Return the paths of all ``*.wav`` files directly inside ``folder_name``.

    Parameters
    ----------
    folder_name : str
        Directory to search (not recursive).

    Returns
    -------
    list of str
        Matching paths in sorted order; empty when nothing matches.
    """
    waves_PATH = folder_name + '/*.wav'
    # glob.glob yields entries in filesystem order, which varies between
    # machines; sorting keeps the sample order of the dataset reproducible.
    return sorted(glob.glob(waves_PATH))

def getMFCCmeans(folder_name, label):
    """Build augmented MFCC features and labels for every WAV file in a folder.

    Each clip contributes four samples: the unmodified signal plus one sample
    each from ``add_white_noise``, ``shift_sound`` and ``stretch_sound``.
    Every augmentation is applied to the *original* signal, so the variants
    do not compound (the shifted sample carries no added noise, and so on).

    Despite the function name, the full MFCC matrix is kept for each sample;
    no mean over time is taken.

    The list of discovered files and a per-file progress line are printed.

    Parameters
    ----------
    folder_name : str
        Directory searched for ``*.wav`` files via ``GetWAVfiles``.
    label : int
        Class index assigned to every sample from this folder.

    Returns
    -------
    genre_x : np.ndarray
        The MFCC matrices stacked along a new first axis, four entries per
        file. NOTE(review): this is a regular 3-D array only if every clip
        yields the same number of frames — confirm for new data.
    genre_y : np.ndarray
        Integer array of shape ``(4 * n_files, 1)`` filled with ``label``.
    """
    n_mfcc = 20
    # Identity first so the clean clip is always part of the data set.
    augmentations = (
        lambda signal: signal,
        add_white_noise,
        shift_sound,
        stretch_sound,
    )
    genre_x = []

    wavfiles = GetWAVfiles(folder_name)
    print(wavfiles)
    for i, wav in enumerate(wavfiles):
        y, sr = librosa.load(wav)
        for augment in augmentations:
            # Always start from the untouched ``y``; reassigning it here would
            # feed one augmentation's output into the next.
            mfcc = librosa.feature.mfcc(y=augment(y), sr=sr, n_mfcc=n_mfcc)
            genre_x.append(mfcc)

        print(f'{i+1}/{len(wavfiles)} loaded: {wav}')

    # One label row per sample, allocated once instead of growing the array
    # with np.vstack on every append.
    genre_y = np.full((len(genre_x), 1), label, dtype='int')
    genre_x = np.array(genre_x)
    return genre_x, genre_y

if __name__ == '__main__':
    # One (features, labels) pair per spoken phrase; the integer is the class
    # index that getMFCCmeans assigns to every sample from that folder.
    Goodmorning_x, Goodmorning_y = getMFCCmeans('Goodmorning', 0)
    Goodnight_x, Goodnight_y = getMFCCmeans('Goodnight', 1)
    Letsgo_x, Letsgo_y = getMFCCmeans('Letsgo', 2)

    # Stack the three classes along the sample axis.
    X = np.concatenate((Goodmorning_x, Goodnight_x, Letsgo_x), axis=0)
    Y = np.concatenate((Goodmorning_y, Goodnight_y, Letsgo_y), axis=0)

    # One-hot encode the labels and append a trailing singleton axis to the
    # features.
    Y = np_utils.to_categorical(Y)
    X = X[:, :, :, np.newaxis]

    print(X.shape, Y.shape)
    print(X, Y)

    # Persist the dataset.
    for filename, array in (('X.npy', X), ('Y.npy', Y)):
        np.save(filename, array)

['Goodmorning/ohayo2.wav', 'Goodmorning/ohayou.wav', 'Goodmorning/ohayou1.wav', 'Goodmorning/ohayou3.wav', 'Goodmorning/ohayou4.wav']
(20, 216)
1/5 loaded: Goodmorning/ohayo2.wav
(20, 216)
2/5 loaded: Goodmorning/ohayou.wav
(20, 216)
3/5 loaded: Goodmorning/ohayou1.wav
(20, 216)
4/5 loaded: Goodmorning/ohayou3.wav
(20, 216)
5/5 loaded: Goodmorning/ohayou4.wav
['Goodnight/oyasuim4.wav', 'Goodnight/oyasumi.wav', 'Goodnight/oyasumi1.wav', 'Goodnight/oyasumi2.wav', 'Goodnight/oyasumi3.wav']
(20, 216)
1/5 loaded: Goodnight/oyasuim4.wav
(20, 216)
2/5 loaded: Goodnight/oyasumi.wav
(20, 216)
3/5 loaded: Goodnight/oyasumi1.wav
(20, 216)
4/5 loaded: Goodnight/oyasumi2.wav
(20, 216)
5/5 loaded: Goodnight/oyasumi3.wav
['Letsgo/yaruzo.wav', 'Letsgo/yaruzo1.wav', 'Letsgo/yaruzo2.wav', 'Letsgo/yaruzo3.wav', 'Letsgo/yaruzo4.wav']
(20, 216)
1/5 loaded: Letsgo/yaruzo.wav
(20, 216)
2/5 loaded: Letsgo/yaruzo1.wav
(20, 216)
3/5 loaded: Letsgo/yaruzo2.wav
(20, 216)
4/5 loaded: Letsgo/yaruzo3.wav
(20, 216)
5

In [67]:
from sklearn.model_selection import train_test_split

# Fix the seed so the 80/20 split (and any accuracy computed from it) is
# reproducible across kernel restarts.
# NOTE(review): X contains several augmented variants of each recording, so a
# random row-wise split can put variants of one clip on both sides of the
# split — consider splitting by source file before augmenting.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, Y, train_size=0.8, random_state=42)



In [61]:
def build_convolution_perceptron(input_shape=(20, 216, 1), num_classes=3):
    """Build the (uncompiled) CNN classifier.

    Architecture: two ReLU convolutions (16 filters of 4x4, then 64 filters
    of 3x3), 2x2 max-pooling, dropout 0.25, flatten, a 128-unit ReLU dense
    layer, dropout 0.5 and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input sample, passed to the first ``Conv2D`` layer.
        Defaults to ``(20, 216, 1)``.
    num_classes : int, optional
        Number of units in the softmax output layer (default 3).

    Returns
    -------
    Sequential
        The assembled model; the caller is responsible for ``compile``.
    """
    model = Sequential()
    model.add(Conv2D(16, kernel_size=(4, 4),
                     activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

def build_multilayer_perceptron(input_shape=(20, ), num_classes=3):
    """Build the (uncompiled) fully connected classifier.

    Architecture: one 16-unit ReLU dense layer followed by a softmax output
    layer.

    The activations are given through the ``activation`` argument of
    ``Dense`` (as in ``build_convolution_perceptron``) rather than separate
    ``Activation`` layers, so the function only needs ``Sequential`` and
    ``Dense`` to be imported.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input sample (default ``(20,)``).
    num_classes : int, optional
        Number of units in the softmax output layer (default 3).

    Returns
    -------
    Sequential
        The assembled model; the caller is responsible for ``compile``.
    """
    model = Sequential()
    model.add(Dense(16, activation='relu', input_shape=input_shape))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [62]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.optimizers import adam
# Instantiate the CNN returned by build_convolution_perceptron().
model = build_convolution_perceptron()

# Adam optimizer with the AMSGrad variant switched on.
Amsgrad = adam(amsgrad=True, lr=0.0001, decay=1e-6)

# Multi-class setup: categorical cross-entropy loss, accuracy as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Amsgrad,
    metrics=['accuracy'],
)

# Train on the training split, one sample per batch, for 50 epochs.
model.fit(train_X, train_Y, batch_size=1, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50

KeyboardInterrupt: 

In [68]:
# Saving is currently disabled:
#model.save('my_model0.h5')
from keras.models import load_model

# Replace `model` with a network read from disk. Note the file name is
# 'my_model.h5', not the 'my_model0.h5' used in the disabled save above.
model = load_model('my_model.h5')

# evaluate() yields the loss followed by the compiled metric (accuracy).
metrics = model.evaluate(test_X, test_Y, verbose=0)
loss, accuracy = metrics
print(f"Accuracy = {accuracy:.2f}")

Accuracy = 1.00


In [41]:
# Take the first sample of X and give it a leading batch axis of size one so
# it can be fed to the model on its own.
single_sample_shape = (1, 20, 216, 1)
test = X[0].reshape(single_sample_shape)

# Class probabilities for that single sample (displayed as the cell output).
model.predict(test)

array([[9.9947172e-01, 4.1603486e-04, 1.1226243e-04]], dtype=float32)