In [1]:
#imports
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import os
import numpy as np
import random
from skimage.measure import block_reduce
import time
import math

In [2]:
#keras
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from keras.models import model_from_json

Using TensorFlow backend.


In [3]:
#SETTINGS
# Spoken keywords the classifier distinguishes.
labels = ["down", "go", "left", "on", "right", "stop", "up"]
num_labels = len(labels)

# Spectrogram image geometry fed to the network: [height, width, channels].
image_height, image_width = 12, 18
input_shape = [image_height, image_width, 1]

# Training hyper-parameters.
num_epochs = 80
batch_size = 6
test_size = 0.2  # fraction of each label's files held out as the test split

# Filesystem locations (absolute Windows paths).
PATH = "C:\\Users\\Filip\\Desktop\\Jupyter"
npy_dataset_path = PATH + "\\npy_dataset"

In [4]:
#make spectrogram out of wav_file, transform it (gray,normalize)
#and save it as a file.npy
def make_spectrogram(wav_file, save_file, nfft=512, overlap=511,
                     figsize=(0.8, 0.5)):
    """Render a WAV file as a normalised grayscale spectrogram and save it.

    The spectrogram is drawn with matplotlib on an axis-free figure, the
    rendered pixels are converted to grayscale, min-max normalised and
    written with ``np.save``.

    Parameters
    ----------
    wav_file : str
        Path of the WAV file, read with ``scipy.io.wavfile.read``.
    save_file : str
        Destination handed to ``np.save``.
    nfft : int, optional
        ``NFFT`` argument forwarded to ``Axes.specgram``.
    overlap : int, optional
        ``noverlap`` argument forwarded to ``Axes.specgram``.
    figsize : tuple of float, optional
        Figure size in inches as ``(width, height)``.

    Returns
    -------
    numpy.ndarray
        2-D float array (rows x columns of the rendered figure) with values
        in [0, 1]; all zeros if the rendered image has a single gray level.
    """
    #read file
    sr, data = wavfile.read(wav_file)
    #specgram needs a 1-D signal; multi-channel data comes back as
    #(samples, channels), so average the channels down to mono
    if data.ndim > 1:
        data = data.mean(axis=1)
    #size the figure at creation time: changing rcParams afterwards only
    #affects figures created later and leaks into the rest of the notebook
    figure, axes = plt.subplots(1, figsize=figsize)
    try:
        figure.subplots_adjust(left=0, right=1, bottom=0, top=1)
        #make spectrogram
        axes.specgram(x=data, Fs=sr, noverlap=overlap, NFFT=nfft)
        axes.axis("off")
        figure.canvas.draw()
        #copy the rendered RGBA pixels; the buffer already has the
        #(height, width, 4) shape, so no manual reshape is required
        image = np.array(figure.canvas.buffer_rgba())
    finally:
        #always release the figure, even if rendering failed
        plt.close(figure)
    #make image gray (drop the alpha channel, weight R, G, B)
    gray = np.dot(image[..., :3], [0.299, 0.587, 0.114])
    #normalize image, guarding against a constant image (zero range)
    low = gray.min()
    span = gray.max() - low
    if span > 0:
        gray = (gray - low) / span
    else:
        gray = np.zeros_like(gray)
    np.save(save_file, gray)
    return gray


In [5]:
#load spectrogram from npy_file
def load_spectrogram(npy_file):
    """Return the array stored in ``npy_file``, read with ``np.load``."""
    return np.load(npy_file)

In [6]:
def make_model():
    """Build and compile the convolutional keyword classifier.

    The network is two 3x3 convolutions (32 and 64 filters), 2x2 max
    pooling, dropout, a 128-unit dense layer, dropout and a softmax output.
    The input shape and the number of output units come from the
    module-level ``input_shape`` and ``num_labels`` settings.

    The model summary is printed as a side effect.

    Returns
    -------
    keras.models.Sequential
        The compiled model (Adam optimizer, categorical cross-entropy loss,
        accuracy metric).
    """
    #model
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), activation="relu",
                    input_shape=input_shape))
    model.add(Conv2D(64, kernel_size=(3,3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels, activation="softmax"))

    #use the documented Adam class; the lowercase `adam` alias is not
    #available in every Keras release
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.categorical_crossentropy,
                  metrics=["accuracy"])
    model.summary()
    return model

In [7]:
def _load_labeled_specs(npy_dataset, file_names):
    """Load the listed spectrogram files, keeping only correctly shaped ones.

    Returns ``(x, y)`` where ``x`` has shape
    ``(kept, image_height, image_width)`` and ``y`` holds the numeric label
    parsed from the first character of each kept file name.
    """
    expected_shape = (image_height, image_width)
    x = np.zeros((len(file_names), image_height, image_width))
    y = np.zeros(len(file_names))
    kept = 0
    for file_name in file_names:
        spec = load_spectrogram(os.path.join(npy_dataset, file_name))
        if spec.shape != expected_shape:
            #skip instead of leaving an all-zero image labelled as class 0
            continue
        x[kept] = spec
        y[kept] = int(file_name[0])
        kept += 1
    #drop the unused tail left behind by skipped files
    return x[:kept], y[:kept]


def split_train_test(npy_dataset, test_size):
    """Split the saved spectrograms into train and test arrays per label.

    For every name in the module-level ``labels`` list, the files in
    ``npy_dataset`` whose name contains that label are collected; the first
    ``floor(count * test_size)`` of them go to the test split and the rest
    to the train split. Files are visited in sorted order so the split is
    reproducible. Spectrograms whose shape is not
    ``(image_height, image_width)`` are left out of the result.

    Parameters
    ----------
    npy_dataset : str
        Directory containing the ``.npy`` spectrogram files.
    test_size : float
        Fraction of each label's files assigned to the test split.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``; the ``x`` arrays have shape
        ``(n, image_height, image_width)`` and the ``y`` arrays hold the
        numeric label taken from the first character of each file name.
    """
    #list the directory once; sorting makes the split independent of the
    #arbitrary order returned by os.listdir
    all_files = sorted(os.listdir(npy_dataset))
    train = []
    test = []
    #split dataset into train and test lists, equally for each label
    for label_name in labels:
        labeled_list = [name for name in all_files if label_name in name]
        split_index = math.floor(len(labeled_list) * test_size)
        test.extend(labeled_list[:split_index])
        train.extend(labeled_list[split_index:])

    #load spectrograms to x,y train and x,y test
    x_train, y_train = _load_labeled_specs(npy_dataset, train)
    x_test, y_test = _load_labeled_specs(npy_dataset, test)

    return x_train, x_test, y_train, y_test
        

In [8]:
def train_model(model, npy_dataset):
    """Fit ``model`` on the spectrogram dataset and persist it to disk.

    The dataset in ``npy_dataset`` is split with ``split_train_test`` using
    the module-level ``test_size``; images get a trailing channel axis and
    labels are one-hot encoded over ``num_labels`` classes. The test split
    is used as validation data during fitting. Afterwards the architecture
    is written to ``model.json`` and the weights to ``weights.h5``.

    Returns the same ``model`` object after training.
    """
    x_train, x_test, y_train, y_test = split_train_test(npy_dataset,test_size)

    #add the single channel axis: (n, height, width) -> (n, height, width, 1)
    train_images = x_train.reshape(len(x_train), image_height, image_width, 1)
    test_images = x_test.reshape(len(x_test), image_height, image_width, 1)

    #one-hot encode the numeric labels
    train_targets = keras.utils.to_categorical(y_train, num_labels)
    test_targets = keras.utils.to_categorical(y_test, num_labels)

    #train
    model.fit(
        train_images,
        train_targets,
        batch_size=batch_size,
        epochs=num_epochs,
        verbose=2,
        validation_data=(test_images, test_targets),
    )

    #save model architecture
    architecture = model.to_json()
    with open("model.json", "w") as architecture_file:
        architecture_file.write(architecture)
    #save weights
    model.save_weights("weights.h5")

    return model

In [9]:
# Build the CNN, then train it on the saved spectrogram dataset.
# train_model also writes model.json and weights.h5 to the working directory.
model = make_model()
model = train_model(model, npy_dataset_path)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 10, 16, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 14, 64)         18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 7, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 7, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1792)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               229504    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)              

Epoch 73/80
 - 3s - loss: 0.1318 - accuracy: 0.9546 - val_loss: 0.8089 - val_accuracy: 0.7947
Epoch 74/80
 - 3s - loss: 0.1141 - accuracy: 0.9602 - val_loss: 0.8956 - val_accuracy: 0.7877
Epoch 75/80
 - 3s - loss: 0.1229 - accuracy: 0.9574 - val_loss: 0.7922 - val_accuracy: 0.8003
Epoch 76/80
 - 3s - loss: 0.1163 - accuracy: 0.9588 - val_loss: 0.9029 - val_accuracy: 0.7989
Epoch 77/80
 - 3s - loss: 0.1219 - accuracy: 0.9567 - val_loss: 0.8964 - val_accuracy: 0.7947
Epoch 78/80
 - 3s - loss: 0.1103 - accuracy: 0.9595 - val_loss: 0.9413 - val_accuracy: 0.7835
Epoch 79/80
 - 3s - loss: 0.1094 - accuracy: 0.9626 - val_loss: 0.8761 - val_accuracy: 0.8017
Epoch 80/80
 - 3s - loss: 0.1184 - accuracy: 0.9637 - val_loss: 0.9442 - val_accuracy: 0.7821
