In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import librosa
import librosa.feature
import glob
import numpy as np
from utils import load_data

## Data preprocessing

In [2]:
trainX, trainY, validationX, validationY, testX, testY = load_data(split_into_10s = True)

KeyboardInterrupt: 

In [9]:
def normalize_data(data, mu=None, std=None):
    if mu is None or std is None:
        mu = np.mean(data)
        std = np.std(data)
        
    norm_data = data.copy() - mu
    norm_data /= std
    return norm_data, mu, std

In [3]:
#Reshaping into (batch_size, height, width, channels)
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], trainX.shape[2], 1))
validationX = validationX.reshape((validationX.shape[0], validationX.shape[1], validationX.shape[2], 1))
testX = testX.reshape((testX.shape[0], testX.shape[1], testX.shape[2], 1))



In [10]:
trainX, mu, std = normalize_data(trainX)
validationX, _, _ = normalize_data(validationX, mu, std)
testX, _, _ = normalize_data(testX, mu, std)

In [4]:
trainX.shape
trainY.shape

(44936, 5)

## Basic CNN

In [5]:
indim_x = 128
indim_y = 106

In [11]:
CNN_model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(16, (3,3), input_shape=(indim_x, indim_y, 1)),
  tf.keras.layers.BatchNormalization(momentum=0.9),
  tf.keras.layers.Activation('relu'),
  tf.keras.layers.MaxPooling2D(2, 2),
  tf.keras.layers.Dropout(0.25),
  tf.keras.layers.Conv2D(32, (3,3)),
  tf.keras.layers.BatchNormalization(momentum=0.9),
  tf.keras.layers.Activation('relu'),
  tf.keras.layers.MaxPooling2D(2,2), 
  tf.keras.layers.Dropout(0.25),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(100, activation='relu'),
  tf.keras.layers.Dense(5, activation='softmax')
])

In [12]:
CNN_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
CNN_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 126, 104, 16)      160       
_________________________________________________________________
batch_normalization_2 (Batch (None, 126, 104, 16)      64        
_________________________________________________________________
activation_2 (Activation)    (None, 126, 104, 16)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 63, 52, 16)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 63, 52, 16)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 61, 50, 32)        4640      
_________________________________________________________________
batch_normalization_3 (Batch (None, 61, 50, 32)       

In [13]:
CNN_model.fit(trainX, trainY, batch_size = 128, validation_data=(validationX, validationY), epochs=18)

Train on 44936 samples, validate on 5407 samples
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18


<tensorflow.python.keras.callbacks.History at 0x23d2efe1348>

In [37]:
CNN_model.evaluate(testX, testY, verbose=2)

5055/1 - 1s - loss: 0.8067 - accuracy: 0.7177


[0.8141717711730716, 0.71770525]