In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Session options:
#   * allow_growth        -> dynamically grow the memory used on the GPU
#   * log_device_placement -> log on which device each operation ran
#     (nothing gets printed in Jupyter, only if you run it standalone)
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True),
    log_device_placement=True,
)
sess = tf.Session(config=config)
# Register this TensorFlow session as the default session for Keras.
set_session(sess)

import librosa
import os as os
from scipy.misc import comb
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm
from keras.models import load_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
DATA_PATH = "./large_files/filtered_audio/"


def get_labels(path=DATA_PATH):
    """Derive the class labels from the entries of a directory.

    Every entry returned by ``os.listdir(path)`` is treated as one label.
    The listing is sorted because ``os.listdir`` returns entries in arbitrary
    order; without sorting, the label -> index mapping could differ between
    the call used to build the training targets and a later call used to
    decode a prediction.

    Args:
        path: directory whose entry names are the labels.

    Returns:
        A tuple ``(labels, label_indices, one_hot)`` where ``labels`` is the
        sorted list of entry names, ``label_indices`` is
        ``np.arange(len(labels))`` and ``one_hot`` is
        ``to_categorical(label_indices)``.
    """
    labels = sorted(os.listdir(path))
    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)
# Get available labels
labels, indices, _ = get_labels(DATA_PATH)
assert labels, "no labels found under DATA_PATH"

# Load each label's feature array exactly once.
# NOTE(review): the '<label>.npy' files are resolved relative to the current
# working directory, not DATA_PATH -- confirm that is where they are saved.
feature_arrays = [np.load(label + '.npy') for label in labels]

# Stack everything in a single pass instead of growing X / y inside a loop,
# which would re-copy the whole accumulated array on every iteration.
X = np.vstack(feature_arrays)
# y holds, for every row of X, the position of its label in `labels`
# (stored as floats, matching np.zeros for the first label).
y = np.concatenate([
    np.full(arr.shape[0], fill_value=i, dtype=float)
    for i, arr in enumerate(feature_arrays)
])
del feature_arrays  # the per-label copies are no longer needed

assert X.shape[0] == len(y)

In [3]:
%reload_ext autoreload
%autoreload 2

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical


# Second dimension of the feature is dim2
feature_dim_2 = 11

# First feature dimension
feature_dim_1 = 20
channel = 1
# The assignment was left empty (a syntax error); the recorded training
# output of this notebook reads "Epoch 1/50", so 50 is restored here.
epochs = 50
batch_size = 100
verbose = 2
num_classes = 31

# Reshaping to perform 2D convolution: add a trailing channel axis
X = X.reshape(X.shape[0], feature_dim_1, feature_dim_2, channel)


y_hot = to_categorical(y)
# The softmax layer of the models has `num_classes` outputs; fail here with a
# clear message rather than later inside fit() if the data disagrees.
assert y_hot.shape[1] == num_classes, (
    "one-hot targets have %d classes, expected %d" % (y_hot.shape[1], num_classes)
)

In [9]:
def _build_cnn(conv_filters, dense_units):
    """Build and compile the CNN classifier shared by both model sizes.

    Architecture: a stack of 2x2 ReLU convolutions, 2x2 max pooling,
    dropout, flatten, two ReLU dense layers each followed by dropout, and a
    `num_classes`-way softmax. Compiled with categorical cross-entropy,
    Adadelta and the accuracy metric.

    Args:
        conv_filters: filter counts of the convolution layers, in order.
        dense_units: pair of unit counts for the two hidden dense layers.

    Returns:
        The compiled Sequential model.
    """
    first_dense, second_dense = dense_units

    model = Sequential()
    for position, filters in enumerate(conv_filters):
        # Only the first layer declares the input shape.
        extra = {}
        if position == 0:
            extra['input_shape'] = (feature_dim_1, feature_dim_2, channel)
        model.add(Conv2D(filters, kernel_size=(2, 2), activation='relu', **extra))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(first_dense, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(second_dense, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model


def get_model():
    """Return the smaller compiled CNN (conv 32/48/120, dense 128/64)."""
    return _build_cnn(conv_filters=(32, 48, 120), dense_units=(128, 64))


def get_larger_model():
    """Return the larger compiled CNN (conv 64/96/240, dense 256/128)."""
    return _build_cnn(conv_filters=(64, 96, 240), dense_units=(256, 128))

# Predicts one sample
def predict(filepath, model):
    """Return the label whose score is highest for the file at `filepath`.

    The file is converted with `wav2mfcc`, reshaped into a batch of one
    sample, scored by `model`, and the arg-max index is looked up in the
    label list returned by `get_labels()`.

    NOTE(review): `wav2mfcc` is not defined or imported in the visible cells
    of this notebook -- confirm where it is expected to come from.
    """
    features = wav2mfcc(filepath)
    single_batch = features.reshape(1, feature_dim_1, feature_dim_2, channel)
    label_names = get_labels()[0]
    best_index = np.argmax(model.predict(single_batch))
    return label_names[best_index]

# Seed NumPy's global random generator so repeated runs draw the same numbers
seed = 7
np.random.seed(seed=seed)

In [8]:
# Disabled k-fold cross-validation sketch (kept for reference). While it is
# commented out, the model below is fitted once on the full dataset.

# k=2
# num_val_samples = len(X) // k
# all_scores = []


# for i in range(k):
#     print('processing fold #', i)
#     # Prepare the validation data: data from partition # k
#     val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
#     val_targets = y_hot[i * num_val_samples: (i + 1) * num_val_samples]

#     # Prepare the training data: data from all other partitions
#     partial_train_data = np.concatenate(
#         [X[:i * num_val_samples],
#          X[(i + 1) * num_val_samples:]],
#         axis=0)
#     partial_train_targets = np.concatenate(
#         [y_hot[:i * num_val_samples],
#          y_hot[(i + 1) * num_val_samples:]],
#         axis=0)

# Build the smaller model and train it on every sample.
small_model = get_model()
small_model.fit(
    x=X,
    y=y_hot,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
)


#     val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
#     all_scores.append(val_mae)

Epoch 1/50
 - 17s - loss: 2.9055 - acc: 0.1677
Epoch 2/50
 - 6s - loss: 1.6673 - acc: 0.4962
Epoch 3/50
 - 6s - loss: 1.2199 - acc: 0.6391
Epoch 4/50
 - 6s - loss: 1.0176 - acc: 0.7045
Epoch 5/50
 - 6s - loss: 0.8939 - acc: 0.7411
Epoch 6/50
 - 6s - loss: 0.8044 - acc: 0.7699
Epoch 7/50
 - 6s - loss: 0.7455 - acc: 0.7862
Epoch 8/50
 - 6s - loss: 0.6998 - acc: 0.7986
Epoch 9/50
 - 6s - loss: 0.6596 - acc: 0.8100
Epoch 10/50
 - 6s - loss: 0.6285 - acc: 0.8181
Epoch 11/50
 - 6s - loss: 0.5962 - acc: 0.8266
Epoch 12/50
 - 6s - loss: 0.5752 - acc: 0.8337
Epoch 13/50
 - 6s - loss: 0.5533 - acc: 0.8377
Epoch 14/50
 - 6s - loss: 0.5340 - acc: 0.8449
Epoch 15/50
 - 6s - loss: 0.5198 - acc: 0.8480
Epoch 16/50
 - 6s - loss: 0.5094 - acc: 0.8522
Epoch 17/50
 - 6s - loss: 0.4893 - acc: 0.8578
Epoch 18/50
 - 6s - loss: 0.4753 - acc: 0.8623
Epoch 19/50
 - 6s - loss: 0.4660 - acc: 0.8632
Epoch 20/50
 - 6s - loss: 0.4596 - acc: 0.8671
Epoch 21/50
 - 6s - loss: 0.4448 - acc: 0.8711
Epoch 22/50
 - 6s - l

<keras.callbacks.History at 0x13cd139d588>

In [10]:
# Build the larger model and train it on every sample.
model = get_larger_model()
model.fit(
    x=X,
    y=y_hot,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
)

Epoch 1/50
 - 27s - loss: 2.1391 - acc: 0.3889
Epoch 2/50
 - 11s - loss: 1.0275 - acc: 0.6998
Epoch 3/50
 - 10s - loss: 0.7628 - acc: 0.7781
Epoch 4/50
 - 10s - loss: 0.6297 - acc: 0.8156
Epoch 5/50
 - 10s - loss: 0.5373 - acc: 0.8431
Epoch 6/50
 - 10s - loss: 0.4650 - acc: 0.8626
Epoch 7/50
 - 11s - loss: 0.4141 - acc: 0.8759
Epoch 8/50
 - 11s - loss: 0.3682 - acc: 0.8904
Epoch 9/50
 - 11s - loss: 0.3328 - acc: 0.9008
Epoch 10/50
 - 11s - loss: 0.3048 - acc: 0.9092
Epoch 11/50
 - 10s - loss: 0.2839 - acc: 0.9148
Epoch 12/50
 - 11s - loss: 0.2636 - acc: 0.9205
Epoch 13/50
 - 10s - loss: 0.2439 - acc: 0.9262
Epoch 14/50
 - 10s - loss: 0.2314 - acc: 0.9313
Epoch 15/50
 - 10s - loss: 0.2178 - acc: 0.9350
Epoch 16/50
 - 10s - loss: 0.2113 - acc: 0.9361
Epoch 17/50
 - 11s - loss: 0.2048 - acc: 0.9388
Epoch 18/50
 - 11s - loss: 0.1913 - acc: 0.9428
Epoch 19/50
 - 10s - loss: 0.1814 - acc: 0.9463
Epoch 20/50
 - 11s - loss: 0.1743 - acc: 0.9485
Epoch 21/50
 - 11s - loss: 0.1750 - acc: 0.9491
E

<keras.callbacks.History at 0x13cfffb60f0>

In [14]:
# Write the model held in `model` to an HDF5 file.
model.save(filepath='good_model.h5')

In [11]:
# Write the model held in `small_model` to an HDF5 file.
small_model.save(filepath='small_model.h5')

In [12]:
# Print the layer-by-layer table (layer type, output shape, parameter count) for `model`.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 19, 10, 64)        320       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 18, 9, 96)         24672     
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 17, 8, 240)        92400     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 8, 4, 240)         0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 8, 4, 240)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 7680)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               1966336   
__________