In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

import librosa
import os as os
from scipy.misc import comb
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm
from keras.models import load_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
DATA_PATH = "./large_files/audio/"  


def get_labels(path=DATA_PATH):
    labels = os.listdir(path)
    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)
# Get available labels
labels, indices, _ = get_labels(DATA_PATH)


# Getting first arrays
X = np.load(labels[0] + '.npy')
y = np.zeros(X.shape[0])

# Append all of the dataset into one single array, same goes for y
for i, label in enumerate(labels[1:]):
    x = np.load(label + '.npy')
    X = np.vstack((X, x))
    y = np.append(y, np.full(x.shape[0], fill_value= (i + 1)))

assert X.shape[0] == len(y)

FileNotFoundError: [Errno 2] No such file or directory: '_background_noise_.npy'

In [3]:
%reload_ext autoreload
%autoreload 2

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical


# Second dimension of the feature is dim2
feature_dim_2 = 11

# # Feature dimension
feature_dim_1 = 20
channel = 1
epochs = 5
batch_size = 50
verbose = 2
num_classes = 30

# Reshaping to perform 2D convolution
X = X.reshape(X.shape[0], feature_dim_1, feature_dim_2, channel)


y_hot = to_categorical(y)

In [55]:
def get_model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(feature_dim_1, feature_dim_2, channel)))
    model.add(Conv2D(48, kernel_size=(2, 2), activation='relu'))
    model.add(Conv2D(120, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model

# Predicts one sample
def predict(filepath, model):
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][
            np.argmax(model.predict(sample_reshaped))
    ]

In [56]:
k=2
num_val_samples = len(X) // k
all_scores = []


for i in range(k):
    print('processing fold #', i)
    # Prepare the validation data: data from partition # k
    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = y_hot[i * num_val_samples: (i + 1) * num_val_samples]

    # Prepare the training data: data from all other partitions
    partial_train_data = np.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y_hot[:i * num_val_samples],
         y_hot[(i + 1) * num_val_samples:]],
        axis=0)

    model = get_model()
    model.fit(partial_train_data, partial_train_targets,
          batch_size=batch_size, 
          epochs=epochs, 
          verbose=verbose
         )
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

processing fold # 0
Epoch 1/5
 - 85s - loss: 1.9101 - acc: 0.3920
Epoch 2/5
 - 37s - loss: 0.9333 - acc: 0.7045
Epoch 3/5
 - 37s - loss: 0.6957 - acc: 0.7837
Epoch 4/5
 - 38s - loss: 0.5819 - acc: 0.8215
Epoch 5/5
 - 35s - loss: 0.5076 - acc: 0.8445
processing fold # 1
Epoch 1/5
 - 86s - loss: 2.1089 - acc: 0.3636
Epoch 2/5
 - 38s - loss: 1.0702 - acc: 0.6743
Epoch 3/5
 - 39s - loss: 0.7952 - acc: 0.7637
Epoch 4/5
 - 36s - loss: 0.6630 - acc: 0.8016
Epoch 5/5
 - 36s - loss: 0.5805 - acc: 0.8301


In [51]:
all_scores

[0.029913473423980222, 0.02484548825710754]

In [57]:
model.save('bad_model.h5')