In [1]:
import os
import glob
import wave
import core as co
import pydub
import librosa
import itertools
import numpy as np
import contextlib
import tensorflow as tf
import sklearn.metrics as metrics

In [2]:
paths = ['BD15C', 'BD15L', 'BD6B', 'BD6A']

In [3]:
def windows(data, window_size):
    start = 0
    while start < len(data):
        yield start, start + window_size
        start += (window_size / 2)


def extract_features(parent_dir,sub_dirs,file_ext="*.wav",bands = 60, frames = 41, label = 1):
    window_size = 512 * (frames - 1)
    log_specgrams = []
    labels = []
    for l, sub_dir in enumerate(sub_dirs):
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            sound_clip,s = librosa.load(fn)
            for (start,end) in windows(sound_clip,window_size):
                if(len(sound_clip[start:end]) == window_size):
                    signal = sound_clip[start:end]
                    melspec = librosa.feature.melspectrogram(signal, n_mels = bands)
                    logspec = librosa.logamplitude(melspec)
                    logspec = logspec.T.flatten()[:, np.newaxis].T
                    log_specgrams.append(logspec)
                    labels.append(l)
            
    log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams),bands,frames,1)
    features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis = 3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])
    
    return np.array(features), np.array(labels)

In [4]:
parent_dir = '.'

if os.path.exists('features.npy') and os.path.exists('labels.npy'):
    features = np.load('features.npy')
    labels = np.load('labels.npy')
else:
    features,labels = extract_features(parent_dir,paths,frames=41)
    labels = co.one_hot_encode(labels)
    np.save('features.npy', features)
    np.save('labels.npy', labels)

In [5]:
rnd_indices = np.random.rand(len(labels)) < 0.70

train_x = features[rnd_indices]
train_y = labels[rnd_indices]
test_x = features[~rnd_indices]
test_y = labels[~rnd_indices]

In [6]:
frames = 41
bands = 60

feature_size = bands * frames #60x41
num_labels = 4
num_channels = 2

batch_size = 50
kernel_size = 30
depth = 20
num_hidden = 200

starter_learning_rate = 1e-5
training_iterations = 2000
step = 300
decay = 0.90

In [7]:
X = tf.placeholder(tf.float32, shape=[None, bands, frames, num_channels])
Y = tf.placeholder(tf.float32, shape=[None, num_labels])
phase_train = tf.placeholder(tf.bool, name='phase_train')
global_step = tf.Variable(0, trainable=False)

conv1 = co.conv_layer(X, num_channels, depth, phase_train, 'conv1', kernel_size, kernel_size)
conv2 = co.conv_layer(conv1, depth, depth*2, phase_train, 'conv2', kernel_size, kernel_size)
max_pool_1 = co.max_pool_layer(conv2, name='max_pool_1')
shape = max_pool_1.get_shape().as_list()
max_pool_1_flat = tf.reshape(max_pool_1, [-1, shape[1] * shape[2] * shape[3]])

fc1 = co.fc_layer(max_pool_1_flat, shape[1] * shape[2] * shape[3], num_hidden, phase_train, 'fc1') # fc_layer fc1
y_ = co.fc_layer(tf.nn.relu(fc1), num_hidden, num_labels, phase_train, 'fc2')

In [8]:
learning_rate = tf.train.exponential_decay(starter_learning_rate,global_step,step,decay,staircase=True)

with tf.name_scope('xent'):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y_))
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy, global_step=global_step)
with tf.name_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
cost_history = np.empty(shape=[1],dtype=float)
with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    for itr in range(training_iterations):    
        offset = (itr * batch_size) % (train_y.shape[0] - batch_size)
        batch_x = train_x[offset:(offset + batch_size), :, :, :]
        batch_y = train_y[offset:(offset + batch_size), :]
        
        _, c, a = session.run([optimizer, cross_entropy, accuracy],feed_dict={X: batch_x, Y : batch_y, phase_train : True})
        cost_history = np.append(cost_history,c)
        if itr % 50 == 0:
            _, tc, ta = session.run([optimizer, cross_entropy, accuracy],feed_dict={X: test_x, Y : test_y, phase_train : False})
            print ('Train cost', c, 'Accuracy', a, 'Test cost', tc, 'Accuracy', ta)

    y_pred = session.run(y_, feed_dict={X: test_x, Y: test_y, phase_train: False})
    print('Test accuracy',round(session.run(accuracy, feed_dict={X: test_x, Y: test_y, phase_train: False}) , 3))
    

('Train cost', 0.70558727, 'Accuracy', 0.75999999, 'Test cost', 5.7117052, 'Accuracy', 0.17091838)
('Train cost', 0.88817763, 'Accuracy', 0.71999997, 'Test cost', 1.260251, 'Accuracy', 0.36734691)
('Train cost', 0.46450895, 'Accuracy', 1.0, 'Test cost', 1.0860244, 'Accuracy', 0.43112242)
('Train cost', 0.52783471, 'Accuracy', 0.89999998, 'Test cost', 1.0425684, 'Accuracy', 0.45408165)
('Train cost', 0.46482244, 'Accuracy', 0.89999998, 'Test cost', 1.0092973, 'Accuracy', 0.59183675)
('Train cost', 0.34233606, 'Accuracy', 0.94, 'Test cost', 0.87130207, 'Accuracy', 0.71428567)
('Train cost', 0.2572763, 'Accuracy', 0.91999996, 'Test cost', 0.79765612, 'Accuracy', 0.67602038)
('Train cost', 0.050639294, 'Accuracy', 1.0, 'Test cost', 0.88691646, 'Accuracy', 0.55102032)
('Train cost', 0.074165225, 'Accuracy', 1.0, 'Test cost', 0.72590262, 'Accuracy', 0.68367338)
('Train cost', 0.19739142, 'Accuracy', 0.98000002, 'Test cost', 0.61495769, 'Accuracy', 0.77551013)
('Train cost', 0.025348749, 'Acc

ResourceExhaustedError: OOM when allocating tensor with shape[392,40,30,21]
	 [[Node: train/gradients/max_pool_1/MaxPool_grad/MaxPoolGrad = MaxPoolGrad[T=DT_FLOAT, data_format="NHWC", ksize=[1, 2, 2, 1], padding="SAME", strides=[1, 2, 2, 1], _device="/job:localhost/replica:0/task:0/gpu:0"](conv2/Relu, max_pool_1/MaxPool, train/gradients/Reshape_grad/Reshape)]]
	 [[Node: train/Adam/update/_38 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_215_train/Adam/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op u'train/gradients/max_pool_1/MaxPool_grad/MaxPoolGrad', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/usr/lib/python2.7/dist-packages/tornado/ioloop.py", line 866, in start
    handler_func(fd_obj, events)
  File "/usr/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-73de7d328604>", line 6, in <module>
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy, global_step=global_step)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 315, in minimize
    grad_loss=grad_loss)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 386, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 560, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 368, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 560, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_grad.py", line 438, in _MaxPoolGrad
    data_format=op.get_attr("data_format"))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 1737, in _max_pool_grad
    data_format=data_format, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

...which was originally created as op u'max_pool_1/MaxPool', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
[elided 18 identical lines from previous traceback]
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-a1a24fae2a21>", line 8, in <module>
    max_pool_1 = co.max_pool_layer(conv2, name='max_pool_1')
  File "core.py", line 47, in max_pool_layer
    return tf.nn.max_pool(input, ksize=ksize, strides=strides, padding=padding)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 1821, in max_pool
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 1638, in _max_pool
    data_format=data_format, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[392,40,30,21]
	 [[Node: train/gradients/max_pool_1/MaxPool_grad/MaxPoolGrad = MaxPoolGrad[T=DT_FLOAT, data_format="NHWC", ksize=[1, 2, 2, 1], padding="SAME", strides=[1, 2, 2, 1], _device="/job:localhost/replica:0/task:0/gpu:0"](conv2/Relu, max_pool_1/MaxPool, train/gradients/Reshape_grad/Reshape)]]
	 [[Node: train/Adam/update/_38 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_215_train/Adam/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(15,10))
plt.plot(cost_history)
# plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()

In [None]:
y_test_class = np.argmax(test_y, axis=1)
y_pred_class = np.argmax(y_pred, axis=1)
conf_mat = metrics.confusion_matrix(y_test_class, y_pred_class)

In [None]:
# Compute ROC curve and ROC area for each class
n_classes = 4
fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(n_classes):
    fpr[i], tpr[i], _ = metrics.roc_curve(test_y[:, i], y_pred[:, i])
    roc_auc[i] = metrics.auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = metrics.roc_curve(test_y.ravel(), y_pred.ravel())
roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

plt.figure()
lw = 2
plt.plot(fpr["micro"], tpr["micro"], color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc["micro"])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

roc_score = metrics.roc_auc_score(test_y, y_pred)

#print "Loss: {} Accuracy: {}%".format(loss_and_metrics[0],loss_and_metrics[1] * 100)
print "ROC AUC Score: ", roc_score

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


In [None]:
plot_confusion_matrix(conf_mat, classes=paths)