In [1]:
import glob
import os
import librosa
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
%matplotlib inline
# plt.style.use('ggplot')

  from ._conv import register_converters as _register_converters


In [2]:
seed = 1234
tf.set_random_seed(seed)
np.random.seed(seed)

In [3]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Loading model

In [4]:
# model info:

# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# conv2d_1 (Conv2D)            (None, 38, 38, 32)        896       
# _________________________________________________________________
# activation_1 (Activation)    (None, 38, 38, 32)        0         
# _________________________________________________________________
# conv2d_2 (Conv2D)            (None, 36, 36, 32)        9248      
# _________________________________________________________________
# activation_2 (Activation)    (None, 36, 36, 32)        0         
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 18, 18, 32)        0         
# _________________________________________________________________
# dropout_1 (Dropout)          (None, 18, 18, 32)        0         
# _________________________________________________________________
# conv2d_3 (Conv2D)            (None, 16, 16, 64)        18496     
# _________________________________________________________________
# activation_3 (Activation)    (None, 16, 16, 64)        0         
# _________________________________________________________________
# conv2d_4 (Conv2D)            (None, 14, 14, 64)        36928     
# _________________________________________________________________
# activation_4 (Activation)    (None, 14, 14, 64)        0         
# _________________________________________________________________
# max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
# _________________________________________________________________
# dropout_2 (Dropout)          (None, 7, 7, 64)          0         
# _________________________________________________________________
# flatten_1 (Flatten)          (None, 3136)              0         
# _________________________________________________________________
# dense_1 (Dense)              (None, 256)               803072    
# _________________________________________________________________
# activation_5 (Activation)    (None, 256)               0         
# _________________________________________________________________
# dropout_3 (Dropout)          (None, 256)               0         
# _________________________________________________________________
# dense_2 (Dense)              (None, 10)                2570      
# _________________________________________________________________
# activation_6 (Activation)    (None, 10)                0         
# =================================================================
# Total params: 871,210
# Trainable params: 871,210
# Non-trainable params: 0
# _________________________________________________________________

# 0.9M params, about 4MB (keras 10MB)

In [5]:
def exp_evidence(logits): 
    return tf.exp(tf.clip_by_value(logits,-10,10))

#### KL Divergence calculator

def KL(alpha, K):
    beta=tf.constant(np.ones((1,K)),dtype=tf.float32)
    S_alpha = tf.reduce_sum(alpha,axis=1,keepdims=True)
    
    KL = tf.reduce_sum((alpha - beta)*(tf.digamma(alpha)-tf.digamma(S_alpha)),axis=1,keepdims=True) + \
         tf.lgamma(S_alpha) - tf.reduce_sum(tf.lgamma(alpha),axis=1,keepdims=True) + \
         tf.reduce_sum(tf.lgamma(beta),axis=1,keepdims=True) - tf.lgamma(tf.reduce_sum(beta,axis=1,keepdims=True))
    return KL

def mse_loss(p, alpha, K, global_step, annealing_step): 
    S = tf.reduce_sum(alpha, axis=1, keep_dims=True) 
    E = alpha - 1
    m = alpha / S
    
    A = tf.reduce_sum((p-m)**2, axis=1, keep_dims=True) 
    B = tf.reduce_sum(alpha*(S-alpha)/(S*S*(S+1)), axis=1, keep_dims=True) 
    
    annealing_coef = tf.minimum(1.0,tf.cast(global_step/annealing_step,tf.float32))
    
    alp = E*(1-p) + 1 
    C =  annealing_coef * KL(alp, K)
    return (A + B) + C

In [6]:
# define the model
# new network:

# NETWORK PARAMETERS
data_w = 40
data_h = 40
n_classes = 10
n_filters_1 = 32
n_filters_2 = 64
d_filter = 3
p_drop_1 = 0.25
p_drop_2 = 0.50

batch_size = 256
nb_epoch = 20

K= n_classes
num_channels = 3
num_labels = n_classes

lmb = 0.00
omega = 1.0
regularizer = tf.contrib.layers.l2_regularizer(scale=omega)


# new network:

X = tf.placeholder(tf.float32, shape=[None,data_w,data_h,num_channels], name = 'input')
Y = tf.placeholder(tf.float32, shape=[None,num_labels], name = 'label')

keep_prob = tf.placeholder(dtype=tf.float32, name = 'dropout_rate')
global_step = tf.Variable(initial_value=0, name='global_step', trainable=False)
annealing_step = tf.placeholder(dtype=tf.int32, name = 'annealing_step') 

### conv module

# Convolutional Layer #1
conv1 = tf.layers.conv2d(
    inputs=X,
    filters=32,
    strides=(1, 1),
    kernel_size=[3, 3],
    kernel_regularizer=regularizer,
    padding="valid"
    )
conv1_act = tf.nn.relu( conv1 )
# pool1 = tf.layers.max_pooling2d(inputs=act1, pool_size=[3, 3], strides=3)
# dropout1 = tf.layers.dropout(
#     inputs=pool1, rate=0.1)

# Convolutional Layer #2
conv2 = tf.layers.conv2d(
    inputs=conv1_act,
    filters=32,
    strides=(1, 1),
    kernel_size=[3, 3],
    kernel_regularizer=regularizer,
    padding="valid"
    )
# bn2 = tf.layers.batch_normalization(
#     conv2,
#     axis=-1
#     )
conv2_act = tf.nn.relu( conv2 )
conv2_mp = tf.layers.max_pooling2d(inputs=conv2_act, pool_size=[2, 2], strides=2)
dpout1 = tf.layers.dropout(
    inputs=conv2_mp, rate= p_drop_1)



# Convolutional Layer #3
conv3 = tf.layers.conv2d(
    inputs=dpout1,
    filters=64,
    strides=(1, 1),
    kernel_size=[3, 3],
    kernel_regularizer=regularizer,
    padding="valid"
    )
conv3_act = tf.nn.relu( conv3 )

# Convolutional Layer #4
conv4 = tf.layers.conv2d(
    inputs=conv1_act,
    filters=64,
    strides=(1, 1),
    kernel_size=[3, 3],
    kernel_regularizer=regularizer,
    padding="valid"
    )
# bn2 = tf.layers.batch_normalization(
#     conv2,
#     axis=-1
#     )
conv4_act = tf.nn.relu( conv4 )
conv4_mp = tf.layers.max_pooling2d(inputs=conv4_act, pool_size=[2, 2], strides=2)
dpout2 = tf.layers.dropout(
    inputs=conv4_mp, rate= p_drop_1)





### modify dimensions
shape = dpout2.get_shape().as_list()
flat1 = tf.reshape(dpout2, [-1, shape[1] * shape[2]* shape[3]])



### dense module

fc1 = tf.layers.dense(inputs=flat1, 
                          kernel_regularizer=regularizer,
                          units=256)
fc1_act = tf.nn.relu( fc1 )
dpout3 = tf.layers.dropout(
    inputs=fc1_act, rate= p_drop_2)

# Logits Layer
logits = tf.layers.dense(inputs=dpout3, 
                         kernel_regularizer=regularizer,
                         units=n_classes,
                         name = 'logits_tensor')


y_ = tf.nn.softmax(logits,name="softmax_tensor")


prediction = tf.argmax(logits, 1)



########### EDL extension ###########
 
logits2evidence =  exp_evidence ############ modify this function:  relu_evidence  exp_evidence softplus

evidence = logits2evidence(logits)
alpha = evidence + 1

u = K / tf.reduce_sum(alpha, axis=1, keepdims=True)

prob = alpha/tf.reduce_sum(alpha, 1, keepdims=True) 

loss_function = mse_loss  ########### use 5th MSE loss equ: loss_eq5, loss_eq4, loss_eq3, mse_loss

loss = tf.reduce_mean(loss_function(Y, alpha, K, global_step, annealing_step))
l2_loss = tf.losses.get_regularization_loss() * lmb
loss_func = loss + l2_loss

optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss_func, global_step=global_step)

match = tf.reshape(tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1)), tf.float32),(-1,1))
accuracy = tf.reduce_mean(match)


Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [7]:
# best_model_path = "save_model/model_test1.ckpt"
best_model_path = "SaveModel/best_model.ckpt"

saver = tf.train.Saver()
session =  tf.Session()
saver.restore(session, best_model_path)
print("Model restored.")

INFO:tensorflow:Restoring parameters from SaveModel/best_model.ckpt
Model restored.


In [8]:
### loading graph from file

# saver = tf.train.import_meta_graph('save_model/model_test2.ckpt.meta')

# session =  tf.Session()

# saver.restore(session,tf.train.latest_checkpoint(checkpoint_dir = 'save_model/'))
# print("Model restored.")

# Evaluating model 

In [9]:
def one_hot_encode(labels, num_class):
    n_labels = len(labels)
#     n_unique_labels = len(np.unique(labels))
    n_unique_labels = num_class
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode

In [10]:
## GET DATA TO WORK ON
print("Start loading data")

fd = open("data_x.pkl", 'rb')
fd2 = open("data_y.pkl", 'rb')
features = pickle.load(fd)
labels = pickle.load(fd2)

print("Data loaded")

# all testing data
X_test = features[:]
Y_test = labels[:]

X_test = X_test.astype('float32')
X_test /= 255

## one hot encoding
Y_test = one_hot_encode(Y_test, 10)

print(X_test.shape)
print(Y_test.shape)

batch_size = 256

n_batches_test = Y_test.shape[0]//batch_size
print('For batch of size %d: \n %d batches in test'%(batch_size, n_batches_test))

Start loading data
Data loaded
(54154, 40, 40, 3)
(54154, 10)
For batch of size 256: 
 211 batches in test


In [11]:
pred_y_list = np.zeros(0)

acc_list = []
loss_list = []

#       Performance on testing dataset:
for i in range(n_batches_test):
    if i == n_batches_test-1:
        batch_x = X_test[i * batch_size:, :, :, :]
        batch_y = Y_test[i * batch_size:, :]
    else:
        offset = (i * batch_size) % (Y_test.shape[0] - batch_size)
        batch_x = X_test[offset:(offset + batch_size), :, :, :]
        batch_y = Y_test[offset:(offset + batch_size), :]

#     logits, y_pred = session.run([logits, prediction], feed_dict={X: batch_x, Y : batch_y})
    y_pred, acc, c = session.run([prediction, accuracy, loss_func], feed_dict={X: batch_x, Y : batch_y, keep_prob:1.,  annealing_step:100*n_batches_test})

    print('epoch %d - %d%% -%f) '% (i+1, (100*(i+1))//n_batches_test, acc), end='\r' if i<n_batches_test-1 else '')
#     y_pred = np.argmax(logits, axis=1)
#     pred_y_list.append(y_pred)
    pred_y_list = np.concatenate([pred_y_list, y_pred])
    
    acc_list.append(acc)
    loss_list.append(c)
#     test_acc = np.array(np.array(acc_list).mean())
#     test_loss = np.array(np.array(loss_list).sum())
    

test_acc = np.array(np.array(acc_list).mean())
test_loss = np.array(np.array(loss_list).sum())
print('Testing:\t  Loss: %2.4f \t Accuracy: %2.4f' % (test_loss/Y_test.shape[0], test_acc))

epoch 211 - 100% -0.901015) Testing:	  Loss: 0.0010 	 Accuracy: 0.9293


In [12]:
y_true = np.argmax(Y_test, 1)
y_pred =pred_y_list

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

print('Accuracy on testing data:',sum(y_pred==y_true)/y_true.shape[0])
cf_matrix = confusion_matrix(y_true, y_pred)
print(cf_matrix)
class_wise_f1 = np.round(f1_score(y_true, y_pred, average=None)*100)*0.01
print('the mean-f1 score: {:.2f}'.format(np.mean(class_wise_f1)))

Accuracy on testing data: 0.9292757691029286
[[6409    3   72  136  129    4   22   29   74  112]
 [  11 1579   23   12   19    8    0   10    2   15]
 [  24    5 6567   99   19   20   12   17   25  135]
 [  42    3  121 4915   19   14   24   20   30   84]
 [  34   16   73   75 5613   20   20  101   37   76]
 [ 225   15   52  182   46 5758  162  248   50  134]
 [   6    0   11   29    9    3  742    0   13    5]
 [  26    1   12   20   15    3    2 6064    2   56]
 [  11    5   42   35    3    6    3   11 6202   16]
 [  63    8  159  135   31   28   10   23   68 6475]]
the mean-f1 score: 0.92


In [13]:
# About classes:
# 0	air_conditioner
# 1	car_horn
# 2	children_playing
# 3	dog_bark
# 4	drilling
# 5	engine_idling
# 6	gun_shot
# 7	jackhammer
# 8	siren
# 9	street_music

# Uncertainty output

In [14]:
pred_y_list = np.zeros(0)
uncertainty_y_list = np.zeros([0,1])

acc_list = []
loss_list = []

#       Performance on testing dataset:
for i in range(n_batches_test):
    if i == n_batches_test-1:
        batch_x = X_test[i * batch_size:, :, :, :]
        batch_y = Y_test[i * batch_size:, :]
    else:
        offset = (i * batch_size) % (Y_test.shape[0] - batch_size)
        batch_x = X_test[offset:(offset + batch_size), :, :, :]
        batch_y = Y_test[offset:(offset + batch_size), :]

#     logits, y_pred = session.run([logits, prediction], feed_dict={X: batch_x, Y : batch_y})
    y_pred, acc, c, uncertainty = session.run([prediction, accuracy, loss_func, u], feed_dict={X: batch_x, Y : batch_y, keep_prob:1.,  annealing_step:100*n_batches_test})

    print('epoch %d - %d%% -%f) '% (i+1, (100*(i+1))//n_batches_test, acc), end='\r' if i<n_batches_test-1 else '')
#     y_pred = np.argmax(logits, axis=1)
#     pred_y_list.append(y_pred)
    pred_y_list = np.concatenate([pred_y_list, y_pred])
    uncertainty_y_list = np.concatenate([uncertainty_y_list, uncertainty])
    
    acc_list.append(acc)
    loss_list.append(c)
#     test_acc = np.array(np.array(acc_list).mean())
#     test_loss = np.array(np.array(loss_list).sum())
    

test_acc = np.array(np.array(acc_list).mean())
test_loss = np.array(np.array(loss_list).sum())
print('Testing:\t  Loss: %2.4f \t Accuracy: %2.4f' % (test_loss/Y_test.shape[0], test_acc))

epoch 211 - 100% -0.901015) Testing:	  Loss: 0.0010 	 Accuracy: 0.9293


In [15]:
for i in range(15):
# for i in range(pred_y_list.shape[0]):
    if np.argmax(Y_test[i]) != pred_y_list[i]:
        print(np.argmax(Y_test[i]), '\t', pred_y_list[i], '\t', uncertainty_y_list[i], '\t!!!')
    else:
        print(np.argmax(Y_test[i]), '\t', pred_y_list[i], '\t', uncertainty_y_list[i])

0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
0 	 0.0 	 [0.00045379]
3 	 3.0 	 [0.12549192]
3 	 3.0 	 [0.00045379]
5 	 9.0 	 [0.9929685] 	!!!
5 	 9.0 	 [0.99995458] 	!!!
5 	 9.0 	 [0.99995458] 	!!!
5 	 7.0 	 [0.99995458] 	!!!
5 	 9.0 	 [0.99995232] 	!!!
5 	 9.0 	 [0.99995458] 	!!!


# Processing new data

In [16]:
## AUDIO DATA PROCESSING
import os
import librosa
import pickle

window_size = 512
## This for mel spectogram resolution
n_bands = 60
n_mfcc = 40
n_frames = 40

def windows(data, n_frames):
    ws = window_size * (n_frames - 1)
    start = 0
    while start < len(data):
        yield start, start + ws, ws
        start += (ws / 2)
        ## OVERLAP OF 50%
## END windows

In [17]:
def data_processing(work_dir):
#     work_dir = "gta_sound"
    raw_features = []
    _labels = []
    cnt = 0

    print("Working on dir: ", work_dir)


    for fs in os.listdir(work_dir ):
    #     if ".wav" not in fs: continue
        # print("Try Loading file: ", fs)
        sound_clip, sr = librosa.load(work_dir + "/" + fs)
        label = 6
        print(cnt, "Try Loading file: ", fs, " class: ", label)
        cnt += 1
        ## Work of file bacthes
        for (start, end, ws) in windows(sound_clip, n_frames):
            ## Get the sound part
            signal = sound_clip[int(start): int(end)]
            if len(signal) == ws:
                mfcc_spec = librosa.feature.mfcc(signal, n_mfcc=n_mfcc, n_mels=n_bands)
    #             print(mfcc_spec.shape)
                mfcc_spec = mfcc_spec.T.flatten()[:, np.newaxis].T
    #             print(mfcc_spec.shape)
                raw_features.append(mfcc_spec)
                _labels.append(label)

    print("Loaded ", cnt, " files")
    ## Add a new dimension
    raw_features = np.asarray(raw_features).reshape(len(raw_features), n_mfcc, n_frames, 1)



    ## Concate 2 elements on axis=3
    _features = np.concatenate((raw_features, np.zeros(np.shape(raw_features))), axis=3)

    _features = np.concatenate((_features, np.zeros(np.shape(raw_features))), axis=3)


    for i in range(len(_features)):
        _features[i, :, :, 1] = librosa.feature.delta(order=1, data=_features[i, :, :, 0])
        _features[i, :, :, 2] = librosa.feature.delta(order=2, data=_features[i, :, :, 0])

    # normalize, one-hot data
    test_x = _features
    test_x = test_x.astype('float32')
    test_x /= 255

    test_y = one_hot_encode(np.array(_labels), 10)
    print(test_x.shape, test_y.shape)
    
    return test_x, test_y

In [18]:
# work_dir = 'gun_shot'
work_dir = 'gta_sound'

test_x, test_y = data_processing(work_dir)

Working on dir:  gta_sound
0 Try Loading file:  pistol.wav  class:  6
1 Try Loading file:  sniper_m14.wav  class:  6
2 Try Loading file:  pistol_magnum.wav  class:  6
3 Try Loading file:  rifle_ak47_single_fire.wav  class:  6
4 Try Loading file:  sniper_g3sg1.wav  class:  6
5 Try Loading file:  rifle_m16_single_fire.wav  class:  6
6 Try Loading file:  rifle_m60_single_fire.wav  class:  6
7 Try Loading file:  smg_uzi_singl_fire.wav  class:  6
Loaded  8  files
(17, 40, 40, 3) (17, 10)


  b = a[a_slice]


In [19]:
# fd = open("gun_x.pkl", 'wb')
# pickle.dump(_features, fd)
# fd2 = open("gun_y.pkl", 'wb')
# pickle.dump(one_hot_encode(np.array(_labels), 10), fd2)

# Testing on trained model

In [20]:
# ## GET DATA TO WORK ON
# print("Start loading data")

# fd = open("org_gun_x.pkl", 'rb')
# fd2 = open("org_gun_y.pkl", 'rb')
# test_x = np.array(pickle.load(fd))
# test_x = test_x.astype('float32')
# test_x /= 255

# test_y = np.array(pickle.load(fd2))
# test_y = one_hot_encode(np.array(test_y), 10)
# print("Data loaded")
# print(test_x.shape, test_y.shape)

In [21]:
y_pred, acc, c, uncertainty = session.run([prediction, accuracy, loss_func, u], feed_dict={X: test_x, Y : test_y, keep_prob:1.,  annealing_step:100*n_batches_test})
print('Accuracy is: ',acc)

Accuracy is:  0.8235294


In [22]:
print('True \t Predict \t Uncertainty \t Match')
for i in range(y_pred.shape[0]):
# for i in range(pred_y_list.shape[0]):
    if np.argmax(test_y[i]) != y_pred[i]:
        print(np.argmax(test_y[i]), ' \t', y_pred[i], '\t\t ', uncertainty_y_list[i], '    !!!')
    else:
        print(np.argmax(test_y[i]), ' \t', y_pred[i], '\t\t ', uncertainty_y_list[i])

True 	 Predict 	 Uncertainty 	 Match
6  	 6 		  [0.00045379]
6  	 6 		  [0.00045379]
6  	 6 		  [0.00045379]
6  	 6 		  [0.00045379]
6  	 6 		  [0.00045379]
6  	 3 		  [0.00045379]     !!!
6  	 6 		  [0.00045379]
6  	 6 		  [0.12549192]
6  	 4 		  [0.00045379]     !!!
6  	 6 		  [0.9929685]
6  	 6 		  [0.99995458]
6  	 3 		  [0.99995458]     !!!
6  	 6 		  [0.99995458]
6  	 6 		  [0.99995232]
6  	 6 		  [0.99995458]
6  	 6 		  [0.99995458]
6  	 6 		  [0.00045379]


In [23]:
# About classes:
# 0	air_conditioner
# 1	car_horn
# 2	children_playing
# 3	dog_bark
# 4	drilling
# 5	engine_idling
# 6	gun_shot
# 7	jackhammer
# 8	siren
# 9	street_music