In [2]:
import librosa 
import numpy as np
def extract_features(file_name):
    """Compute a fixed-length MFCC summary vector for one audio file.

    The file is loaded with librosa (``kaiser_fast`` resampling), 40 MFCCs are
    computed, and each coefficient is averaged over the time axis.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A 1-D numpy array holding the mean of each of the 40 MFCC
        coefficients, or ``None`` if the file could not be loaded or
        processed.
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Transpose so axis 0 is time, then average it away: one value per coefficient.
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort extraction: report the offending path and the cause, then
        # signal failure to the caller. (The previous code printed the undefined
        # name `file`, which raised NameError instead of returning None.)
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccsscaled

In [3]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset 
fulldatasetpath = '../dataset/audio/'

# The metadata CSV must provide the "fold", "slice_file_name" and "class_name"
# columns, which are read in the loop below.
#metadata = pd.read_csv('../dataset/dataset.csv')
metadata = pd.read_csv('../dataset/datasetM2.csv')

# Resolve the dataset root once instead of on every iteration
dataset_root = os.path.abspath(fulldatasetpath)

features = []
skipped_files = []

# Iterate through each sound file and extract the features 
for index, row in metadata.iterrows():

    # Audio files live under <root>/fold<N>/<slice_file_name>
    file_name = os.path.join(dataset_root, 'fold' + str(row["fold"]), str(row["slice_file_name"]))

    class_label = row["class_name"]
    data = extract_features(file_name)

    # extract_features signals failure with None. Keeping such a row would turn
    # the feature column into a ragged object array when it is later converted
    # with np.array(), so record the file and skip it.
    if data is None:
        skipped_files.append(file_name)
        continue

    features.append([data, class_label])

# Convert into a Panda dataframe 
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files') 
if skipped_files:
    print('Skipped ', len(skipped_files), ' files that could not be parsed')

Finished feature extraction from  207  files


In [4]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Stack the per-file feature vectors and their class names into numpy arrays
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Fit the encoder on the class names, map them to integer ids,
# then one-hot encode those ids
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

Using TensorFlow backend.


In [5]:
# split the dataset 
from sklearn.model_selection import train_test_split 

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

In [6]:
### store the preprocessed data for use in the next notebook
# IPython's %store magic persists each named variable so that a later session
# can restore it with `%store -r <name>`: the train/test splits, the one-hot
# label matrix `yy` and the fitted label encoder `le`.
# Keep one name per line and no trailing comments: the magic treats the rest of
# the line as its arguments.

%store x_train 
%store x_test 
%store y_train 
%store y_test 
%store yy 
%store le

Stored 'x_train' (ndarray)
Stored 'x_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'y_test' (ndarray)
Stored 'yy' (ndarray)
Stored 'le' (LabelEncoder)


In [7]:
# retrieve the preprocessed data from previous notebook
# `%store -r <name>` reloads a variable previously saved with `%store <name>`.
# NOTE(review): in this file the same names are stored by the cell directly
# above, so this cell only matters when starting from a fresh kernel.

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [8]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# One output unit per class: yy is the one-hot label matrix, so its second
# dimension is the number of classes
num_labels = yy.shape[1]
filter_size = 2

# Construct model: two 256-unit ReLU hidden layers, each followed by 50%
# dropout, and a softmax output layer. The input is a 40-value feature vector.
model = Sequential([
    Dense(256, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),

    Dense(256),
    Activation('relu'),
    Dropout(0.5),

    Dense(num_labels),
    Activation('softmax'),
])

In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Print the layer-by-layer architecture
model.summary()

# Baseline before any training: score the freshly initialised network on the
# test split. evaluate() returns [loss, accuracy] for the metrics compiled above.
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 256)               10496     
_________________________________________________________________
activation_1 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_2 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 5)                

In [11]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

# Local import so this cell also works when run from a fresh kernel
import os

num_epochs = 350
num_batch_size = 40

# Make sure the checkpoint directory exists before training starts: depending
# on the Keras version, saving into a missing directory fails at the first
# checkpoint instead of creating it.
checkpoint_path = 'saved_models/weights.best.basic_mlp.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the weights with the best validation loss seen so far
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)
start = datetime.now()

# NOTE(review): the test split doubles as validation data, so the "best"
# checkpoint is selected on the same data later used to report test accuracy.
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 165 samples, validate on 42 samples
Epoch 1/350

Epoch 00001: val_loss improved from inf to 48.41048, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 2/350

Epoch 00002: val_loss improved from 48.41048 to 33.13845, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 3/350

Epoch 00003: val_loss improved from 33.13845 to 10.29185, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 4/350

Epoch 00004: val_loss improved from 10.29185 to 10.04516, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 5/350

Epoch 00005: val_loss did not improve from 10.04516
Epoch 6/350

Epoch 00006: val_loss did not improve from 10.04516
Epoch 7/350

Epoch 00007: val_loss improved from 10.04516 to 7.76904, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 8/350

Epoch 00008: val_loss improved from 7.76904 to 4.93475, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 9/350

Epoch 00009: val_loss did not improve from 4.93475
E


Epoch 00037: val_loss did not improve from 1.00297
Epoch 38/350

Epoch 00038: val_loss did not improve from 1.00297
Epoch 39/350

Epoch 00039: val_loss did not improve from 1.00297
Epoch 40/350

Epoch 00040: val_loss did not improve from 1.00297
Epoch 41/350

Epoch 00041: val_loss did not improve from 1.00297
Epoch 42/350

Epoch 00042: val_loss did not improve from 1.00297
Epoch 43/350

Epoch 00043: val_loss did not improve from 1.00297
Epoch 44/350

Epoch 00044: val_loss did not improve from 1.00297
Epoch 45/350

Epoch 00045: val_loss did not improve from 1.00297
Epoch 46/350

Epoch 00046: val_loss did not improve from 1.00297
Epoch 47/350

Epoch 00047: val_loss did not improve from 1.00297
Epoch 48/350

Epoch 00048: val_loss did not improve from 1.00297
Epoch 49/350

Epoch 00049: val_loss did not improve from 1.00297
Epoch 50/350

Epoch 00050: val_loss did not improve from 1.00297
Epoch 51/350

Epoch 00051: val_loss did not improve from 1.00297
Epoch 52/350

Epoch 00052: val_loss di


Epoch 00078: val_loss did not improve from 1.00297
Epoch 79/350

Epoch 00079: val_loss did not improve from 1.00297
Epoch 80/350

Epoch 00080: val_loss did not improve from 1.00297
Epoch 81/350

Epoch 00081: val_loss did not improve from 1.00297
Epoch 82/350

Epoch 00082: val_loss did not improve from 1.00297
Epoch 83/350

Epoch 00083: val_loss did not improve from 1.00297
Epoch 84/350

Epoch 00084: val_loss did not improve from 1.00297
Epoch 85/350

Epoch 00085: val_loss did not improve from 1.00297
Epoch 86/350

Epoch 00086: val_loss did not improve from 1.00297
Epoch 87/350

Epoch 00087: val_loss did not improve from 1.00297
Epoch 88/350

Epoch 00088: val_loss did not improve from 1.00297
Epoch 89/350

Epoch 00089: val_loss did not improve from 1.00297
Epoch 90/350

Epoch 00090: val_loss did not improve from 1.00297
Epoch 91/350

Epoch 00091: val_loss did not improve from 1.00297
Epoch 92/350

Epoch 00092: val_loss did not improve from 1.00297
Epoch 93/350

Epoch 00093: val_loss di


Epoch 00120: val_loss did not improve from 1.00297
Epoch 121/350

Epoch 00121: val_loss did not improve from 1.00297
Epoch 122/350

Epoch 00122: val_loss did not improve from 1.00297
Epoch 123/350

Epoch 00123: val_loss did not improve from 1.00297
Epoch 124/350

Epoch 00124: val_loss did not improve from 1.00297
Epoch 125/350

Epoch 00125: val_loss did not improve from 1.00297
Epoch 126/350

Epoch 00126: val_loss did not improve from 1.00297
Epoch 127/350

Epoch 00127: val_loss did not improve from 1.00297
Epoch 128/350

Epoch 00128: val_loss did not improve from 1.00297
Epoch 129/350

Epoch 00129: val_loss did not improve from 1.00297
Epoch 130/350

Epoch 00130: val_loss did not improve from 1.00297
Epoch 131/350

Epoch 00131: val_loss did not improve from 1.00297
Epoch 132/350

Epoch 00132: val_loss did not improve from 1.00297
Epoch 133/350

Epoch 00133: val_loss did not improve from 1.00297
Epoch 134/350

Epoch 00134: val_loss did not improve from 1.00297
Epoch 135/350

Epoch 001


Epoch 00161: val_loss did not improve from 1.00297
Epoch 162/350

Epoch 00162: val_loss did not improve from 1.00297
Epoch 163/350

Epoch 00163: val_loss did not improve from 1.00297
Epoch 164/350

Epoch 00164: val_loss did not improve from 1.00297
Epoch 165/350

Epoch 00165: val_loss did not improve from 1.00297
Epoch 166/350

Epoch 00166: val_loss did not improve from 1.00297
Epoch 167/350

Epoch 00167: val_loss did not improve from 1.00297
Epoch 168/350

Epoch 00168: val_loss did not improve from 1.00297
Epoch 169/350

Epoch 00169: val_loss did not improve from 1.00297
Epoch 170/350

Epoch 00170: val_loss did not improve from 1.00297
Epoch 171/350

Epoch 00171: val_loss did not improve from 1.00297
Epoch 172/350

Epoch 00172: val_loss did not improve from 1.00297
Epoch 173/350

Epoch 00173: val_loss improved from 1.00297 to 0.99722, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 174/350

Epoch 00174: val_loss improved from 0.99722 to 0.99457, saving model to saved_m


Epoch 00200: val_loss did not improve from 0.97175
Epoch 201/350

Epoch 00201: val_loss did not improve from 0.97175
Epoch 202/350

Epoch 00202: val_loss did not improve from 0.97175
Epoch 203/350

Epoch 00203: val_loss did not improve from 0.97175
Epoch 204/350

Epoch 00204: val_loss did not improve from 0.97175
Epoch 205/350

Epoch 00205: val_loss did not improve from 0.97175
Epoch 206/350

Epoch 00206: val_loss did not improve from 0.97175
Epoch 207/350

Epoch 00207: val_loss improved from 0.97175 to 0.96475, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 208/350

Epoch 00208: val_loss improved from 0.96475 to 0.96102, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 209/350

Epoch 00209: val_loss improved from 0.96102 to 0.96086, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 210/350

Epoch 00210: val_loss did not improve from 0.96086
Epoch 211/350

Epoch 00211: val_loss improved from 0.96086 to 0.95816, saving model to saved_models/we


Epoch 00237: val_loss did not improve from 0.90741
Epoch 238/350

Epoch 00238: val_loss did not improve from 0.90741
Epoch 239/350

Epoch 00239: val_loss did not improve from 0.90741
Epoch 240/350

Epoch 00240: val_loss did not improve from 0.90741
Epoch 241/350

Epoch 00241: val_loss did not improve from 0.90741
Epoch 242/350

Epoch 00242: val_loss improved from 0.90741 to 0.89529, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 243/350

Epoch 00243: val_loss improved from 0.89529 to 0.88930, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 244/350

Epoch 00244: val_loss improved from 0.88930 to 0.87902, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 245/350

Epoch 00245: val_loss improved from 0.87902 to 0.86131, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 246/350

Epoch 00246: val_loss improved from 0.86131 to 0.85495, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 247/350

Epoch 00247: val_loss improve


Epoch 00273: val_loss did not improve from 0.75154
Epoch 274/350

Epoch 00274: val_loss did not improve from 0.75154
Epoch 275/350

Epoch 00275: val_loss did not improve from 0.75154
Epoch 276/350

Epoch 00276: val_loss improved from 0.75154 to 0.75020, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 277/350

Epoch 00277: val_loss improved from 0.75020 to 0.73815, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 278/350

Epoch 00278: val_loss improved from 0.73815 to 0.73062, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 279/350

Epoch 00279: val_loss improved from 0.73062 to 0.72871, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 280/350

Epoch 00280: val_loss improved from 0.72871 to 0.72308, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 281/350

Epoch 00281: val_loss improved from 0.72308 to 0.72244, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 282/350

Epoch 00282: val_loss did not imp


Epoch 00310: val_loss improved from 0.65595 to 0.65113, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 311/350

Epoch 00311: val_loss improved from 0.65113 to 0.64966, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 312/350

Epoch 00312: val_loss did not improve from 0.64966
Epoch 313/350

Epoch 00313: val_loss improved from 0.64966 to 0.63933, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 314/350

Epoch 00314: val_loss improved from 0.63933 to 0.63035, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 315/350

Epoch 00315: val_loss improved from 0.63035 to 0.61952, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 316/350

Epoch 00316: val_loss improved from 0.61952 to 0.61535, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 317/350

Epoch 00317: val_loss did not improve from 0.61535
Epoch 318/350

Epoch 00318: val_loss did not improve from 0.61535
Epoch 319/350

Epoch 00319: val_loss did not imp


Epoch 00349: val_loss did not improve from 0.61535
Epoch 350/350

Epoch 00350: val_loss improved from 0.61535 to 0.59942, saving model to saved_models/weights.best.basic_mlp.hdf5
Training completed in time:  0:00:06.336345


In [12]:
# Report the accuracy of the trained model on the training split, then on the
# testing split. evaluate() returns [loss, accuracy], so index 1 is the accuracy.
for label, inputs, targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(inputs, targets, verbose=0)
    print(label, score[1])

Training Accuracy:  0.800000011920929
Testing Accuracy:  0.7142857313156128


In [13]:
import librosa 
import numpy as np 

def extract_feature(file_name):
    """Compute the MFCC summary of one audio file as a single-row batch.

    The file is loaded with librosa (``kaiser_fast`` resampling), 40 MFCCs are
    computed, and each coefficient is averaged over the time axis. The result
    is wrapped in an extra leading dimension so it can be passed directly to
    ``model.predict``.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A numpy array of shape ``(1, 40)``, or ``None`` if the file could not
        be loaded or processed.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        # Transpose so axis 0 is time, then average it away: one value per coefficient.
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Report the offending path and the cause, then signal failure with a
        # single None so callers can test `is None`. (The previous code printed
        # the undefined name `file`, raising NameError, and its intended
        # failure value was a two-element tuple unlike the success value.)
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return np.array([mfccsscaled])


In [14]:
def print_prediction(file_name):
    """Classify one audio file and print the result.

    Prints the predicted class name followed by the probability the model
    assigns to every class known to the label encoder ``le``.

    Args:
        file_name: Path of the audio file to classify.

    Returns:
        None. If features cannot be extracted, a message is printed and no
        prediction is made.
    """
    prediction_feature = extract_feature(file_name)

    # Feature extraction reports failure with a non-array value; do not feed
    # that to the model.
    if not isinstance(prediction_feature, np.ndarray):
        print("Could not extract features from:", file_name)
        return

    # One forward pass gives the per-class probabilities, and the predicted
    # class is their argmax. This replaces predict_classes()/predict_proba(),
    # which are not available in newer Keras releases and each ran the model
    # separately.
    predicted_proba_vector = model.predict(prediction_feature)
    predicted_vector = np.argmax(predicted_proba_vector, axis=-1)
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    predicted_proba = predicted_proba_vector[0]
    # Decode all class indices in one call instead of once per loop iteration
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [46]:
# test
# Smoke test: classify a single recording and print the predicted class
# together with the probability assigned to each class.

filename = '../dataset/audio/alibaba4.wav' 
print_prediction(filename) 

The predicted class is: tap 

double_tap 		 :  0.00000000000014869849058671280595
long_tap 		 :  0.00000000000000000000000000000000
swipe 		 :  0.00000000000000000000000000967433
tap 		 :  1.00000000000000000000000000000000
zoom 		 :  0.00000000000000000000000000000000
