In [1]:
import librosa 
import numpy as np
def extract_features(file_name):
    """Return the time-averaged MFCC vector for one audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCC coefficients are computed, and each coefficient is averaged over
    the time axis.

    Parameters
    ----------
    file_name : str or path-like
        Path of the audio file to read.

    Returns
    -------
    numpy.ndarray or None
        One mean value per MFCC coefficient (40 values for ``n_mfcc=40``),
        or ``None`` when the file could not be loaded or processed.
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Transpose so axis 0 runs over frames, then average over it.
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort a whole
        # extraction run. Report the offending path (the previous code printed
        # the undefined name `file`, which raised NameError on Python 3) and
        # the reason, then signal failure with None.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccsscaled

In [2]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset
fulldatasetpath = '../dataset/audio/'

# Alternative metadata file kept for reference:
#metadata = pd.read_csv('../dataset/dataset.csv')
metadata = pd.read_csv('../dataset/datasetM2.csv')

features = []
skipped_files = []  # paths for which extract_features() returned None

# Iterate through each sound file and extract the features
for index, row in metadata.iterrows():

    # Audio files live in per-fold sub-directories: <root>/fold<N>/<slice_file_name>
    file_name = os.path.join(
        os.path.abspath(fulldatasetpath),
        'fold' + str(row["fold"]),
        str(row["slice_file_name"]),
    )

    class_label = row["class_name"]
    data = extract_features(file_name)

    # A None feature would turn the feature column into a ragged object array
    # and break the later np.array(...) conversion and model training, so
    # failed files are skipped and reported instead of being stored.
    if data is None:
        skipped_files.append(file_name)
        continue

    features.append([data, class_label])

# Convert into a pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files')
if skipped_files:
    print('Skipped ', len(skipped_files), ' files that could not be parsed')

KeyboardInterrupt: 

In [64]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Stack the per-file feature vectors and gather the class names as numpy arrays
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Map class names to integer ids, then expand the ids to one-hot rows
le = LabelEncoder()
integer_labels = le.fit_transform(y)
yy = to_categorical(integer_labels)

In [65]:
# split the dataset 
from sklearn.model_selection import train_test_split 

# Hold out 20% of the samples; the fixed random_state keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

In [66]:
### store the preprocessed data for use in the next notebook
# %store is the IPython magic that persists a variable so that another
# kernel/notebook can reload it later with `%store -r <name>`.
# Persisted here: the train/test feature and label splits, the one-hot label
# matrix `yy`, and the fitted LabelEncoder `le` (needed to map predictions
# back to class names).

%store x_train 
%store x_test 
%store y_train 
%store y_test 
%store yy 
%store le

Stored 'x_train' (ndarray)
Stored 'x_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'y_test' (ndarray)
Stored 'yy' (ndarray)
Stored 'le' (LabelEncoder)


In [67]:
# retrieve the preprocessed data from previous notebook
# `%store -r <name>` reloads a variable previously persisted with `%store`.
# NOTE(review): these names must already have been stored by the
# preprocessing cells, otherwise the restore has nothing to load.

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [68]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# One output unit per class: the width of the one-hot label matrix
num_labels = yy.shape[1]
filter_size = 2

# Construct model: two 256-unit ReLU hidden layers with 50% dropout each,
# followed by a softmax layer over the classes. The layers are passed as a
# list, which stacks them in the same order as successive model.add() calls.
model = Sequential([
    Dense(256, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),

    Dense(256),
    Activation('relu'),
    Dropout(0.5),

    Dense(num_labels),
    Activation('softmax'),
])

In [69]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [70]:
# Print the layer-by-layer architecture overview
model.summary()

# Baseline: score the still-untrained network on the held-out split.
# evaluate() returns [loss, accuracy] because compile() requested the
# 'accuracy' metric, so index 1 is the accuracy fraction.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % (accuracy,))

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 256)               10496     
_________________________________________________________________
activation_10 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_11 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)               

In [71]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

import os

num_epochs = 350
num_batch_size = 40

# The checkpoint callback writes into 'saved_models/'. Nothing else in this
# notebook creates that directory, and the save is not guaranteed to create
# missing parent directories, so make sure it exists before training starts
# (otherwise the first checkpoint write can fail on a fresh checkout).
checkpoint_path = 'saved_models/weights.best.basic_mlp.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the weights of the epoch with the best
# monitored value (val_loss, per the training log).
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)
start = datetime.now()

# NOTE(review): the test split doubles as validation data here, so the
# checkpoint selection has seen the data later reported as "Testing Accuracy".
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 267 samples, validate on 67 samples
Epoch 1/350

Epoch 00001: val_loss improved from inf to 33.50217, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 2/350

Epoch 00002: val_loss improved from 33.50217 to 16.64023, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 3/350

Epoch 00003: val_loss improved from 16.64023 to 10.42911, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 4/350

Epoch 00004: val_loss improved from 10.42911 to 8.09158, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 5/350

Epoch 00005: val_loss improved from 8.09158 to 5.68102, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 6/350

Epoch 00006: val_loss improved from 5.68102 to 3.81798, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 7/350

Epoch 00007: val_loss improved from 3.81798 to 3.66755, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 8/350

Epoch 00008: val_loss improved from 3.66755 to 2.42592, sa


Epoch 00072: val_loss did not improve from 1.76526
Epoch 73/350

Epoch 00073: val_loss improved from 1.76526 to 1.73787, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 74/350

Epoch 00074: val_loss did not improve from 1.73787
Epoch 75/350

Epoch 00075: val_loss did not improve from 1.73787
Epoch 76/350

Epoch 00076: val_loss did not improve from 1.73787
Epoch 77/350

Epoch 00077: val_loss did not improve from 1.73787
Epoch 78/350

Epoch 00078: val_loss did not improve from 1.73787
Epoch 79/350

Epoch 00079: val_loss did not improve from 1.73787
Epoch 80/350

Epoch 00080: val_loss did not improve from 1.73787
Epoch 81/350

Epoch 00081: val_loss did not improve from 1.73787
Epoch 82/350

Epoch 00082: val_loss did not improve from 1.73787
Epoch 83/350

Epoch 00083: val_loss did not improve from 1.73787
Epoch 84/350

Epoch 00084: val_loss did not improve from 1.73787
Epoch 85/350

Epoch 00085: val_loss did not improve from 1.73787
Epoch 86/350

Epoch 00086: val_loss did n

Epoch 112/350

Epoch 00112: val_loss did not improve from 1.64694
Epoch 113/350

Epoch 00113: val_loss did not improve from 1.64694
Epoch 114/350

Epoch 00114: val_loss did not improve from 1.64694
Epoch 115/350

Epoch 00115: val_loss did not improve from 1.64694
Epoch 116/350

Epoch 00116: val_loss did not improve from 1.64694
Epoch 117/350

Epoch 00117: val_loss improved from 1.64694 to 1.64111, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 118/350

Epoch 00118: val_loss did not improve from 1.64111
Epoch 119/350

Epoch 00119: val_loss did not improve from 1.64111
Epoch 120/350

Epoch 00120: val_loss did not improve from 1.64111
Epoch 121/350

Epoch 00121: val_loss did not improve from 1.64111
Epoch 122/350

Epoch 00122: val_loss did not improve from 1.64111
Epoch 123/350

Epoch 00123: val_loss improved from 1.64111 to 1.62370, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 124/350

Epoch 00124: val_loss improved from 1.62370 to 1.59575, saving model 


Epoch 00186: val_loss improved from 1.36610 to 1.34633, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 187/350

Epoch 00187: val_loss improved from 1.34633 to 1.34024, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 188/350

Epoch 00188: val_loss did not improve from 1.34024
Epoch 189/350

Epoch 00189: val_loss did not improve from 1.34024
Epoch 190/350

Epoch 00190: val_loss did not improve from 1.34024
Epoch 191/350

Epoch 00191: val_loss did not improve from 1.34024
Epoch 192/350

Epoch 00192: val_loss did not improve from 1.34024
Epoch 193/350

Epoch 00193: val_loss improved from 1.34024 to 1.33631, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 194/350

Epoch 00194: val_loss did not improve from 1.33631
Epoch 195/350

Epoch 00195: val_loss did not improve from 1.33631
Epoch 196/350

Epoch 00196: val_loss did not improve from 1.33631
Epoch 197/350

Epoch 00197: val_loss did not improve from 1.33631
Epoch 198/350

Epoch 00198: val_loss


Epoch 00223: val_loss improved from 1.22659 to 1.22440, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 224/350

Epoch 00224: val_loss did not improve from 1.22440
Epoch 225/350

Epoch 00225: val_loss improved from 1.22440 to 1.21155, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 226/350

Epoch 00226: val_loss did not improve from 1.21155
Epoch 227/350

Epoch 00227: val_loss did not improve from 1.21155
Epoch 228/350

Epoch 00228: val_loss did not improve from 1.21155
Epoch 229/350

Epoch 00229: val_loss did not improve from 1.21155
Epoch 230/350

Epoch 00230: val_loss did not improve from 1.21155
Epoch 231/350

Epoch 00231: val_loss did not improve from 1.21155
Epoch 232/350

Epoch 00232: val_loss did not improve from 1.21155
Epoch 233/350

Epoch 00233: val_loss did not improve from 1.21155
Epoch 234/350

Epoch 00234: val_loss did not improve from 1.21155
Epoch 235/350

Epoch 00235: val_loss did not improve from 1.21155
Epoch 236/350

Epoch 00236: val_


Epoch 00297: val_loss improved from 0.95385 to 0.94186, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 298/350

Epoch 00298: val_loss improved from 0.94186 to 0.92909, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 299/350

Epoch 00299: val_loss improved from 0.92909 to 0.92079, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 300/350

Epoch 00300: val_loss improved from 0.92079 to 0.91166, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 301/350

Epoch 00301: val_loss did not improve from 0.91166
Epoch 302/350

Epoch 00302: val_loss did not improve from 0.91166
Epoch 303/350

Epoch 00303: val_loss did not improve from 0.91166
Epoch 304/350

Epoch 00304: val_loss did not improve from 0.91166
Epoch 305/350

Epoch 00305: val_loss improved from 0.91166 to 0.90157, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 306/350

Epoch 00306: val_loss improved from 0.90157 to 0.88528, saving model to saved_models/weights.be

Epoch 335/350

Epoch 00335: val_loss improved from 0.76725 to 0.74920, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 336/350

Epoch 00336: val_loss improved from 0.74920 to 0.72056, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 337/350

Epoch 00337: val_loss did not improve from 0.72056
Epoch 338/350

Epoch 00338: val_loss did not improve from 0.72056
Epoch 339/350

Epoch 00339: val_loss improved from 0.72056 to 0.71514, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 340/350

Epoch 00340: val_loss improved from 0.71514 to 0.71353, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 341/350

Epoch 00341: val_loss did not improve from 0.71353
Epoch 342/350

Epoch 00342: val_loss did not improve from 0.71353
Epoch 343/350

Epoch 00343: val_loss improved from 0.71353 to 0.71145, saving model to saved_models/weights.best.basic_mlp.hdf5
Epoch 344/350

Epoch 00344: val_loss improved from 0.71145 to 0.69925, saving model to saved_mod

In [72]:
# Report the accuracy of the current model on the training and testing splits.
# evaluate() returns [loss, accuracy]; index 1 is the accuracy fraction.
for split_label, (inputs, targets) in (
    ("Training Accuracy: ", (x_train, y_train)),
    ("Testing Accuracy: ", (x_test, y_test)),
):
    score = model.evaluate(inputs, targets, verbose=0)
    print(split_label, score[1])

Training Accuracy:  0.7865168452262878
Testing Accuracy:  0.7313432693481445


In [73]:
import librosa 
import numpy as np 

def extract_feature(file_name):
    """Return the time-averaged MFCC vector of one audio file as a batch of one.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCC coefficients are computed, and each coefficient is averaged over
    the time axis. The vector is wrapped in an outer array so it can be fed
    to the model as a single-sample batch.

    Parameters
    ----------
    file_name : str or path-like
        Path of the audio file to read.

    Returns
    -------
    numpy.ndarray or None
        2-D array holding one row of per-coefficient means, or ``None`` when
        the file could not be loaded or processed.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        # Transpose so axis 0 runs over frames, then average over it.
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Report the offending path and the reason. The previous code printed
        # the undefined name `file` (NameError on Python 3) and returned a
        # (None, None) tuple, which did not match the single-array success
        # value; a plain None is the consistent failure signal.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return np.array([mfccsscaled])


In [74]:
def print_prediction(file_name):
    """Print the predicted class and per-class probabilities for an audio file.

    Features are obtained from ``extract_feature``, scored with the global
    ``model``, and class indices are mapped back to names with the global
    label encoder ``le``.

    Parameters
    ----------
    file_name : str or path-like
        Path of the audio file to classify.

    Returns
    -------
    None
        Results are printed; nothing is returned.
    """
    prediction_feature = extract_feature(file_name)

    # extract_feature signals failure with a non-array value; do not feed
    # that to the model.
    if not isinstance(prediction_feature, np.ndarray):
        print("Could not extract features from:", file_name)
        return

    # One forward pass yields the class probabilities. The predicted class is
    # their argmax, which is what Sequential.predict_classes computed for a
    # multi-class softmax output; predict_classes/predict_proba are no longer
    # available in newer Keras releases.
    predicted_proba_vector = model.predict(prediction_feature)
    predicted_proba = predicted_proba_vector[0]

    predicted_vector = np.argmax(predicted_proba_vector, axis=-1)
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    # Decode every class index once instead of once per loop iteration.
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [76]:
# test: classify one sample recording and print the per-class probabilities

filename = '../dataset/audio/duo1.wav' 
print_prediction(filename) 

The predicted class is: Education 

Education 		 :  0.91404402256011962890625000000000
Shopping 		 :  0.00071707600727677345275878906250
double_tap 		 :  0.03254792094230651855468750000000
game 		 :  0.01222573220729827880859375000000
long_tap 		 :  0.00003888266655849292874336242676
social media 		 :  0.00524631282314658164978027343750
swipe 		 :  0.00016034871805459260940551757812
tap 		 :  0.02738735824823379516601562500000
travel 		 :  0.00004298471685615368187427520752
zoom 		 :  0.00758932018652558326721191406250
