**Load preprocessed dataset**

In [2]:
# Restore the variables persisted by the preprocessing notebook via IPython's
# %store magic (`%store -r NAME` reloads a value previously saved with `%store NAME`).
# Comments must stay on their own lines: a line magic treats the rest of its line as arguments.
#   x_train / x_test : feature arrays, reshaped below to (samples, num_rows, num_columns, num_channels)
#   y_train / y_test : targets passed to model.fit / model.evaluate
#   yy               : label matrix; its second dimension is used as the number of classes
#   le               : label encoder used to map class indices back to names (presumably a
#                      fitted sklearn LabelEncoder - confirm against the preprocessing notebook)

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [3]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, Conv2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

# Shape of a single model input: 40 rows (matches n_mfcc used for feature
# extraction) x 174 columns (matches the padding length) x 1 channel.
num_rows, num_columns, num_channels = 40, 174, 1

**Construct the Model**

In [4]:
# Give both splits the 4-D layout (samples, num_rows, num_columns, num_channels)
# that the first Conv2D layer declares as its input shape.
x_train, x_test = (
    split.reshape(split.shape[0], num_rows, num_columns, num_channels)
    for split in (x_train, x_test)
)

# One output unit per column of the label matrix.
num_labels = yy.shape[1]
filter_size = 2

In [23]:
# Build the CNN: four Conv2D -> MaxPooling2D -> Dropout stages with the filter
# count doubling each stage (16, 32, 64, 128), then global average pooling and
# a softmax layer with one unit per class.
model = Sequential()

for stage, n_filters in enumerate((16, 32, 64, 128)):
    conv_options = {'filters': n_filters, 'kernel_size': 2, 'activation': 'relu'}
    if stage == 0:
        # Only the very first layer declares the input shape.
        conv_options['input_shape'] = (num_rows, num_columns, num_channels)
    model.add(Conv2D(**conv_options))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

model.add(GlobalAveragePooling2D())
model.add(Dense(num_labels, activation='softmax'))

**Compile the Model**
<p>Loss function - we will use categorical_crossentropy, the standard loss for multi-class classification with one-hot encoded labels. A lower loss indicates that the model is performing better.</p>
<p>Metrics - we will use the accuracy metric which will allow us to view the accuracy score on the validation data when we train the model.</p>
<p>Optimizer - here we will use adam which is a generally good optimizer for many use cases.</p>

In [24]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
# Print the layer-by-layer architecture of the compiled network.
model.summary()

# Baseline: score the not-yet-trained network on the test split.
# model.evaluate returns [loss, accuracy] because 'accuracy' is the only metric.
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 39, 173, 16)       80        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 19, 86, 16)       0         
 )                                                               
                                                                 
 dropout_4 (Dropout)         (None, 19, 86, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 18, 85, 32)        2080      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 9, 42, 32)        0         
 2D)                                                             
                                                                 
 dropout_5 (Dropout)         (None, 9, 42, 32)        

**Training**

In [26]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

# Training hyper-parameters.
num_epochs = 72
num_batch_size = 256

# Write the weights to disk only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.basic_cnn.hdf5',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): the test split doubles as the validation set here, so the
# checkpoint selection is driven by the same data later reported as
# "Testing Accuracy". Also, the best checkpoint is written but never reloaded
# in this notebook; `model` keeps the weights of the final epoch.
start = datetime.now()

model.fit(
    x_train,
    y_train,
    epochs=num_epochs,
    batch_size=num_batch_size,
    validation_data=(x_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)

# Wall-clock duration of the fit call.
duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/72
Epoch 1: val_loss improved from inf to 2.16482, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 2/72
Epoch 2: val_loss improved from 2.16482 to 1.95800, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 3/72
Epoch 3: val_loss improved from 1.95800 to 1.80728, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 4/72
Epoch 4: val_loss improved from 1.80728 to 1.61695, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 5/72
Epoch 5: val_loss improved from 1.61695 to 1.51894, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 6/72
Epoch 6: val_loss improved from 1.51894 to 1.43425, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 7/72
Epoch 7: val_loss improved from 1.43425 to 1.36653, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 8/72
Epoch 8: val_loss improved from 1.36653 to 1.30654, saving model to saved_models\weights.best.basic_cnn.hdf5
Epoch 9/72
Epoch 9: val_loss improved from 1

**Test the model**

In [27]:
# Report accuracy (index 1 of the evaluate() result) on the training split
# first and the test split second; `score` ends up holding the test result.
for split_label, split_features, split_targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(split_features, split_targets, verbose=0)
    print(split_label, score[1])

Training Accuracy:  0.9408733248710632
Testing Accuracy:  0.8998282551765442


**Save the Model**

In [66]:
# Persist the trained model to a single .keras file; the validation section
# further down reloads the model from this same path with load_model().
model.save('saved_models/MFCC_Classification_Model.keras')

**Prediction**

In [5]:
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import os

# Number of MFCC frames (columns) every feature matrix is normalised to.
max_pad_len = 174

def extract_features(file_name):
    """Load an audio file and return a fixed-width MFCC matrix.

    The clip is decoded with librosa, 40 MFCC coefficients are computed per
    frame, and the time axis is normalised to exactly ``max_pad_len`` columns:
    shorter clips are zero-padded on the right, longer clips are truncated to
    their first ``max_pad_len`` frames.

    Args:
        file_name: Path of the audio file to read.

    Returns:
        A NumPy array of shape ``(40, max_pad_len)``, or ``None`` when the
        file could not be loaded or the MFCCs could not be computed (the
        failure reason is printed).
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        print("Error encountered while parsing file: ", file_name)
        print("Reason: ", repr(e))
        return None

    # Clips longer than max_pad_len frames used to produce a negative pad
    # width, which made np.pad raise and the clip was silently dropped as a
    # "parsing" error. Truncate first so the pad width is never negative.
    mfccs = mfccs[:, :max_pad_len]
    pad_width = max_pad_len - mfccs.shape[1]
    mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfccs

In [6]:
def print_prediction(file_name):
    """Classify one audio file and print the result.

    Prints the predicted class name followed by the probability the model
    assigns to every class, one class per line.

    Args:
        file_name: Path of the audio file to classify.

    Returns:
        None. If features cannot be extracted from the file, a message is
        printed and no prediction is attempted.
    """
    prediction_feature = extract_features(file_name)
    if prediction_feature is None:
        # extract_features signals failure with None; reshaping it would
        # raise an unrelated AttributeError.
        print("No prediction made, features could not be extracted from: ", file_name)
        return

    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)

    # Single forward pass, reused for both the arg-max class and the
    # per-class probability listing.
    predicted_proba_vector = model.predict(prediction_feature)

    predicted_vector = np.argmax(predicted_proba_vector, axis=-1)
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    predicted_proba = predicted_proba_vector[0]
    # Decode every class index in one call instead of once per loop iteration.
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f') )

**Validation**
<p>On new samples</p>

In [12]:
from tensorflow.keras.models import load_model
# Reload the trained network saved in the "Save the Model" section.
model = load_model('saved_models/MFCC_Classification_Model.keras')

# Folder holding the hand-picked validation clips. The default is the original
# author's local path; set the URBANSOUND_VAL_DIR environment variable to point
# at the clips on any other machine. `os` is imported in the Prediction section.
VAL_DIR = os.environ.get(
    "URBANSOUND_VAL_DIR",
    "D:\\Code\\ProjectsPython\\ML_TrainingGround\\ML_Audio\\data\\UrbanSound8K\\validation",
)

In [13]:
# Expected class: air_conditioner.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "air_conditioner.mp3")
print_prediction(filename)

The predicted class is: drilling 

air_conditioner 		 :  0.00775969773530960083007812500000
car_horn 		 :  0.03981656581163406372070312500000
children_playing 		 :  0.00872580241411924362182617187500
dog_bark 		 :  0.01070435158908367156982421875000
drilling 		 :  0.39637118577957153320312500000000
engine_idling 		 :  0.06566824018955230712890625000000
gun_shot 		 :  0.04042046889662742614746093750000
jackhammer 		 :  0.29831093549728393554687500000000
siren 		 :  0.01266386173665523529052734375000
street_music 		 :  0.11955891549587249755859375000000


In [14]:
# Expected class: engine_idling (clip of an idling car).
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "car_idle.mp3")
print_prediction(filename) 

The predicted class is: engine_idling 

air_conditioner 		 :  0.04178266599774360656738281250000
car_horn 		 :  0.04754407703876495361328125000000
children_playing 		 :  0.01624593324959278106689453125000
dog_bark 		 :  0.01987972669303417205810546875000
drilling 		 :  0.02580863609910011291503906250000
engine_idling 		 :  0.82289588451385498046875000000000
gun_shot 		 :  0.00524621084332466125488281250000
jackhammer 		 :  0.01378475688397884368896484375000
siren 		 :  0.00131086620967835187911987304688
street_music 		 :  0.00550126517191529273986816406250


In [15]:
# Expected class: dog_bark.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "dog_barking.mp3")
print_prediction(filename) 

The predicted class is: dog_bark 

air_conditioner 		 :  0.00000177171227733197156339883804
car_horn 		 :  0.00404418306425213813781738281250
children_playing 		 :  0.00969238765537738800048828125000
dog_bark 		 :  0.92599302530288696289062500000000
drilling 		 :  0.00105791003443300724029541015625
engine_idling 		 :  0.00887636747211217880249023437500
gun_shot 		 :  0.00688256043940782546997070312500
jackhammer 		 :  0.00015988650557119399309158325195
siren 		 :  0.04185850545763969421386718750000
street_music 		 :  0.00143336842302232980728149414062


In [16]:
# Expected class: drilling.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "drill.mp3")
print_prediction(filename) 

The predicted class is: drilling 

air_conditioner 		 :  0.00288114114664494991302490234375
car_horn 		 :  0.00231178663671016693115234375000
children_playing 		 :  0.00000495588255944312550127506256
dog_bark 		 :  0.00000113318719741073437035083771
drilling 		 :  0.98871070146560668945312500000000
engine_idling 		 :  0.00424196571111679077148437500000
gun_shot 		 :  0.00002254247010569088160991668701
jackhammer 		 :  0.00090685067698359489440917968750
siren 		 :  0.00000004880667603401889209635556
street_music 		 :  0.00091891252668574452400207519531


In [17]:
# Expected class: jackhammer.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "jackhammer.mp3")
print_prediction(filename) 

The predicted class is: drilling 

air_conditioner 		 :  0.00041314447298645973205566406250
car_horn 		 :  0.00989797059446573257446289062500
children_playing 		 :  0.00353560713119804859161376953125
dog_bark 		 :  0.09250296652317047119140625000000
drilling 		 :  0.78502619266510009765625000000000
engine_idling 		 :  0.00046125479275360703468322753906
gun_shot 		 :  0.00016873616550583392381668090820
jackhammer 		 :  0.09566677361726760864257812500000
siren 		 :  0.01040329877287149429321289062500
street_music 		 :  0.00192395353224128484725952148438


In [18]:
# Expected class: children_playing.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "kids_playing.mp3")
print_prediction(filename) 

The predicted class is: children_playing 

air_conditioner 		 :  0.00000056856782748582190833985806
car_horn 		 :  0.00284002278931438922882080078125
children_playing 		 :  0.91597360372543334960937500000000
dog_bark 		 :  0.03033817000687122344970703125000
drilling 		 :  0.00001540888115414418280124664307
engine_idling 		 :  0.00018272578017786145210266113281
gun_shot 		 :  0.00000779726997279794886708259583
jackhammer 		 :  0.00000126601707961526699364185333
siren 		 :  0.04929034411907196044921875000000
street_music 		 :  0.00135017675347626209259033203125


In [19]:
# Expected class: siren.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "siren.mp3")
print_prediction(filename) 

The predicted class is: siren 

air_conditioner 		 :  0.00000000000337063775328327874803
car_horn 		 :  0.00000140776705848111305385828018
children_playing 		 :  0.00008175824041245505213737487793
dog_bark 		 :  0.00259436317719519138336181640625
drilling 		 :  0.00000037286602605490770656615496
engine_idling 		 :  0.00000010999154653745790710672736
gun_shot 		 :  0.00000000014435720674388363704566
jackhammer 		 :  0.00000000013746707938633306866905
siren 		 :  0.99731534719467163085937500000000
street_music 		 :  0.00000671627913106931373476982117


In [20]:
# Expected class: street_music.
# Build the clip path inside VAL_DIR, then print the predicted class and the
# per-class probabilities for it.
filename = os.path.join(VAL_DIR, "street_music.mp3")
print_prediction(filename) 

The predicted class is: dog_bark 

air_conditioner 		 :  0.00024132610997185111045837402344
car_horn 		 :  0.00636235438287258148193359375000
children_playing 		 :  0.14681123197078704833984375000000
dog_bark 		 :  0.47498932480812072753906250000000
drilling 		 :  0.00413822336122393608093261718750
engine_idling 		 :  0.00144013238605111837387084960938
gun_shot 		 :  0.07700029760599136352539062500000
jackhammer 		 :  0.23638123273849487304687500000000
siren 		 :  0.01726490631699562072753906250000
street_music 		 :  0.03537097945809364318847656250000
