<b>Load preprocessed dataset</b>

In [None]:
# retrieve the preprocessed data from previous notebook
# `%store -r <name>` restores a variable that was persisted earlier with `%store <name>`,
# so the preprocessing notebook must have been run (and have stored these names) first.
# x_train/x_test are reshaped below into CNN inputs, y_train/y_test are the matching
# targets, yy supplies the number of classes, and le maps class indices back to labels.

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, Conv2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

# Geometry of one spectrogram as fed to the network: (rows, columns, channels).
# 257 rows equals n_fft // 2 + 1 for the 512-point STFT used in extract_spectrogram.
# 345 columns is presumably the number of STFT frames in a 2-second clip -- TODO confirm
# against the preprocessing notebook. One channel: the spectrogram is a single-plane image.
num_rows, num_columns, num_channels = 257, 345, 1

**Construct the Model**

In [None]:
# Give every sample an explicit trailing channel axis so the arrays have the
# (samples, rows, columns, channels) layout declared as the first Conv2D's input shape.
x_train = np.reshape(x_train, (len(x_train), num_rows, num_columns, num_channels))
x_test = np.reshape(x_test, (len(x_test), num_rows, num_columns, num_channels))

# The width of yy gives the number of output classes
# (presumably yy holds one-hot encoded labels -- TODO confirm in the preprocessing notebook).
num_labels = yy.shape[1]
# Side length of the square convolution kernel.
filter_size = 2

In [None]:
# Construct model
# Four identical stages -- Conv2D (ReLU) -> 2x2 max pooling -> 20% dropout -- with the
# filter count doubling at each stage. The kernel size comes from `filter_size`
# (defined in the previous cell) instead of being repeated as a literal in every layer.
model = Sequential()
for stage_index, n_filters in enumerate((16, 32, 64, 128)):
    # Only the very first layer declares the input shape; later layers infer it.
    first_layer_kwargs = (
        {'input_shape': (num_rows, num_columns, num_channels)} if stage_index == 0 else {}
    )
    model.add(Conv2D(filters=n_filters, kernel_size=filter_size, activation='relu',
                     **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

# Average every feature map down to a single value per filter before the classifier head.
model.add(GlobalAveragePooling2D())

# One softmax output per class.
model.add(Dense(num_labels, activation='softmax'))

**Compile the Model**
<p>Loss function - we will use categorical_crossentropy. This is the most common choice for classification. A lower score indicates that the model is performing better.</p>
<p>Metrics - we will use the accuracy metric which will allow us to view the accuracy score on the validation data when we train the model.</p>
<p>Optimizer - here we will use adam which is a generally good optimizer for many use cases.</p>

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, accuracy as the
# reported metric (see the markdown cell above for the rationale).
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Display model architecture summary
model.summary()

# Calculate pre-training accuracy
# Evaluating before any call to fit() gives a baseline for the untrained network
# to compare the post-training numbers against.
score = model.evaluate(x_test, y_test, verbose=1)
# score[1] is read as the accuracy (the single metric passed to compile);
# it is scaled here to a percentage.
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

**Training**

In [None]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

# Training hyper-parameters.
num_epochs = 20
num_batch_size = 256

# Write a checkpoint only when the monitored value improves on the best seen so far.
checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.basic_cnn.hdf5',
    verbose=1,
    save_best_only=True,
)

# Wall-clock timing of the whole fit() call.
start = datetime.now()

# NOTE(review): the test split is also passed as validation data, so checkpoint selection
# sees the same samples that are later reported as "Testing Accuracy" -- confirm intended.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)

duration = datetime.now() - start
print("Training completed in time: ", duration)

**Test the model**

In [None]:
# Evaluating the model on the training and testing set
# Evaluate the trained model on both splits. `score` is reused for each call and
# score[1] is printed as the accuracy (a fraction here, not a percentage).
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

**Save the Model**

In [None]:
# Persist the in-memory model to a .keras file; the Validation section reloads it from here.
# NOTE(review): this saves the model as it stands after the last epoch; the best-epoch
# checkpoint written during training is not reloaded first -- confirm that is intended.
model.save('saved_models/Spectrogram_Classification_Model.keras')

**Prediction**

In [None]:
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import os


# Force every clip to a common length of 2 seconds
def slice_audio(librosa_audio, librosa_sample_rate = 22050):
    """Return `librosa_audio` trimmed or zero-padded to exactly two seconds.

    Parameters:
        librosa_audio: array of audio samples.
        librosa_sample_rate: samples per second; the target length is twice this value.

    Returns:
        The first `2 * librosa_sample_rate` samples when the input is long enough,
        otherwise the input with zeros appended at the end up to that length.
    """
    target_length = 2 * librosa_sample_rate

    trimmed = librosa_audio[:target_length]
    missing = target_length - len(librosa_audio)
    if missing <= 0:
        # Long enough already: the leading slice is the answer.
        return trimmed

    # Too short: keep the start untouched (pad width 0) and append `missing` zeros at the end.
    return np.pad(librosa_audio, (0, missing), constant_values=0)


def extract_spectrogram(audio_path):
    """Load an audio file and return its dB-scaled STFT spectrogram.

    The clip is loaded with librosa ('kaiser_fast' resampling), forced to two seconds
    by slice_audio, transformed with a 512-point STFT (window length 512), and converted
    to decibels relative to the spectrogram's own maximum.

    Parameters:
        audio_path: path of the audio file to read.

    Returns:
        2-D array of dB values (presumably 257 x 345 for a 2-second clip, matching the
        reshape done by the caller -- TODO confirm).
    """
    samples, sample_rate = librosa.load(audio_path, res_type='kaiser_fast')
    samples = slice_audio(samples, sample_rate)

    # NOTE(review): librosa documents `dtype` as a complex type; a real dtype here likely
    # discards the imaginary part of the STFT. Left as-is because the features must stay
    # identical to those produced by the preprocessing notebook -- verify both together.
    stft_matrix = librosa.stft(samples, n_fft=512, win_length=512, dtype=np.float32)

    return librosa.amplitude_to_db(abs(stft_matrix), ref=np.max)

In [None]:
def print_prediction(file_name):
    """Classify one audio file and print the top class plus every class probability.

    Reads the notebook globals `model` (the classifier), `le` (maps class indices back
    to labels) and num_rows / num_columns / num_channels (the network input geometry).

    Parameters:
        file_name: path of the audio file to classify.

    Returns:
        None; results are printed.

    Raises:
        ValueError: from reshape, if the extracted spectrogram does not contain exactly
            num_rows * num_columns * num_channels values.
    """
    prediction_feature = extract_spectrogram(file_name)
    # Batch of one sample with an explicit channel axis.
    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)

    # Run the network once and reuse the probabilities for both the top class and the
    # per-class listing (previously the same input was pushed through predict() twice).
    predicted_proba = model.predict(prediction_feature)[0]

    predicted_index = np.argmax(predicted_proba, axis=-1)
    predicted_class = le.inverse_transform(np.array([predicted_index]))
    print("The predicted class is:", predicted_class[0], '\n')

    for class_index, probability in enumerate(predicted_proba):
        category = le.inverse_transform(np.array([class_index]))
        print(category[0], "\t\t : ", format(probability, '.32f'))

**Validation**
<p>On new samples</p>

In [None]:
from tensorflow.keras.models import load_model
# Reload the saved model from disk. This rebinds the global `model`, which is the
# object print_prediction uses for inference.
model = load_model('saved_models/Spectrogram_Classification_Model.keras')

# Directory holding the validation clips. The location is machine-specific, so it can be
# overridden with the URBANSOUND_VAL_DIR environment variable; the original local path
# remains the default when the variable is not set.
VAL_DIR = os.environ.get(
    "URBANSOUND_VAL_DIR",
    "D:\\Code\\ProjectsPython\\ML_TrainingGround\\ML_Audio\\data\\UrbanSound8K\\validation",
)

In [None]:
# Class: Air Conditioner
# Classify the air_conditioner.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "air_conditioner.mp3")
print_prediction(filename)

In [None]:
# Class: Car idle
# Classify the car_idle.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "car_idle.mp3")
print_prediction(filename) 

In [None]:
# Class: dog bark
# Classify the dog_barking.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "dog_barking.mp3")
print_prediction(filename) 

In [None]:
# Class: drill
# Classify the drill.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "drill.mp3")
print_prediction(filename) 

In [None]:
# Class: jackhammer
# Classify the jackhammer.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "jackhammer.mp3")
print_prediction(filename) 

In [None]:
# Class: kids playing
# Classify the kids_playing.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "kids_playing.mp3")
print_prediction(filename) 

In [None]:
# Class: siren
# Classify the siren.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "siren.mp3")
print_prediction(filename) 

In [None]:
# Class: street music
# Classify the street_music.mp3 clip from VAL_DIR (expected class per the line above).
filename = os.path.join(VAL_DIR, "street_music.mp3")
print_prediction(filename) 