In [None]:
import tensorflow as tf

2023-11-09 10:09:28.166865: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-09 10:09:28.197903: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-09 10:09:28.198508: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Define Hyper-Parameters

In [None]:
# Keyword arguments unpacked into MelSpectrogram(**PREPROCESSING_ARGS) and
# also logged alongside the results at the end of the notebook.
# NOTE(review): units are presumably Hz for the rate/frequencies and seconds
# for the frame length/step (per the `_in_s` suffix) -- confirm against
# preprocessing.MelSpectrogram.
PREPROCESSING_ARGS = dict(
    sampling_rate=16000,
    frame_length_in_s=0.04,
    frame_step_in_s=0.02,
    num_mel_bins=40,
    lower_frequency=20,
    upper_frequency=4000,
)

# Training hyper-parameters: batch size for the input pipelines, the two
# endpoints of the learning-rate schedule, and the number of epochs.
TRAINING_ARGS = dict(
    batch_size=20,
    initial_learning_rate=0.01,
    end_learning_rate=1e-5,
    epochs=10,
)

# Create Train/Val/Test Datasets

In [None]:
# One file-path dataset per split; each split lives in its own directory and
# is matched with a glob pattern.
train_ds, val_ds, test_ds = map(
    tf.data.Dataset.list_files,
    ('msc-train/*', 'msc-val/*', 'msc-test/*'),
)

In [None]:
from preprocessing import LABELS
from preprocessing import AudioReader
from preprocessing import MelSpectrogram


# Reader and feature extractor shared by all three splits. The reader's
# sampling rate is read from PREPROCESSING_ARGS so it cannot drift from the
# rate handed to the spectrogram stage.
audio_reader = AudioReader(tf.int16, PREPROCESSING_ARGS['sampling_rate'])
mel_spec_processor = MelSpectrogram(**PREPROCESSING_ARGS)


def prepare_for_training(feature, label):
    """Convert one (feature, label) pair into the form fed to the model.

    Args:
        feature: feature tensor produced by `mel_spec_processor`.
        label: label tensor that is compared element-wise against `LABELS`.

    Returns:
        A `(feature, label_id)` tuple where `feature` has an extra trailing
        axis of size 1 and `label_id` is the argmax of `label == LABELS`,
        i.e. the position in `LABELS` that matches `label`.
    """
    feature = tf.expand_dims(feature, -1)
    label_id = tf.argmax(label == LABELS)

    return feature, label_id


def _build_dataset(files_ds, batch_size, cache=False):
    """Apply the shared read -> features -> prepare -> batch pipeline.

    Args:
        files_ds: dataset of audio file paths (e.g. from `list_files`).
        batch_size: number of examples per batch.
        cache: when True, append `.cache()` after batching.

    Returns:
        The batched (and optionally cached) dataset.
    """
    dataset = (files_ds
               .map(audio_reader.get_audio_and_label)
               .map(mel_spec_processor.get_mel_spec_and_label)
               .map(prepare_for_training)
               .batch(batch_size))
    # cache() comes after batch(), so it is the already-formed batches that
    # get cached.
    return dataset.cache() if cache else dataset


batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']

# NOTE: the split names are rebound from file-path datasets to processed
# datasets, so this cell must be run exactly once after the cell that lists
# the files.
train_ds = _build_dataset(train_ds, batch_size, cache=True)
val_ds = _build_dataset(val_ds, batch_size)
test_ds = _build_dataset(test_ds, batch_size)

2023-11-09 10:09:30.466795: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX AVX2 FMA
2023-11-09 10:09:30.468667: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available


In [None]:
# Pull a single batch to inspect the shapes and labels the model will see.
# `example_batch` is reused below to size the model's input layer.
for example_batch, example_labels in train_ds.take(1):
    batch_shape = example_batch.shape
    print('Batch Shape:', batch_shape)
    # Dropping the leading (batch) axis leaves the per-example shape.
    print('Data Shape:', batch_shape[1:])
    print('Labels:', example_labels)

Batch Shape: (20, 49, 40, 1)
Data Shape: (49, 40, 1)
Labels: tf.Tensor([0 7 7 1 0 3 6 1 0 5 4 4 2 0 3 3 0 3 1 6], shape=(20,), dtype=int64)
2023-11-09 10:09:32.002326: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


# Create the Model


Conv2D(filters=128, kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid')
BatchNormalization()
ReLU()
Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same')
BatchNormalization()
ReLU()
Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same')
BatchNormalization()
ReLU()
GlobalAveragePooling2D()
Dense(units=8)
Softmax()


In [None]:
def _conv_bn_relu(strides, padding):
    """Return the layers of one stage: 3x3 Conv2D (128 filters, no bias),
    BatchNormalization, ReLU."""
    return [
        tf.keras.layers.Conv2D(
            filters=128,
            kernel_size=[3, 3],
            strides=strides,
            use_bias=False,
            padding=padding,
        ),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
    ]


# Three convolutional stages (the first one strided), global average pooling,
# then a dense layer with one unit per entry of LABELS followed by a softmax.
model = tf.keras.Sequential([
    # Per-example input shape taken from the inspected batch (batch axis dropped).
    tf.keras.layers.Input(shape=example_batch.shape[1:]),
    *_conv_bn_relu(strides=[2, 2], padding='valid'),
    *_conv_bn_relu(strides=[1, 1], padding='same'),
    *_conv_bn_relu(strides=[1, 1], padding='same'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=len(LABELS)),
    tf.keras.layers.Softmax(),
])

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 19, 128)       1152      
                                                                 
 batch_normalization (Batch  (None, 24, 19, 128)       512       
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 24, 19, 128)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 19, 128)       147456    
                                                                 
 batch_normalization_1 (Bat  (None, 24, 19, 128)       512       
 chNormalization)                                                
                                                                 
 re_lu_1 (ReLU)              (None, 24, 19, 128)       0

# Train the Model

In [None]:
# Endpoints of the learning-rate schedule.
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']

# The schedule spans the whole run: (batches per epoch) * (number of epochs).
# power=1.0 is the default and makes the polynomial decay linear.
linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate,
    decay_steps=len(train_ds) * epochs,
    end_learning_rate=end_learning_rate,
    power=1.0,
)

# The model ends in a Softmax layer, so the loss receives probabilities
# rather than logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# `history` keeps the per-epoch training and validation metrics.
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Per-epoch training/validation loss and accuracy recorded by model.fit.
history.history

{'loss': [1.9580574035644531,
  1.3824141025543213,
  0.9298322200775146,
  0.7004585266113281,
  0.5568344593048096,
  0.4541385769844055,
  0.3799002766609192,
  0.31913259625434875,
  0.2726048529148102,
  0.23816996812820435],
 'sparse_categorical_accuracy': [0.22984375059604645,
  0.48046875,
  0.6698437333106995,
  0.7631250023841858,
  0.8165624737739563,
  0.8531249761581421,
  0.8824999928474426,
  0.9035937786102295,
  0.9220312237739563,
  0.9353125095367432],
 'val_loss': [2.0115091800689697,
  1.3239790201187134,
  1.2676365375518799,
  0.838029146194458,
  0.6820959448814392,
  0.5248187184333801,
  0.5279343128204346,
  0.5043699741363525,
  0.38984254002571106,
  0.32818907499313354],
 'val_sparse_categorical_accuracy': [0.19374999403953552,
  0.48500001430511475,
  0.550000011920929,
  0.6912500262260437,
  0.7787500023841858,
  0.8324999809265137,
  0.8274999856948853,
  0.831250011920929,
  0.8862500190734863,
  0.9049999713897705]}

{'loss': [2.043762445449829,
  1.5524919033050537,
  1.0171440839767456,
  0.7351239919662476,
  0.5852020382881165,
  0.486852765083313,
  0.40863266587257385,
  0.34691211581230164,
  0.2962386906147003,
  0.25856128334999084],
 'sparse_categorical_accuracy': [0.18062500655651093,
  0.3931249976158142,
  0.6409375071525574,
  0.7503125071525574,
  0.8037499785423279,
  0.839062511920929,
  0.8684375286102295,
  0.8912500143051147,
  0.9082812666893005,
  0.9223437309265137],
 'val_loss': [4.484674453735352,
  1.7968746423721313,
  2.3250250816345215,
  1.5986230373382568,
  1.122878074645996,
  0.7038255333900452,
  0.5258072018623352,
  0.433118999004364,
  0.45025965571403503,
  0.34233108162879944],
 'val_sparse_categorical_accuracy': [0.125,
  0.3174999952316284,
  0.35874998569488525,
  0.49000000953674316,
  0.6262500286102295,
  0.7599999904632568,
  0.8224999904632568,
  0.8612499833106995,
  0.856249988079071,
  0.8987500071525574]}

# Test the Model

In [None]:
# Evaluate on the test split; the result unpacks into the loss followed by
# the single metric the model was compiled with (sparse categorical accuracy).
test_loss, test_accuracy = model.evaluate(test_ds)



In [None]:
# Metrics from the last epoch of training.
last_epoch = {name: values[-1] for name, values in history.history.items()}
training_loss = last_epoch['loss']
training_accuracy = last_epoch['sparse_categorical_accuracy']
val_loss = last_epoch['val_loss']
val_accuracy = last_epoch['val_sparse_categorical_accuracy']

# One "<Split> Loss / <Split> Accuracy" pair per split, with a blank line
# between consecutive splits.
report_sections = [
    f'{split} Loss: {split_loss:.4f}\n{split} Accuracy: {split_accuracy*100.:.2f}%'
    for split, split_loss, split_accuracy in (
        ('Training', training_loss, training_accuracy),
        ('Validation', val_loss, val_accuracy),
        ('Test', test_loss, test_accuracy),
    )
]
print('\n\n'.join(report_sections))

Training Loss: 0.2382
Training Accuracy: 93.53%

Validation Loss: 0.3282
Validation Accuracy: 90.50%

Test Loss: 0.2917
Test Accuracy: 90.38%
Training Loss: 0.2586
Training Accuracy: 92.23%

Validation Loss: 0.3423
Validation Accuracy: 89.88%

Test Loss: 0.3037
Test Accuracy: 90.00%


# Save the Model

### Save Keras Model

In [None]:
import os
from time import time

# Whole-second Unix timestamp, used as the run identifier: it names the
# saved-model directory and is logged with the results.
timestamp = int(time())

saved_model_dir = f'./saved_models/{timestamp}'
# exist_ok=True creates the directory tree when missing and is a no-op when
# it already exists, avoiding the race-prone "check, then create" pattern.
os.makedirs(saved_model_dir, exist_ok=True)
model.save(saved_model_dir)

INFO:tensorflow:Assets written to: ./saved_models/1699521755/assets
INFO:tensorflow:Assets written to: ./saved_models/1699521755/assets
INFO:tensorflow:Assets written to: ./saved_models/1699525857/assets
INFO:tensorflow:Assets written to: ./saved_models/1699525857/assets


### Save Hyper-Parameters and Results

In [None]:
import pandas as pd

# One results row: run identifier, every hyper-parameter, and the test accuracy.
output_dict = {'timestamp': timestamp}
output_dict.update(PREPROCESSING_ARGS)
output_dict.update(TRAINING_ARGS)
output_dict['test_accuracy'] = test_accuracy

df = pd.DataFrame([output_dict])

output_path='./mel_spectrogram_results.csv'
# Append to the shared results file; the header row is written only when the
# file does not exist yet.
write_header = not os.path.exists(output_path)
df.to_csv(output_path, mode='a', header=write_header, index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6f1fd91f-a434-4542-983d-3ce5ae14ac33' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>