# Training a neural network to classify images of MFCCs

## Load dataset

In [64]:

import os

current_dir = os.getcwd()
directory = os.path.dirname(current_dir) + "/datasets/"
csv_files_train = [directory + "/train/" + f for f in os.listdir(directory + "train/") if f.endswith('.csv')]
csv_files_validation = [directory + "/validation/" + f for f in os.listdir(directory + "validation/") if f.endswith('.csv')]

print("training files: ", len(csv_files_train))
print("validation files: ", len(csv_files_validation))


training files:  1672
validation files:  456


Test import of csv datasets into tensorflow datasets

import every csv file as a single matrix with one label associated

In [65]:
import tensorflow as tf

# Print TensorFlow version
print("TensorFlow version:", tf.__version__)

# Check if GPU is available and being used
print(tf.config.list_physical_devices('GPU'))



TensorFlow version: 2.13.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [66]:
import numpy as np

# create training and validation datasets
dataset_train = []
dataset_validation = []
labels_train = []
labels_validation = []

# read csv files into lists
# the label (language) is written in the file name

for file in csv_files_train:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset_train.append(data_array)
    
    file_name = os.path.basename(file)
    labels_train.append(file_name[5:8])

for file in csv_files_validation:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset_validation.append(data_array)

    file_name = os.path.basename(file)
    labels_validation.append(file_name[5:8])

print("dataset train size: ", len(dataset_train))
print("dataset validation size: ", len(dataset_validation))
print("labels train size: ", len(labels_train))
print("labels validation size: ", len(labels_validation))

dataset train size:  1672
dataset validation size:  456
labels train size:  1672
labels validation size:  456


In [67]:
# print size of one element of the dataset: feature size
mfcc_size = dataset_train[0].shape
print ("mfcc_size: ", mfcc_size)

mfcc_size:  (349, 12)


In [68]:
classes = ["ita", "eng"]

# Create a mapping from class names to integer labels
class_to_index = {class_name: index for index, class_name in enumerate(classes)}
print(class_to_index)

# Convert labels to integer labels using the mapping
integer_labels_train = np.array([class_to_index[label] for label in labels_train], dtype=np.int8)
integer_labels_validation = np.array([class_to_index[label] for label in labels_validation], dtype=np.int8)

y_onehot_train = tf.keras.utils.to_categorical(integer_labels_train, num_classes = len(classes)) # one hot encoding
y_onehot_validation = tf.keras.utils.to_categorical(integer_labels_validation, num_classes = len(classes)) # one hot encoding



{'ita': 0, 'eng': 1}


In [69]:

x_train = tf.reshape(dataset_train, (-1, mfcc_size[0], mfcc_size[1], 1))
x_validation = tf.reshape(dataset_validation, (-1, mfcc_size[0], mfcc_size[1], 1))

print("Training features shape:", x_train.shape)
print("Validation features shape:", x_validation.shape)

# create tensorflow dataset from numpy arrays
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_onehot_train))
val_dataset = tf.data.Dataset.from_tensor_slices((x_validation, y_onehot_validation))


Training features shape: (1672, 349, 12, 1)
Validation features shape: (456, 349, 12, 1)


In [70]:
batch_size = 32
num_epochs = 150
mfcc_shape = (mfcc_size[0], mfcc_size[1], 1)

# shuffle and batch
train_dataset = train_dataset.shuffle(len(x_train))

# apply batching to the datasets
val_dataset = val_dataset.batch(batch_size)
train_dataset = train_dataset.batch(batch_size)

In [71]:
for image_batch, labels_batch in train_dataset:
	print("MFCC batch input feature shape: ", image_batch.shape)
	print("MFCC labels shape: ", labels_batch.shape)
	break

MFCC batch input feature shape:  (32, 349, 12, 1)
MFCC labels shape:  (32, 2)


## Models training and evaluation

In [103]:
# prints learning rate during training
def get_lr_metric(optimizer):
    def lr(y_true, y_pred):
        return optimizer.lr
    return lr

# learning rate scheduler with polynomial decay
learning_rate_scheduler = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.002,
    decay_steps=1500,
    end_learning_rate=1e-4,
    power=0.5
)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_scheduler)
lr_metric = get_lr_metric(optimizer)


In [23]:
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping


# Create a basic CNN model
model = models.Sequential([
	layers.Conv2D(filters=16, kernel_size=(5, 1), activation='relu', input_shape=mfcc_shape),
	layers.MaxPooling2D(pool_size=(2, 1)),
    #layers.Conv2D(filters=64, kernel_size=(5, 1), activation='relu'),
	#layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=16, kernel_size=(3, 1), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    #layers.GlobalAveragePooling2D(),
	layers.AveragePooling2D(pool_size=(41, 5)),
	layers.Flatten(),
	layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
	layers.Dense(2, activation='softmax')  # Two classes
])

model.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 345, 12, 16)       96        
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 172, 12, 16)       0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 170, 12, 16)       784       
                                                                 
 max_pooling2d_10 (MaxPooli  (None, 85, 12, 16)        0         
 ng2D)                                                           
                                                                 
 conv2d_11 (Conv2D)          (None, 83, 10, 16)        2320      
                                                                 
 max_pooling2d_11 (MaxPooli  (None, 41, 5, 16)        

In [104]:
# create new CNN model
input_layer = tf.keras.layers.Input(shape=mfcc_shape, name='input_layer')
conv1 = tf.keras.layers.Conv2D(16, (5, 1), activation='relu', padding='same', name='conv1')(input_layer)
pool1 = tf.keras.layers.MaxPooling2D((2, 1), name='pool1')(conv1)
conv2 = tf.keras.layers.Conv2D(16, (1, 5), activation='relu', padding='same', name='conv2')(pool1)
pool2 = tf.keras.layers.MaxPooling2D((2, 1), name='pool2')(conv2)
conv3 = tf.keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same', name='conv3')(pool2)
pool3 = tf.keras.layers.MaxPooling2D((2, 2), name='pool3')(conv3)
avg_pool = tf.keras.layers.AveragePooling2D(pool_size=(16, 2), name='avg_pool')(pool3)
flatten = tf.keras.layers.Flatten(name='flatten')(avg_pool)
dense1 = tf.keras.layers.Dense(64, activation='relu', name='dense1', kernel_regularizer='l2')(flatten)
dense2 = tf.keras.layers.Dense(32, activation='relu', name='dense2', kernel_regularizer='l2')(dense1)
output_layer = tf.keras.layers.Dense(2, activation='softmax', name='output_layer')(dense2)

model_2 = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)
model_2.summary()


Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 349, 12, 1)]      0         
                                                                 
 conv1 (Conv2D)              (None, 349, 12, 16)       96        
                                                                 
 pool1 (MaxPooling2D)        (None, 174, 12, 16)       0         
                                                                 
 conv2 (Conv2D)              (None, 174, 12, 16)       1296      
                                                                 
 pool2 (MaxPooling2D)        (None, 87, 12, 16)        0         
                                                                 
 conv3 (Conv2D)              (None, 87, 12, 16)        2320      
                                                                 
 pool3 (MaxPooling2D)        (None, 43, 6, 16)         0  

In [24]:

# Compile the model
model.compile(optimizer = tf.keras.optimizers.Adam(),
			  loss='categorical_crossentropy',  # Use 'categorical_crossentropy' for one-hot encoded labels
			  metrics=['accuracy', lr_metric])

early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=20, verbose=1, restore_best_weights=True)

callbacks_list = [early_stopping]

# Train the model
model.fit(x=train_dataset, epochs=num_epochs, callbacks=callbacks_list, validation_data=val_dataset)



Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7fdc8c117fd0>

In [105]:
# Compile the model 2
model_2.compile(optimizer = optimizer,
			  loss='categorical_crossentropy',  # Use 'categorical_crossentropy' for one-hot encoded labels
			  metrics=['accuracy', lr_metric])

early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=20, verbose=1, restore_best_weights=True)

callbacks_list = [early_stopping]

# Train the model
model_2.fit(x=train_dataset, epochs=num_epochs, callbacks=callbacks_list, validation_data=val_dataset)

Epoch 1/150


Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 39: early stopping


<keras.src.callbacks.History at 0x7fdd2c6d9e10>

In [101]:
# evaluate model on test set

evaluation = model.evaluate(val_dataset, batch_size=32)
evaluation = dict(zip(model.metrics_names, evaluation))
print(evaluation)

 1/15 [=>............................] - ETA: 0s - loss: 0.3191 - accuracy: 0.8750 - lr: 0.0018

{'loss': 0.38049131631851196, 'accuracy': 0.8289473652839661, 'lr': 0.0017641539452597499}


In [106]:
evaluation_2 = model_2.evaluate(val_dataset, batch_size=32)
evaluation_2 = dict(zip(model_2.metrics_names, evaluation_2))
print(evaluation_2)

 1/15 [=>............................] - ETA: 0s - loss: 0.4134 - accuracy: 0.9688 - lr: 1.0000e-04

{'loss': 0.6125695109367371, 'accuracy': 0.8552631735801697, 'lr': 9.999997564591467e-05}


In [107]:
parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "/model_lite/"
model_2.save(filepath +  "CNN_model")

INFO:tensorflow:Assets written to: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


INFO:tensorflow:Assets written to: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


## Model Conversion to Tensorflow Lite

In [108]:
parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "/model_lite/"
# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(filepath + "CNN_model")
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_INT8,  # enable TensorFlow Lite ops.
    #tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]

converter.experimental_enable_resource_variables = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]

def representative_dataset(num_samples = x_train.shape[0]):
    for x, y in train_dataset.take(num_samples):
    	yield [tf.cast(x, dtype=tf.float32)]

converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()

# write the converted model into a file
with open(filepath + "CNN_model.tflite", 'wb') as f:
	f.write(tflite_model)

2023-08-19 17:05:54.873209: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-08-19 17:05:54.873307: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-08-19 17:05:54.885952: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-19 17:05:54.889585: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2023-08-19 17:05:54.889618: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-19 17:05:54.902240: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-08-19 17:05:54.983588: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /home/simon/Spoken_Language_R

In [109]:
model_path = filepath + "CNN_model.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)

[{'name': 'serving_default_input_layer:0', 'index': 0, 'shape': array([  1, 349,  12,   1], dtype=int32), 'shape_signature': array([ -1, 349,  12,   1], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (1.0, 0), 'quantization_parameters': {'scales': array([1.], dtype=float32), 'zero_points': array([0], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 25, 'shape': array([1, 2], dtype=int32), 'shape_signature': array([-1,  2], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128), 'quantization_parameters': {'scales': array([0.00390625], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [110]:
# Assuming single input and output tensors.
input_shape = input_details[0]['shape']
output_shape = output_details[0]['shape']

print(input_shape)
print(output_shape)

[  1 349  12   1]
[1 2]


Convert the TFLite model into a TFMicro model using the bash script below 

In [None]:
!xxd -i ./../model_lite/CNN_model.tflite > ./../model_lite/model_tflite_data.cc

/bin/bash: /home/simon/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)


## Evaluate quantized model on test sets

In [135]:
def tflite_model_evaluation(x_test, y_onehot_test):

	# convert test set into int8
	x_test_int8 = tf.convert_to_tensor(tf.cast(x_test, tf.int8))
	y_test_int8 = tf.convert_to_tensor(tf.cast(y_onehot_test, tf.int8))
	# Convert the random data point from int8 to float32
	batch_size = len(x_test_int8)

	eval_shape = (1, mfcc_size[0], mfcc_size[1], 1)
	print("x shape = ", x_test_int8.shape)
	print("y shape =", y_test_int8.shape)

	predictions = np.empty( (y_test_int8.shape) )

	for i in range(batch_size):
		# Set input data to the interpreter.
		interpreter.set_tensor(input_details[0]['index'], tf.reshape(x_test_int8[i], eval_shape) )

		# Run inference.
		interpreter.invoke()

		# Get output data from the interpreter.
		output_data = interpreter.get_tensor(output_details[0]['index'])

		# get quantization parameters from output_details
		output_scale, output_zero_point = output_details[0]['quantization']

		# scale and zero point dequantization to get probabilities
		output_probability = tf.math.softmax(output_data / output_scale + output_zero_point)
		predictions[i] = output_probability.numpy()

	accuracy = np.sum( np.argmax(predictions, axis=1) == np.argmax(y_test_int8, axis=1) ) / batch_size
	print("Accuracy in test set of quantized TFLite model: {:.4f}".format(accuracy))

In [138]:
# evaluate validation set with post training quantized model
tflite_model_evaluation(x_test = x_validation, y_onehot_test = y_onehot_validation)


x shape =  (456, 349, 12, 1)
y shape = (456, 2)
Accuracy in test set of quantized TFLite model: 0.8509
