## Training a neural network to classify images of MFCCs

### Load dataset

In [1]:

import os

current_dir = os.getcwd()
directory = os.path.dirname(current_dir) + "/datasets/"


def list_csv_files(folder):
    """Return the full paths of the .csv files found directly inside `folder`.

    The paths are sorted because os.listdir returns entries in arbitrary
    order; sorting keeps the sample order identical from run to run.
    """
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith('.csv')
    )


# os.path.join avoids the doubled separator that plain string concatenation
# with "/train/" would produce after the trailing slash of `directory`.
csv_files_train = list_csv_files(os.path.join(directory, "train"))
csv_files_validation = list_csv_files(os.path.join(directory, "validation"))

print("training files: ", len(csv_files_train))
print("validation files: ", len(csv_files_validation))


training files:  770
validation files:  210


Test importing the CSV datasets into TensorFlow datasets.

Each CSV file is imported as a single matrix with one associated label.

In [2]:
import tensorflow as tf

# Report which TensorFlow release this notebook is running against
print("TensorFlow version:", tf.__version__)

# Show the GPU devices TensorFlow can see (an empty list means none were found)
visible_gpus = tf.config.list_physical_devices('GPU')
print(visible_gpus)



2023-08-18 16:15:37.330490: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version: 2.12.0
[]


In [3]:
import numpy as np

# Characters 5..7 of each file name hold the label (language) of the recording
LABEL_SLICE = slice(5, 8)


def load_mfcc_csvs(csv_files):
    """Read every CSV file into an int8 matrix and extract its label.

    Args:
        csv_files: iterable of paths to comma-separated files.

    Returns:
        A `(features, labels)` pair of lists with one entry per file:
        the parsed int8 array and the label substring taken from the
        file's base name.
    """
    features, labels = [], []
    for path in csv_files:
        features.append(np.genfromtxt(path, delimiter=',', dtype=np.int8))
        labels.append(os.path.basename(path)[LABEL_SLICE])
    return features, labels


# create training and validation datasets with the same loader
dataset_train, labels_train = load_mfcc_csvs(csv_files_train)
dataset_validation, labels_validation = load_mfcc_csvs(csv_files_validation)

print("dataset train size: ", len(dataset_train))
print("dataset validation size: ", len(dataset_validation))
print("labels train size: ", len(labels_train))
print("labels validation size: ", len(labels_validation))

dataset train size:  770
dataset validation size:  210
labels train size:  770
labels validation size:  210


In [4]:
# Feature size: shape of a single sample, taken from the first training example
mfcc_size = dataset_train[0].shape

# Every sample has to share this shape, because the samples are later
# reshaped together into one 4-D tensor. Fail early with a readable message.
mismatched_shapes = {
    sample.shape
    for sample in dataset_train + dataset_validation
    if sample.shape != mfcc_size
}
assert not mismatched_shapes, (
    f"expected every sample to have shape {mfcc_size}, also found {sorted(mismatched_shapes)}"
)

print ("mfcc_size: ", mfcc_size)

mfcc_size:  (349, 12)


In [5]:
# Known labels; the position of a label in this list is its integer id
classes = ["ita", "eng"]

# Lookup table from class name to integer id
class_to_index = dict(zip(classes, range(len(classes))))

# Translate the string labels into int8 ids through the lookup table
integer_labels_train = np.fromiter(
    (class_to_index[name] for name in labels_train), dtype=np.int8
)
integer_labels_validation = np.fromiter(
    (class_to_index[name] for name in labels_validation), dtype=np.int8
)

# One-hot encode the integer ids, one column per class
y_onehot_train = tf.keras.utils.to_categorical(
    integer_labels_train, num_classes=len(classes)
)
y_onehot_validation = tf.keras.utils.to_categorical(
    integer_labels_validation, num_classes=len(classes)
)



In [6]:

# Target layout: one single-channel matrix per sample; -1 lets TensorFlow
# infer the number of samples.
batched_shape = (-1, mfcc_size[0], mfcc_size[1], 1)
x_train = tf.reshape(dataset_train, batched_shape)
x_validation = tf.reshape(dataset_validation, batched_shape)

for title, features in (("Training", x_train), ("Validation", x_validation)):
    print(f"{title} features shape:", features.shape)

# Pair each feature tensor with its one-hot label in a tf.data pipeline
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_onehot_train))
val_dataset = tf.data.Dataset.from_tensor_slices((x_validation, y_onehot_validation))


2023-08-18 16:16:10.849134: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Training features shape: (770, 349, 12, 1)
Validation features shape: (210, 349, 12, 1)


In [7]:
batch_size = 32
num_epochs = 100
mfcc_shape = (mfcc_size[0], mfcc_size[1], 1)

# Build the input pipelines directly from the tensors instead of re-assigning
# the existing datasets in place. Batching an already batched dataset would
# nest the batches, so the in-place version broke when this cell ran twice.
# The training data is shuffled over its full length before batching.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_onehot_train))
    .shuffle(len(x_train))
    .batch(batch_size)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((x_validation, y_onehot_validation))
    .batch(batch_size)
)

In [8]:
# Peek at the first batch only, to confirm the feature and label shapes
for image_batch, labels_batch in train_dataset.take(1):
    print("MFCC batch input feature shape: ", image_batch.shape)
    print("MFCC labels shape: ", labels_batch.shape)

MFCC batch input feature shape:  (32, 349, 12, 1)
MFCC labels shape:  (32, 2)


2023-08-18 16:16:17.321011: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [770,349,12,1]
	 [[{{node Placeholder/_0}}]]
2023-08-18 16:16:17.321802: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [770,2]
	 [[{{node Placeholder/_1}}]]


In [24]:
# Exposes the optimizer's learning rate as a metric so it is reported during training
def get_lr_metric(optimizer):
    """Build a metric function that reports `optimizer.lr`.

    Args:
        optimizer: any object exposing an `lr` attribute.

    Returns:
        A function named `lr` taking `(y_true, y_pred)`; both arguments are
        ignored and the optimizer's current `lr` value is returned.
    """
    def lr(y_true, y_pred):
        # The attribute is read on every call, so the reported value follows
        # whatever the optimizer currently holds.
        current_rate = optimizer.lr
        return current_rate

    return lr

# Polynomial decay (power 0.5) of the learning rate from 0.002 towards 1e-4
# over 10000 optimizer steps
schedule_config = dict(
    initial_learning_rate=0.002,
    decay_steps=10000,
    end_learning_rate=1e-4,
    power=0.5,
)
learning_rate_scheduler = tf.keras.optimizers.schedules.PolynomialDecay(**schedule_config)

# Adam driven by the schedule, plus a metric that reports its current rate
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_scheduler)
lr_metric = get_lr_metric(optimizer)


In [28]:
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping


# Create a basic CNN model.
# Convolutional feature extractor: three Conv2D + MaxPooling2D stages.
model = models.Sequential([
    layers.Conv2D(filters=16, kernel_size=(5, 1), activation='relu', input_shape=mfcc_shape),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=16, kernel_size=(3, 1), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
])

# Average over the whole remaining feature map. The pool size is read from
# the model instead of being hard-coded, so it stays correct if the input
# shape or the layers above change (it evaluates to (41, 5) for a 349x12 input).
feature_map_size = model.output_shape[1:3]
model.add(layers.AveragePooling2D(pool_size=feature_map_size))

# Classifier head
model.add(layers.Flatten())
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))  # Two classes

model.summary()


ValueError: Input 0 of layer "lstm_4" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 85, 12, 16)

In [11]:

# Compile the model
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',  # Use 'categorical_crossentropy' for one-hot encoded labels
    metrics=['accuracy', lr_metric],
)

# Stop when val_loss has not improved for 15 epochs and restore the best
# weights. The callback is taken from tf.keras so that it comes from the same
# Keras implementation as the model, rather than from the standalone package.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    verbose=1,
    restore_best_weights=True,
)

callbacks_list = [early_stopping]

# Train the model; the History object is kept so the training curves can be
# inspected later, and so the cell does not end by echoing its repr.
history = model.fit(
    x=train_dataset,
    epochs=num_epochs,
    callbacks=callbacks_list,
    validation_data=val_dataset,
)



Epoch 1/100

2023-08-18 16:19:18.426761: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [210,2]
	 [[{{node Placeholder/_1}}]]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 65: early stopping


<keras.callbacks.History at 0x7fb5d060e160>

In [12]:
# Evaluate the model on the validation set (the same data used for early
# stopping; no separate test set is involved here).
# batch_size is not passed: val_dataset is a tf.data.Dataset that already
# yields batches. return_dict=True returns the metrics keyed by name.
evaluation = model.evaluate(val_dataset, return_dict=True)
print(evaluation)



{'loss': 0.3583161532878876, 'accuracy': 0.8523809313774109, 'lr': 0.001838889205828309}


In [13]:
# Output folder: "model_lite" inside the parent of the current working directory
parent_dir = os.path.dirname(os.getcwd())
filepath = f"{parent_dir}/model_lite/"

# Export the trained network under the name "CNN_model"
model.save(f"{filepath}CNN_model")



INFO:tensorflow:Assets written to: /home/claudio/EmbeddedAI/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


INFO:tensorflow:Assets written to: /home/claudio/EmbeddedAI/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


## Model Conversion to TensorFlow Lite

In [14]:
parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "/model_lite/"

# Convert the saved model
converter = tf.lite.TFLiteConverter.from_saved_model(filepath + "CNN_model")
converter.experimental_enable_resource_variables = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]


def representative_dataset(num_samples = x_train.shape[0]):
    """Yield calibration inputs for the converter, one sample at a time.

    Args:
        num_samples: maximum number of samples to yield. Defaults to the
            size of the training set; the default is evaluated once, when
            this function is defined.

    Yields:
        A one-element list holding a float32 tensor with a leading batch
        dimension of 1.
    """
    # train_dataset is already batched, so it is unbatched and re-batched
    # with size 1. This makes num_samples count samples rather than batches.
    for x, _ in train_dataset.unbatch().batch(1).take(num_samples):
        yield [tf.cast(x, dtype=tf.float32)]


# Restrict the converter to int8 builtin ops with int8 input and output.
# supported_ops is set only once; an earlier assignment of TFLITE_BUILTINS was
# always overwritten by this one.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()

# write the converted model into a file
with open(filepath + "CNN_model.tflite", 'wb') as f:
    f.write(tflite_model)

2023-08-18 16:25:23.184923: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-08-18 16:25:23.185000: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-08-18 16:25:23.186751: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /home/claudio/EmbeddedAI/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-18 16:25:23.200432: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-08-18 16:25:23.200620: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /home/claudio/EmbeddedAI/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-18 16:25:23.202057: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2023-08-18 16:25:23.229325:

In [15]:
# Load the converted model into a TensorFlow Lite interpreter
model_path = f"{filepath}CNN_model.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Fetch and show the descriptions of the model's input and output tensors
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)
for details in (input_details, output_details):
    print(details)

[{'name': 'serving_default_conv2d_input:0', 'index': 0, 'shape': array([  1, 349,  12,   1], dtype=int32), 'shape_signature': array([ -1, 349,  12,   1], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (1.0, 0), 'quantization_parameters': {'scales': array([1.], dtype=float32), 'zero_points': array([0], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 25, 'shape': array([1, 2], dtype=int32), 'shape_signature': array([-1,  2], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128), 'quantization_parameters': {'scales': array([0.00390625], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [16]:
# Only the first input and the first output tensor are considered here
input_shape, output_shape = input_details[0]['shape'], output_details[0]['shape']

print(input_shape, output_shape, sep="\n")

[  1 349  12   1]
[1 2]


In [18]:
# Pick one validation example at random
random_index = np.random.randint(0, len(x_validation))
print("Random index: ", random_index)

# Cast the selected sample and its one-hot label to int8, the input type the
# quantized model was converted with. tf.cast already returns a tensor, so no
# extra convert_to_tensor call is needed.
random_data_point = tf.cast(x_validation[random_index], tf.int8)
random_label = tf.cast(y_onehot_validation[random_index], tf.int8)

# Add a leading batch dimension of one. A literal 1 is used instead of
# re-assigning the global `batch_size`, so the training batch size set
# earlier in the notebook is not silently overwritten.
random_data_point = tf.reshape(random_data_point, (1, mfcc_size[0], mfcc_size[1], 1))
print(random_data_point)



Random index:  207
tf.Tensor(
[[[[-102]
   [   7]
   [ -14]
   ...
   [   0]
   [  12]
   [  45]]

  [[ -12]
   [ -70]
   [  -2]
   ...
   [  28]
   [  -6]
   [ 121]]

  [[ -19]
   [ -55]
   [  60]
   ...
   [ -23]
   [  17]
   [  32]]

  ...

  [[ -93]
   [   5]
   [  -9]
   ...
   [ -12]
   [  25]
   [  34]]

  [[ -16]
   [  12]
   [  -5]
   ...
   [ -23]
   [  40]
   [  55]]

  [[  52]
   [  54]
   [ -95]
   ...
   [  77]
   [  28]
   [   4]]]], shape=(1, 349, 12, 1), dtype=int8)


In [19]:
# Tensor slots of the first input and first output of the model
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Feed the sample, execute the model, then read back the result
interpreter.set_tensor(input_index, random_data_point)
interpreter.invoke()
output_data = interpreter.get_tensor(output_index)

print(output_data)

[[-120  120]]


In [20]:
# Process output data.
# output_data holds one row of class scores per sample. The argmax is taken
# along the class axis and the single sample's entry is read; a flattened
# argmax would only be correct for a batch of exactly one.
predicted_class = int(np.argmax(output_data, axis=-1)[0])

# Decode the one-hot label to a class index so it can be compared directly
# with the prediction.
true_class = int(np.argmax(np.asarray(random_label)))

print("Predicted class:", predicted_class)
print("True label: ", random_label)
print("True class:", true_class)

Predicted class: 1
True label:  tf.Tensor([0 1], shape=(2,), dtype=int8)


In [21]:
# Shell command: dump the converted .tflite file with `xxd -i` (C include-file
# style output) and redirect the result into model_tflite_data.cc.
# NOTE(review): presumably consumed by the embedded firmware build; confirm against the project.
!xxd -i ./../model_lite/CNN_model.tflite > ./../model_lite/model_tflite_data.cc