## Training a neural network to classify images of MFCCs

### Load dataset

In [33]:

import os

def _list_csv_files(folder):
    """Return the sorted paths of all '.csv' files located directly in `folder`."""
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith('.csv')
    )


# The datasets folder is a sibling of the current working directory
current_dir = os.getcwd()
directory = os.path.dirname(current_dir) + "/datasets/"

# os.path.join avoids the doubled '/' that plain string concatenation produced,
# and sorting makes the file order reproducible (os.listdir order is arbitrary)
csv_files_train = _list_csv_files(os.path.join(directory, "train"))
csv_files_validation = _list_csv_files(os.path.join(directory, "validation"))

print("training files: ", len(csv_files_train))
print("validation files: ", len(csv_files_validation))


training files:  770
validation files:  210


Test importing the CSV datasets into TensorFlow datasets

Import every CSV file as a single matrix with one associated label.

In [2]:
import tensorflow as tf

# Report which TensorFlow release this kernel is running
tf_version = tf.__version__
print("TensorFlow version:", tf_version)

# List the GPUs visible to TensorFlow
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)



2023-08-16 16:07:24.525515: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version: 2.13.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-08-16 16:07:38.815474: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-16 16:07:41.873864: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-16 16:07:41.874118: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [34]:
import numpy as np

# The label (language) is written in the file name, at characters 5..7
LABEL_SLICE = slice(5, 8)


def _load_mfcc_split(csv_files):
    """Read every CSV file of one split.

    Parameters:
        csv_files: iterable of paths to comma-separated files.

    Returns:
        A pair (matrices, labels): one int8 array per file, and the label
        string cut out of each file's base name with LABEL_SLICE.
    """
    matrices = []
    labels = []
    for file in csv_files:
        matrices.append(np.genfromtxt(file, delimiter=',', dtype=np.int8))
        labels.append(os.path.basename(file)[LABEL_SLICE])
    return matrices, labels


# create training and validation datasets (one shared loader for both splits)
dataset_train, labels_train = _load_mfcc_split(csv_files_train)
dataset_validation, labels_validation = _load_mfcc_split(csv_files_validation)

print("dataset train size: ", len(dataset_train))
print("dataset validation size: ", len(dataset_validation))
print("labels train size: ", len(labels_train))
print("labels validation size: ", len(labels_validation))

dataset train size:  770
dataset validation size:  210
labels train size:  770
labels validation size:  210


In [35]:
# Feature size: shape of a single dataset element (the first training matrix)
first_sample = dataset_train[0]
mfcc_size = first_sample.shape
print ("mfcc_size: ", mfcc_size)

mfcc_size:  (349, 12)


In [36]:
# Supported languages; the position in this list is the integer label
classes = ["ita", "eng"]

# Lookup table: class name -> integer label
class_to_index = dict(zip(classes, range(len(classes))))

# Translate the string labels of both splits through the lookup table
integer_labels_train = np.array(
    [class_to_index[name] for name in labels_train],
    dtype=np.int8,
)
integer_labels_validation = np.array(
    [class_to_index[name] for name in labels_validation],
    dtype=np.int8,
)

# One-hot encode the integer labels
n_classes = len(classes)
y_onehot_train = tf.keras.utils.to_categorical(integer_labels_train, num_classes=n_classes)
y_onehot_validation = tf.keras.utils.to_categorical(integer_labels_validation, num_classes=n_classes)



In [37]:

# Stack the per-file matrices and append a trailing channel axis of size 1
image_batch_shape = (-1, mfcc_size[0], mfcc_size[1], 1)
x_train = tf.reshape(dataset_train, image_batch_shape)
x_validation = tf.reshape(dataset_validation, image_batch_shape)

print("Training features shape:", x_train.shape)
print("Validation features shape:", x_validation.shape)

# Pair features with their one-hot labels in tf.data datasets
train_pairs = (x_train, y_onehot_train)
validation_pairs = (x_validation, y_onehot_validation)
train_dataset = tf.data.Dataset.from_tensor_slices(train_pairs)
val_dataset = tf.data.Dataset.from_tensor_slices(validation_pairs)


Training features shape: (770, 349, 12, 1)
Validation features shape: (210, 349, 12, 1)


In [38]:
batch_size = 32
num_epochs = 100
mfcc_shape = (mfcc_size[0], mfcc_size[1], 1)

# Build both pipelines from the tensors instead of re-wrapping the previous
# value of train_dataset / val_dataset: re-running this cell then yields the
# same datasets rather than batches of batches.

# training data: shuffle over the whole set, then batch
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_onehot_train))
    .shuffle(len(x_train))
    .batch(batch_size)
)

# validation data: batch only, order is kept
val_dataset = (
    tf.data.Dataset.from_tensor_slices((x_validation, y_onehot_validation))
    .batch(batch_size)
)

In [39]:
# Look at the first batch only, to check the shapes that reach the network
for image_batch, labels_batch in train_dataset.take(1):
    print("MFCC batch input feature shape: ", image_batch.shape)
    print("MFCC labels shape: ", labels_batch.shape)

MFCC batch input feature shape:  (32, 349, 12, 1)
MFCC labels shape:  (32, 2)


In [40]:
def get_lr_metric(optimizer):
    """Build a metric function that reports the optimizer's learning rate.

    The returned function has the (y_true, y_pred) signature of a metric but
    ignores both arguments and returns `optimizer.lr`. It is deliberately
    named `lr`, so that is the name it carries when used as a metric.
    """
    def lr(y_true, y_pred):
        current_rate = optimizer.lr
        return current_rate

    return lr

# Polynomial-decay learning-rate schedule: from 0.002 to 1e-4 over 10000 steps
schedule_settings = dict(
    initial_learning_rate=0.002,
    decay_steps=10000,
    end_learning_rate=1e-4,
    power=0.5,
)
learning_rate_scheduler = tf.keras.optimizers.schedules.PolynomialDecay(**schedule_settings)

# Adam driven by the schedule, plus a metric exposing its learning rate
# NOTE(review): the optimizer object is shared with the training cell; it
# presumably needs to be re-created before a new model is trained - confirm.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate_scheduler)
lr_metric = get_lr_metric(optimizer)


In [41]:
from tensorflow.keras import layers, models
from keras.callbacks import EarlyStopping


# Create a basic CNN model: three conv / max-pool stages, an average pooling
# over the remaining feature map, then a small dense classifier
model = models.Sequential([
    layers.Conv2D(filters=64, kernel_size=(5, 1), activation='relu', input_shape=mfcc_shape),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=32, kernel_size=(3, 1), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # pool_size matches the (41, 5) feature map left by the previous layer for
    # a (349, 12, 1) input, so each channel is averaged over the whole map
    layers.AveragePooling2D(pool_size=(41, 5)),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    # one output per entry of `classes`, kept in sync with the one-hot labels
    layers.Dense(len(classes), activation='softmax'),
])

model.summary()

# Compile the model
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',  # labels are one-hot encoded
    metrics=['accuracy', lr_metric],
)

# Stop when val_loss has not improved for 15 epochs and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=15, verbose=1, restore_best_weights=True)

callbacks_list = [early_stopping]

# Train the model; keep the History object so the training curves can be
# inspected later instead of only echoing its repr as the cell output
history = model.fit(x=train_dataset, epochs=num_epochs, callbacks=callbacks_list, validation_data=val_dataset)



Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_15 (Conv2D)          (None, 345, 12, 64)       384       
                                                                 
 max_pooling2d_15 (MaxPooli  (None, 172, 12, 64)       0         
 ng2D)                                                           
                                                                 
 conv2d_16 (Conv2D)          (None, 170, 12, 32)       6176      
                                                                 
 max_pooling2d_16 (MaxPooli  (None, 85, 12, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_17 (Conv2D)          (None, 83, 10, 32)        9248      
                                                                 
 max_pooling2d_17 (MaxPooli  (None, 41, 5, 32)        

<keras.src.callbacks.History at 0x7f36902af0d0>

In [42]:
# Evaluate the model on the validation set (the same data used for early stopping).
# val_dataset is already batched, so no batch_size is passed: Keras expects
# batch_size to be left unset when the input is a tf.data.Dataset.
# return_dict=True gives the {metric name: value} mapping directly.
evaluation = model.evaluate(val_dataset, return_dict=True)
print(evaluation)

1/7 [===>..........................] - ETA: 0s - loss: 0.3673 - accuracy: 0.8750 - lr: 0.0019

{'loss': 0.47045403718948364, 'accuracy': 0.7952380776405334, 'lr': 0.0018773889169096947}


In [43]:
# Export the trained model to <parent of working directory>/model_lite/CNN_model
parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "/model_lite/"
saved_model_dir = filepath +  "CNN_model"
model.save(saved_model_dir)

INFO:tensorflow:Assets written to: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


INFO:tensorflow:Assets written to: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model/assets


## Model Conversion to Tensorflow Lite

In [45]:
parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "/model_lite/"

# Convert the SavedModel stored under model_lite/CNN_model
converter = tf.lite.TFLiteConverter.from_saved_model(filepath + "CNN_model")
converter.experimental_enable_resource_variables = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]


def representative_dataset(num_samples = x_train.shape[0]):
    """Yield calibration inputs for post-training quantization.

    Parameters:
        num_samples: maximum number of training samples to yield
            (defaults to the whole training set).

    Yields:
        A one-element list holding one training sample as a float32 tensor
        with a leading batch axis of 1.
    """
    # Slice x_train directly, one sample per batch, so that take() really
    # counts samples; taking from the batched train_dataset counted batches.
    calibration_samples = tf.data.Dataset.from_tensor_slices(x_train).batch(1)
    for sample in calibration_samples.take(num_samples):
        yield [tf.cast(sample, dtype=tf.float32)]


# Restrict the converter to int8 builtin ops with int8 input and output.
# (This is the only supported_ops setting; an earlier TFLITE_BUILTINS
# assignment was overwritten here and has been dropped.)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()

# write the converted model into a file
with open(filepath + "CNN_model.tflite", 'wb') as f:
    f.write(tflite_model)

2023-08-16 16:40:35.591000: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-08-16 16:40:35.591031: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-08-16 16:40:35.591313: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-16 16:40:35.594793: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2023-08-16 16:40:35.594831: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /home/simon/Spoken_Language_Recognition_Tensorflow_Embedded/model_lite/CNN_model
2023-08-16 16:40:35.601840: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-08-16 16:40:35.692869: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /home/simon/Spoken_Language_R

In [46]:
# Load the converted model into a TFLite interpreter and allocate its tensors
model_path = filepath + "CNN_model.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Fetch the input / output tensor descriptions and display them
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
for tensor_details in (input_details, output_details):
    print(tensor_details)

[{'name': 'serving_default_conv2d_15_input:0', 'index': 0, 'shape': array([  1, 349,  12,   1], dtype=int32), 'shape_signature': array([ -1, 349,  12,   1], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (1.0, 0), 'quantization_parameters': {'scales': array([1.], dtype=float32), 'zero_points': array([0], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 25, 'shape': array([1, 2], dtype=int32), 'shape_signature': array([-1,  2], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128), 'quantization_parameters': {'scales': array([0.00390625], dtype=float32), 'zero_points': array([-128], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [47]:
# Assuming a single input tensor and a single output tensor: read entry 0 of each
first_input, first_output = input_details[0], output_details[0]
input_shape = first_input['shape']
output_shape = first_output['shape']

print(input_shape)
print(output_shape)

[  1 349  12   1]
[1 2]


In [62]:
# Pick one validation sample at random
random_index = np.random.randint(0, len(x_validation))
print("Random index: ", random_index)

# The converted model takes int8 input, so the sample and its one-hot label
# are cast to int8 (tf.cast already returns a tensor)
random_data_point = tf.cast(x_validation[random_index], tf.int8)
random_label = tf.cast(y_onehot_validation[random_index], tf.int8)

# Add a leading batch axis of size 1 for the interpreter. A literal is used so
# that the training `batch_size` variable is not overwritten by this cell.
random_data_point = tf.reshape(random_data_point, (1, mfcc_size[0], mfcc_size[1], 1))
print(random_data_point)



Random index:  87
tf.Tensor(
[[[[ 89]
   [-20]
   [-47]
   ...
   [  4]
   [ 21]
   [-41]]

  [[  2]
   [ 44]
   [  0]
   ...
   [-76]
   [ 19]
   [ -6]]

  [[-10]
   [ 41]
   [  6]
   ...
   [-54]
   [ 33]
   [-12]]

  ...

  [[ 47]
   [ 19]
   [-45]
   ...
   [ 17]
   [-23]
   [  6]]

  [[ 21]
   [  5]
   [-34]
   ...
   [  5]
   [ -9]
   [-16]]

  [[-29]
   [ 22]
   [ 30]
   ...
   [ 75]
   [ -1]
   [-14]]]], shape=(1, 349, 12, 1), dtype=int8)


In [63]:
# Tensor indices of the (single) model input and output
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Feed the sample, run the interpreter, then read the result back
interpreter.set_tensor(input_index, random_data_point)
interpreter.invoke()
output_data = interpreter.get_tensor(output_index)

print(output_data)

[[ 70 -70]]


In [64]:
# Process output data.
# output_data has one row per batch element and one column per class: take the
# arg-max along the class axis and read the single batch element, instead of
# an arg-max over the flattened array (only correct for a batch of one).
predicted_class = np.argmax(output_data, axis=-1)[0]
# Class index of the one-hot label, directly comparable to predicted_class
true_class = np.argmax(random_label.numpy())
print("Predicted class:", predicted_class)
print("True label: ", random_label)
print("True class:", true_class)

Predicted class: 0
True label:  tf.Tensor([1 0], shape=(2,), dtype=int8)


In [66]:
# Dump the converted .tflite file with `xxd -i` and redirect the output into a .cc file
!xxd -i ./../model_lite/CNN_model.tflite > ./../model_lite/model_tflite_data.cc

/bin/bash: /home/simon/miniconda3/lib/libtinfo.so.6: no version information available (required by /bin/bash)
