## Training a neural network to classify images of MFCCs

In [1]:
# create tensorflow dataset from the training data

# iterate on every csv file in the folder
# create a dataset from the csv file
# append the dataset to the main dataset
# save the main dataset to a file

import os

# get parent directory
parent_dir = os.path.dirname(os.getcwd())

# get the list of csv files in the folder
directory = parent_dir + "\\datasets\\"

csv_files_ita = [directory + "\\ita\\" + f for f in os.listdir(directory + "ita\\") if f.endswith('.csv')]
csv_files_eng = [directory + "\\eng\\" + f for f in os.listdir(directory + "eng\\") if f.endswith('.csv')]

print(len(csv_files_ita), len(csv_files_eng))



425 450


Test import of csv datasets into tensorflow datasets

import every csv file as a single matrix with one label associated

In [9]:
import tensorflow as tf

# Print TensorFlow version

print("TensorFlow version:", tf.__version__)

 

print(tf.config.list_physical_devices('GPU'))

# Check if GPU is available and being used



TensorFlow version: 2.10.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
import numpy as np

# read csv file and store the values into a numpy matrix
data_array = np.genfromtxt(csv_files_ita[0], delimiter=',', dtype=np.int8)
print("Loaded data array shape:", data_array.shape)

label = "ita"
test = [[1, 2], [3, 4]]

# Create a TensorFlow dataset
tf_dataset_matrix = tf.data.Dataset.from_tensor_slices([data_array])
tf_dataset_label = tf.data.Dataset.from_tensor_slices([label])

tf_dataset = tf.data.Dataset.zip((tf_dataset_matrix, tf_dataset_label))

for element in tf_dataset:
    print(element)

Loaded data array shape: (1247, 12)
(<tf.Tensor: shape=(1247, 12), dtype=int8, numpy=
array([[-89,  32,  76, ...,  35,  63,  49],
       [-84,  18,  57, ..., -11,  61,  22],
       [-83,  14,  66, ..., -32,  56,  20],
       ...,
       [ -6,  87,  22, ..., -14,  14,   5],
       [ 11,  71,  -4, ...,  -7,   1, -16],
       [ 32,  69, -33, ..., -17,   0,   2]], dtype=int8)>, <tf.Tensor: shape=(), dtype=string, numpy=b'ita'>)


In [4]:
# create empty pandas dataframe
dataset = []
labels = []

# read csv files into dataframe
for file in csv_files_ita:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset.append(data_array)
    labels.append("ita")

for file in csv_files_eng:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset.append(data_array)
    labels.append("eng")

print("dataset size: ", len(dataset))
print("labels size: ", len(labels))


dataset size:  875
labels size:  875


In [5]:
classes = ["ita", "eng"]

# Create a mapping from class names to integer labels
class_to_index = {class_name: index for index, class_name in enumerate(classes)}

# Convert labels to integer labels using the mapping
integer_labels = np.array([class_to_index[label] for label in labels], dtype=np.int8)

labels_one_hot = tf.keras.utils.to_categorical(integer_labels, num_classes=2) # one hot encoding

print(labels_one_hot)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In [40]:
# train - validation split of tensorflow dataset
from sklearn.model_selection import train_test_split

train_features, val_features, train_labels, val_labels = train_test_split(dataset, labels_one_hot, test_size=0.2, random_state=42) 
print(val_features[0].dtype)

val_features = tf.reshape(val_features, (-1, 1247, 12))
train_features = tf.reshape(train_features, (-1, 1247, 12))


# create tensorflow dataset from numpy arrays
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_features, val_labels))

# shuffle and batch
train_dataset = train_dataset.shuffle(len(train_features))

val_dataset = val_dataset.batch(32)
train_dataset = train_dataset.batch(32)

int8


In [7]:
for image_batch, labels_batch in train_dataset:
	print(image_batch.shape)
	print(labels_batch.shape)
	break


(32, 1247, 12)
(32, 2)


In [22]:
from tensorflow.keras import layers, models
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

import absl.logging

absl.logging.set_verbosity(absl.logging.ERROR)
# Create a basic CNN model
model = models.Sequential([
    #layers.Reshape(( 1247, 12), input_shape=(1247, 12)),
	layers.Conv2D(filters=64, kernel_size=(5, 1), activation='relu', input_shape=(1247, 12, 1)),
	layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=64, kernel_size=(5, 1), activation='relu'),
	layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=32, kernel_size=(3, 1), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.GlobalAveragePooling2D(), 
	layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
	layers.Dense(2, activation='softmax')  # Two classes
])

model.summary()

# Compile the model
model.compile(optimizer='adam',
			  loss='categorical_crossentropy',  # Use 'categorical_crossentropy' for one-hot encoded labels
			  metrics=['accuracy'])

parent_dir = os.path.dirname(os.getcwd())
filepath = parent_dir + "\\models\\"

checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy',save_best_only=True, mode='max')
es = EarlyStopping(monitor='val_loss', mode='min', patience=10)

callbacks_list = [checkpoint, es]

# Train the model
model.fit(x=train_dataset, epochs=50, callbacks=callbacks_list, batch_size=32, 
          validation_data=val_dataset)


Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_49 (Conv2D)          (None, 1243, 12, 64)      384       
                                                                 
 max_pooling2d_49 (MaxPoolin  (None, 621, 12, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_50 (Conv2D)          (None, 617, 12, 64)       20544     
                                                                 
 max_pooling2d_50 (MaxPoolin  (None, 308, 12, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_51 (Conv2D)          (None, 306, 12, 32)       6176      
                                                                 
 max_pooling2d_51 (MaxPoolin  (None, 153, 12, 32)    

INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 2/50
Epoch 3/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 4/50
Epoch 5/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 6/50
Epoch 7/50
Epoch 8/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 9/50
Epoch 10/50
Epoch 11/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 33/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 34/50
Epoch 35/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 36/50
Epoch 37/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 38/50
Epoch 39/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 40/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 41/50
Epoch 42/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 48/50
Epoch 49/50


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\models\assets


Epoch 50/50


<keras.callbacks.History at 0x1f1475565b0>

In [28]:
filepath = parent_dir + "\\model_lite\\"
model.save(filepath +  "CNN_model_h5")

INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\model_lite\CNN_model_h5\assets


INFO:tensorflow:Assets written to: C:\Users\HP\Documents\GitHub\Spoken_Language_Recognition_Tensorflow_Embedded\model_lite\CNN_model_h5\assets


In [50]:
# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(filepath + "CNN_model")
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    #tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
]

converter.experimental_enable_resource_variables = True

converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open(filepath + "CNN_model.tflite", 'wb') as f:
	f.write(tflite_model)

In [51]:
model_path = filepath + "CNN_model.tflite"
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Get input and output details.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)

[{'name': 'serving_default_conv2d_49_input:0', 'index': 0, 'shape': array([   1, 1247,   12,    1]), 'shape_signature': array([  -1, 1247,   12,    1]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 28, 'shape': array([1, 2]), 'shape_signature': array([-1,  2]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [52]:
# Assuming single input and output tensors.
input_shape = input_details[0]['shape']
output_shape = output_details[0]['shape']

print(input_shape)
print(output_shape)

[   1 1247   12    1]
[1 2]


In [65]:
random_index = np.random.randint(0, len(dataset))

print(dataset[0])
# Select the random data point using the random index
random_data_point = dataset[random_index]
random_label = labels[random_index]
# Convert the random data point from int8 to float32
input_data_matrix = random_data_point.astype(np.float32)
batch_size = 1

input_data_matrix = tf.reshape(input_data_matrix, (-1, 1247, 12, 1))


[[-89  32  76 ...  35  63  49]
 [-84  18  57 ... -11  61  22]
 [-83  14  66 ... -32  56  20]
 ...
 [ -6  87  22 ... -14  14   5]
 [ 11  71  -4 ...  -7   1 -16]
 [ 32  69 -33 ... -17   0   2]]


In [66]:
# Set input data to the interpreter.
interpreter.set_tensor(input_details[0]['index'], input_data_matrix)

# Run inference.
interpreter.invoke()

# Get output data from the interpreter.
output_data = interpreter.get_tensor(output_details[0]['index'])

ValueError: Cannot set tensor: Dimension mismatch. Got 1 but expected 1247 for dimension 1 of input 0.

In [None]:
# Process output data.
# For example, if your output is classification probabilities:
predicted_class = np.argmax(output_data)
print("Predicted class:", predicted_class)