## Training a neural network to classify images of MFCCs

In [16]:
# create tensorflow dataset from the training data

# iterate on every csv file in the folder
# create a dataset from the csv file
# append the dataset to the main dataset
# save the main dataset to a file

import os

# get parent directory
parent_dir = os.path.dirname(os.getcwd())

# get the list of csv files in the folder
directory = parent_dir + "\\datasets\\"

csv_files_ita = [directory + "\\ita\\" + f for f in os.listdir(directory + "ita\\") if f.endswith('.csv')]
csv_files_eng = [directory + "\\eng\\" + f for f in os.listdir(directory + "eng\\") if f.endswith('.csv')]

print(len(csv_files_ita), len(csv_files_eng))



425 450


Test import of csv datasets into tensorflow datasets

import every csv file as a single matrix with one label associated

In [None]:
import tensorflow as tf
# Print TensorFlow version
print("TensorFlow version:", tf.__version__)

# Check if GPU is available and being used
if tf.test.is_gpu_available():
    print("GPU is available and being used.")
    # Print GPU device information
    print("GPU devices:")
    for device in tf.config.experimental.list_physical_devices('GPU'):
        print(device)
else:
    print("GPU is not available.")

In [56]:
import numpy as np

# read csv file and store the values into a numpy matrix
data_array = np.genfromtxt(csv_files_ita[0], delimiter=',', dtype=np.int8)
print("Loaded data array shape:", data_array.shape)

label = "ita"
test = [[1, 2], [3, 4]]

# Create a TensorFlow dataset
tf_dataset_matrix = tf.data.Dataset.from_tensor_slices([data_array])
tf_dataset_label = tf.data.Dataset.from_tensor_slices([label])

tf_dataset = tf.data.Dataset.zip((tf_dataset_matrix, tf_dataset_label))

for element in tf_dataset:
    print(element)

Loaded data array shape: (1247, 12)
(<tf.Tensor: shape=(1247, 12), dtype=int8, numpy=
array([[-89,  32,  76, ...,  35,  63,  49],
       [-84,  18,  57, ..., -11,  61,  22],
       [-83,  14,  66, ..., -32,  56,  20],
       ...,
       [ -6,  87,  22, ..., -14,  14,   5],
       [ 11,  71,  -4, ...,  -7,   1, -16],
       [ 32,  69, -33, ..., -17,   0,   2]], dtype=int8)>, <tf.Tensor: shape=(), dtype=string, numpy=b'ita'>)


In [65]:
# create empty pandas dataframe
dataset = []
labels = []

# read csv files into dataframe
for file in csv_files_ita:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset.append(data_array)
    labels.append("ita")

for file in csv_files_eng:
    data_array = np.genfromtxt(file, delimiter=',', dtype=np.int8)
    dataset.append(data_array)
    labels.append("eng")

print("dataset size: ", len(dataset))
print("labels size: ", len(labels))



dataset size:  875
labels size:  875


In [68]:
classes = ["ita", "eng"]

# Create a mapping from class names to integer labels
class_to_index = {class_name: index for index, class_name in enumerate(classes)}

# Convert labels to integer labels using the mapping
integer_labels = np.array([class_to_index[label] for label in labels], dtype=np.int8)

labels_one_hot = tf.keras.utils.to_categorical(integer_labels, num_classes=2) # one hot encoding

print(labels_one_hot)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In [90]:
# train - validation split of tensorflow dataset
from sklearn.model_selection import train_test_split

train_features, val_features, train_labels, val_labels = train_test_split(dataset, labels_one_hot, test_size=0.2, random_state=42) 


val_features = tf.reshape(val_features, (-1, 1247, 12))
train_features = tf.reshape(train_features, (-1, 1247, 12))


# create tensorflow dataset from numpy arrays
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_features, val_labels))

# shuffle and batch
train_dataset = train_dataset.shuffle(len(train_features))

val_dataset = val_dataset.batch(32)
train_dataset = train_dataset.batch(32)

In [91]:
for image_batch, labels_batch in train_dataset:
	print(image_batch.shape)
	print(labels_batch.shape)
	break


(1247, 12)
(2,)


In [92]:
from tensorflow.keras import layers, models

# Create a basic CNN model
model = models.Sequential([
    #layers.Reshape(( 1247, 12), input_shape=(1247, 12)),
	layers.Conv2D(filters=32, kernel_size=(10, 1), activation='relu', input_shape=(1247, 12, 1)),
	layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(filters=32, kernel_size=(10, 1), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 1)),
	layers.Flatten(),
	layers.Dense(64, activation='relu'),
	layers.Dense(2, activation='softmax')  # Two classes
])

model.summary()

# Compile the model
model.compile(optimizer='adam',
			  loss='categorical_crossentropy',  # Use 'categorical_crossentropy' for one-hot encoded labels
			  metrics=['accuracy'])

# Train the model
model.fit(x=train_dataset, epochs=10, batch_size=32, 
          validation_data=val_dataset)


Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_29 (Conv2D)          (None, 1238, 12, 32)      352       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 619, 12, 32)      0         
 2D)                                                             
                                                                 
 conv2d_30 (Conv2D)          (None, 610, 12, 32)       10272     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 305, 12, 32)      0         
 2D)                                                             
                                                                 
 flatten_18 (Flatten)        (None, 117120)            0         
                                                                 
 dense_38 (Dense)            (None, 64)              

ValueError: in user code:

    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\engine\training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\engine\training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\engine\training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\engine\training.py", line 1050, in train_step
        y_pred = self(x, training=True)
    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "d:\Programs\Anaconda\envs\anndl\Lib\site-packages\keras\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_19" is incompatible with the layer: expected shape=(None, 1247, 12, 1), found shape=(1247, 12)
