# CSC300 - Convolutional Neural Network Project
> Goal: Build and train a CNN model using `tensorflow` and spectrograms given for the assignment.

## Loading data & Converting to Numbers
1. Load Libraries
2. Read CSV file with data structure

In [106]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
import pandas as pd
import numpy as np
import os

In [107]:
import sys
from PIL import Image
sys.modules['Image'] = Image

In [108]:
labels_df = pd.read_csv('./spectrograms/labels.csv')

The images are spectrograms and cannot be resizedas such we must ensure that all images are already in the required dimensions of 100x100 pixels before using them in the model. If any images do not meet this requirement, they should be excluded through .

In [109]:
def isValidImage(imagePath, targetSize=(100, 100)):
    image = Image.open(imagePath)
    return image.size == targetSize

In [110]:
def loadAndPreprocessImage(filePath, targetSize=(100, 100)):
    image = Image.open(filePath)
    image = np.array(image) / 255.0  # normalize the image
    return image

In [111]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, dataframe, directory, xCol, yCol, targetSize=(100, 100), batchSize=32, classMode='categorical', subset=None):
        self.dataframe = dataframe
        self.directory = directory
        self.xCol = xCol
        self.yCol = yCol
        self.targetSize = targetSize
        self.batchSize = batchSize
        self.classMode = classMode
        self.subset = subset
        self.datagen = ImageDataGenerator(validation_split=0.2)
        self.validDataframe = self._filterDataframe()
        self.samples = len(self.validDataframe)
        self.generator = self.datagen.flow_from_dataframe(
            dataframe=self.validDataframe,
            directory=self.directory,
            x_col=self.xCol,
            y_col=self.yCol,
            target_size=self.targetSize,
            batch_size=self.batchSize,
            class_mode=self.classMode,
            subset=self.subset
        )

    def _filterDataframe(self):
        validRows = []
        for _, row in self.dataframe.iterrows():
            imagePath = os.path.join(self.directory, row[self.xCol])
            if isValidImage(imagePath, self.targetSize):
                validRows.append(row)
        return pd.DataFrame(validRows)

    def __len__(self):
        return len(self.generator)

    def __getitem__(self, index):
        return self.generator[index]

In [112]:
trainGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='training'
)

validationGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='validation'
)

Found 2000 validated image filenames belonging to 5 classes.
Found 500 validated image filenames belonging to 5 classes.


In [113]:
# Build model
model = Sequential([
    Input(shape=(100, 100, 3)),  # Specify the input shape in the Input layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax')  # assuming 5 classes for labels a, b, c, d, e
])

In [114]:
# Compile model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [115]:
# Train model
history = model.fit(
    trainGenerator,
    steps_per_epoch=trainGenerator.samples // trainGenerator.batchSize,
    validation_data=validationGenerator,
    validation_steps=validationGenerator.samples // validationGenerator.batchSize,
    epochs=10  # adjust the number of epochs as needed
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m8/9[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 751ms/step - accuracy: 0.1879 - loss: 75.2983

  self.gen.throw(value)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 795ms/step - accuracy: 0.1899 - loss: 72.7680 - val_accuracy: 0.1440 - val_loss: 1.9307
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 732ms/step - accuracy: 0.2225 - loss: 1.7196 - val_accuracy: 0.2220 - val_loss: 1.6046
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 773ms/step - accuracy: 0.3238 - loss: 1.5320 - val_accuracy: 0.2520 - val_loss: 1.5829
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 733ms/step - accuracy: 0.3511 - loss: 1.4931 - val_accuracy: 0.2920 - val_loss: 1.5571
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 717ms/step - accuracy: 0.3798 - loss: 1.4432 - val_accuracy: 0.2560 - val_loss: 1.5645
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 728ms/step - accuracy: 0.3794 - loss: 1.4474 - val_accuracy: 0.2840 - val_loss: 1.5388
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━

In [116]:
# Evaluate model
valLoss, valAcc = model.evaluate(validationGenerator)
print(f'Validation Loss: {valLoss}')
print(f'Validation Accuracy: {valAcc}')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 279ms/step - accuracy: 0.3768 - loss: 1.5249
Validation Loss: 1.5257457494735718
Validation Accuracy: 0.36800000071525574


In [117]:
# Save model
model.save('imageClassifierModel.h5')

