# CSC300 - Convolutional Neural Network Project
> Goal: Build and train a CNN model using `tensorflow` and spectrograms given for the assignment.

## Loading data & Converting to Numbers
1. Load Libraries
2. Read CSV file with data structure

In [30]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
import pandas as pd
import numpy as np
import os

In [31]:
import sys
from PIL import Image
sys.modules['Image'] = Image

In [32]:
labels_df = pd.read_csv('./spectrograms/labels.csv')

The images are spectrograms and cannot be resizedas such we must ensure that all images are already in the required dimensions of 100x100 pixels before using them in the model. If any images do not meet this requirement, they should be excluded through .

In [33]:
def isValidImage(imagePath, targetSize=(100, 100)):
    image = Image.open(imagePath)
    return image.size == targetSize

In [34]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, dataframe, directory, xCol, yCol, targetSize=(100, 100), batchSize=32, classMode='categorical', subset=None):
        self.dataframe = dataframe
        self.directory = directory
        self.xCol = xCol
        self.yCol = yCol
        self.targetSize = targetSize
        self.batchSize = batchSize
        self.classMode = classMode
        self.subset = subset
        self.datagen = ImageDataGenerator(validation_split=0.2)
        self.validDataframe = self._filterDataframe()
        self.samples = len(self.validDataframe)
        self.generator = self.datagen.flow_from_dataframe(
            dataframe=self.validDataframe,
            directory=self.directory,
            x_col=self.xCol,
            y_col=self.yCol,
            target_size=self.targetSize,
            batch_size=self.batchSize,
            class_mode=self.classMode,
            subset=self.subset
        )

    def _filterDataframe(self):
        validRows = []
        for _, row in self.dataframe.iterrows():
            imagePath = os.path.join(self.directory, row[self.xCol])
            if isValidImage(imagePath, self.targetSize):
                validRows.append(row)
        return pd.DataFrame(validRows)

    def __len__(self):
        return len(self.generator)

    def __getitem__(self, index):
        return self.generator[index]

In [35]:
trainGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='training'
)

validationGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='validation'
)

Found 2000 validated image filenames belonging to 5 classes.
Found 500 validated image filenames belonging to 5 classes.


In [36]:
# Build model
model = Sequential([
    Input(shape=(100, 100, 3)),  # Specify the input shape in the Input layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax')  # assuming 5 classes for labels a, b, c, d, e
])

In [37]:
# Compile model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [38]:
# Train model
history = model.fit(
    trainGenerator,
    steps_per_epoch=trainGenerator.samples // trainGenerator.batchSize,
    validation_data=validationGenerator,
    validation_steps=validationGenerator.samples // validationGenerator.batchSize,
    epochs=15  # adjust the number of epochs as needed
)

Epoch 1/15


  self._warn_if_super_not_called()


[1m8/9[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 720ms/step - accuracy: 0.2109 - loss: 72.4050

  self.gen.throw(value)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 768ms/step - accuracy: 0.2101 - loss: 69.8074 - val_accuracy: 0.1660 - val_loss: 2.8205
Epoch 2/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 734ms/step - accuracy: 0.2186 - loss: 2.1179 - val_accuracy: 0.1980 - val_loss: 1.5987
Epoch 3/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 729ms/step - accuracy: 0.2615 - loss: 1.5800 - val_accuracy: 0.1940 - val_loss: 1.6072
Epoch 4/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 718ms/step - accuracy: 0.2957 - loss: 1.5408 - val_accuracy: 0.2180 - val_loss: 1.5605
Epoch 5/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 719ms/step - accuracy: 0.3395 - loss: 1.4844 - val_accuracy: 0.2580 - val_loss: 1.5778
Epoch 6/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 705ms/step - accuracy: 0.3723 - loss: 1.4552 - val_accuracy: 0.2940 - val_loss: 1.5268
Epoch 7/15
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [39]:
# Evaluate model
valLoss, valAcc = model.evaluate(validationGenerator)
print(f'Validation Loss: {valLoss}')
print(f'Validation Accuracy: {valAcc}')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 287ms/step - accuracy: 0.3664 - loss: 1.4715
Validation Loss: 1.4719520807266235
Validation Accuracy: 0.36800000071525574


In [40]:
# Save model
model.save('imageClassifierModel.h5')

