# CSC300 - Convolutional Neural Network Project
> Goal: Build and train a CNN model using `tensorflow` and spectrograms given for the assignment.

## Loading data & Converting to Numbers
1. Load Libraries
2. Read CSV file with data structure

In [119]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
import pandas as pd
import numpy as np
import os

In [120]:
import sys
from PIL import Image
sys.modules['Image'] = Image

In [121]:
labels_df = pd.read_csv('./spectrograms/labels.csv')

The images are spectrograms and cannot be resizedas such we must ensure that all images are already in the required dimensions of 100x100 pixels before using them in the model. If any images do not meet this requirement, they should be excluded through .

In [122]:
def isValidImage(imagePath, targetSize=(100, 100)):
    image = Image.open(imagePath)
    return image.size == targetSize

In [123]:
def loadAndPreprocessImage(filePath, targetSize=(100, 100)):
    image = Image.open(filePath)
    image = np.array(image) / 255.0  # normalize the image
    return image

In [124]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, dataframe, directory, xCol, yCol, targetSize=(100, 100), batchSize=32, classMode='categorical', subset=None):
        self.dataframe = dataframe
        self.directory = directory
        self.xCol = xCol
        self.yCol = yCol
        self.targetSize = targetSize
        self.batchSize = batchSize
        self.classMode = classMode
        self.subset = subset
        self.datagen = ImageDataGenerator(validation_split=0.2)
        self.validDataframe = self._filterDataframe()
        self.samples = len(self.validDataframe)
        self.generator = self.datagen.flow_from_dataframe(
            dataframe=self.validDataframe,
            directory=self.directory,
            x_col=self.xCol,
            y_col=self.yCol,
            target_size=self.targetSize,
            batch_size=self.batchSize,
            class_mode=self.classMode,
            subset=self.subset
        )

    def _filterDataframe(self):
        validRows = []
        for _, row in self.dataframe.iterrows():
            imagePath = os.path.join(self.directory, row[self.xCol])
            if isValidImage(imagePath, self.targetSize):
                validRows.append(row)
        return pd.DataFrame(validRows)

    def __len__(self):
        return len(self.generator)

    def __getitem__(self, index):
        return self.generator[index]

In [125]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [126]:
# Data Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

validation_datagen = ImageDataGenerator(rescale=1./255)  # Only rescale for validation data

In [127]:
trainGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='training'
)

validationGenerator = CustomDataGenerator(
    dataframe=labels_df,
    directory='.',  # base directory for the relative image paths in the CSV
    xCol='image_path',
    yCol='class_label',
    targetSize=(100, 100),
    batchSize=256,
    classMode='categorical',
    subset='validation'
)

Found 2000 validated image filenames belonging to 5 classes.
Found 500 validated image filenames belonging to 5 classes.


In [128]:
# Model Architecture with Transfer Learning
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(100, 100, 3))
base_model.trainable = False  # Freeze the base model

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 0us/step


In [129]:
# Build model
model = Sequential([
    Input(shape=(100, 100, 3)),  # Specify the input shape in the Input layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax')  # assuming 5 classes for labels a, b, c, d, e
])

In [130]:
# Compile model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [131]:
# Train model
history = model.fit(
    trainGenerator,
    steps_per_epoch=trainGenerator.samples // trainGenerator.batchSize,
    validation_data=validationGenerator,
    validation_steps=validationGenerator.samples // validationGenerator.batchSize,
    epochs=10  # adjust the number of epochs as needed
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m8/9[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 634ms/step - accuracy: 0.2162 - loss: 44.7163

  self.gen.throw(value)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 680ms/step - accuracy: 0.2162 - loss: 43.1267 - val_accuracy: 0.1800 - val_loss: 1.8888
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 610ms/step - accuracy: 0.2147 - loss: 1.7386 - val_accuracy: 0.1560 - val_loss: 1.6180
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 588ms/step - accuracy: 0.2270 - loss: 1.5951 - val_accuracy: 0.1540 - val_loss: 1.6225
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 620ms/step - accuracy: 0.2518 - loss: 1.5611 - val_accuracy: 0.1920 - val_loss: 1.6003
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 596ms/step - accuracy: 0.3144 - loss: 1.5363 - val_accuracy: 0.2340 - val_loss: 1.5633
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 585ms/step - accuracy: 0.3451 - loss: 1.4935 - val_accuracy: 0.2520 - val_loss: 1.5970
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [132]:
# Evaluate model
valLoss, valAcc = model.evaluate(validationGenerator)
print(f'Validation Loss: {valLoss}')
print(f'Validation Accuracy: {valAcc}')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step - accuracy: 0.2817 - loss: 1.5305
Validation Loss: 1.539275884628296
Validation Accuracy: 0.2759999930858612


In [133]:
# Save model
model.save('imageClassifierModel.h5')

