In [None]:
import os
import shutil
import random
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [5]:
def split_data(source_dir, dest_dir, split=(0.8, 0.1, 0.1), seed=42,
               labels=('cm', 'non_cm'), pattern="*.png"):
    """Copy labelled images into ``train``/``val``/``test`` sub-folders.

    For every label, the files matching ``pattern`` in ``source_dir/<label>``
    are shuffled and copied to ``dest_dir/<split_name>/<label>``.

    Args:
        source_dir: Directory containing one sub-directory per label.
        dest_dir: Directory under which the split folders are created.
        split: Fractions for (train, val, test). Only the first two are read;
            the test split receives every remaining file.
        seed: Seed of the private random generator used for shuffling.
        labels: Names of the label sub-directories to process.
        pattern: Glob pattern selecting the image files inside a label folder.

    Raises:
        ValueError: If the train/val fractions are negative or sum to more than 1.

    Note:
        Files already present in ``dest_dir`` are not removed, so re-running with
        a different seed or split can leave the same image in several splits.
    """
    train_frac, val_frac = split[0], split[1]
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            f"train and val fractions must be non-negative and sum to at most 1, got {split!r}"
        )

    # A private generator keeps the shuffle reproducible without reseeding the
    # process-wide `random` module that other code may rely on.
    rng = random.Random(seed)

    for label in labels:
        # glob() yields files in filesystem order, which differs between machines;
        # sorting first makes the seeded shuffle give the same split everywhere.
        imgs = sorted(Path(source_dir, label).glob(pattern))
        rng.shuffle(imgs)

        n = len(imgs)
        n_train = int(train_frac * n)
        n_val = int(val_frac * n)

        splits = {
            'train': imgs[:n_train],
            'val': imgs[n_train:n_train + n_val],
            'test': imgs[n_train + n_val:],
        }

        for split_name, files in splits.items():
            split_dir = Path(dest_dir, split_name, label)
            split_dir.mkdir(parents=True, exist_ok=True)
            for img in files:
                shutil.copy(img, split_dir / img.name)

# Example usage: copy the images under "spectrograms" into "split_data/<split>/<label>"
split_data(source_dir="spectrograms", dest_dir="split_data")

In [7]:
# Preprocessing applied to every image when it is loaded.
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),  # bring every image to a common size
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),  # (x - 0.5) / 0.5: normalize to [-1, 1]
    ]
)

# One ImageFolder dataset per split directory, all sharing the same preprocessing.
train_data, val_data, test_data = (
    datasets.ImageFolder(split_root, transform=transform)
    for split_root in ("split_data/train", "split_data/val", "split_data/test")
)

# Only the training loader shuffles; validation and test keep the dataset order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [None]:
# Import numpy library for numerical operations
import numpy as np
# Import torch library for building and training neural networks
import torch
# Import nn module from torch for building neural network layers
from torch import nn
# Import torch multiprocessing module for parallel processing
import torch.multiprocessing
# Import SummaryWriter module from torch.utils.tensorboard for logging to TensorBoard
from torch.utils.tensorboard import SummaryWriter
# Import summary function from torchsummary for displaying model summary
from torchsummary import summary
# Import torchvision library for image processing
import torchvision
# Import pyplot module from matplotlib for plotting graphs
import matplotlib.pyplot as plt
# Import tqdm module for displaying progress bars
from tqdm.auto import tqdm
# Import default_timer function from timeit for measuring time taken for model training
from timeit import default_timer as timer

# Directory handed to the TensorBoard SummaryWriter used for logging
writer_path = 'runs/log_file_tensorboard'
writer = SummaryWriter(log_dir=writer_path)

In [None]:
# NUM_WORKERS = 4 # (disabled) number of workers used when loading data into the DataLoader
# DATASET_PATH = 'split_data' # (disabled) path of our spectrogram dataset
IMAGE_SIZE = (128, 128) # image size; its two entries are passed to the model summary
CHANNEL_COUNT = 3 # three channels, one per colour (R, G, B)
ATTRIBUTION = ["cm", "non_cm"] # class labels
ACCURACY_THRESHOLD = 90 # validation accuracy (in %) at which training stops early

In [None]:
# Define a neural network class that inherits from PyTorch nn.Module.
class neuralNetworkV1(nn.Module):
    """Small CNN classifier: four strided 3x3 convolutions, two max-poolings, one linear head.

    The linear layer's input width is inferred from ``image_size`` with a dummy
    forward pass, so the head always matches the convolutional output. The
    previous hard-coded ``in_features=480`` did not match 128x128 inputs (which
    flatten to 40 features), and the resulting shape error was swallowed, making
    ``forward`` return the flattened features instead of class logits.

    Args:
        in_channels: Number of channels of the input images (3 for RGB).
        num_classes: Number of output logits; defaults to 2 for the two labels
            ("cm", "non_cm") used in this notebook.
        image_size: (height, width) of the images the model will receive; keep it
            equal to the size the images are resized to.
    """

    # The __init__ method is used to declare the layers that will be used in the forward pass.
    def __init__(self, in_channels=3, num_classes=2, image_size=(128, 128)):
        super().__init__()  # required because our class inherits from nn.Module
        # Four 3x3 convolutions; stride 2 with padding 1 roughly halves each spatial side.
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(10, 10, kernel_size=3, stride=2, padding=1)
        # Max pooling layer to reduce feature complexity.
        self.pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # ReLU activation function for introducing non-linearity.
        self.relu = nn.ReLU()
        # Flatten the 2D output from the convolutional layers for the fully connected layer.
        self.flatten = nn.Flatten()
        # Measure the flattened feature count once with a zero image instead of
        # hard-coding it, so it cannot drift out of sync with the input size.
        with torch.no_grad():
            probe = torch.zeros(1, in_channels, *image_size)
            flat_features = self._extract_features(probe).shape[1]
        # Fully connected layer producing one logit per class.
        self.linear = nn.Linear(in_features=flat_features, out_features=num_classes)

    def _extract_features(self, x: torch.Tensor):
        """Run the convolutional stack and return the flattened feature map."""
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.pooling(x)
        x = self.relu(self.conv3(x))
        x = self.pooling(x)
        x = self.relu(self.conv4(x))
        return self.flatten(x)

    # define how each data sample will propagate in each layer of the network
    def forward(self, x: torch.Tensor):
        """Return the class logits for a batch of images.

        A mismatch between the input size and ``image_size`` now raises the
        error from ``nn.Linear`` instead of being printed and ignored.
        """
        return self.linear(self._extract_features(x))

our_model = neuralNetworkV1()

print("Model summary : ")
# summary() prints the layer table itself; wrapping it in print() only appended
# an extra line showing its return value. The trailing ';' keeps a notebook from
# echoing that return value as cell output.
summary(our_model, (CHANNEL_COUNT, IMAGE_SIZE[0], IMAGE_SIZE[1]));

In [None]:
# Display the total training time
def display_training_time(start, end):
    """Print the elapsed training time and return it.

    Args:
        start: Timestamp taken before training.
        end: Timestamp taken after training.

    Returns:
        The difference ``end - start``, which is also printed with three decimals.
    """
    total_time = end - start
    message = f"Training time : {total_time:.3f} seconds"
    print(message)
    return total_time

# Display training information for each epoch
def display_training_infos(epoch, val_loss, train_loss, accuracy):
    """Print a one-line summary of an epoch.

    Args:
        epoch: Epoch number to show.
        val_loss: Validation loss; must expose ``.item()``.
        train_loss: Training loss; must expose ``.item()``.
        accuracy: Accuracy value, printed followed by a percent sign.
    """
    # Every metric is shown rounded to two decimals.
    val_loss, train_loss, accuracy = (
        round(val_loss.item(), 2),
        round(train_loss.item(), 2),
        round(accuracy, 2),
    )
    print(f"Epoch : {epoch}, Training loss : {train_loss}, Validation loss : {val_loss}, Accuracy : {accuracy} %")

In [None]:
# Calculates accuracy between truth labels and predictions.
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with ``y_true``.

    Returns:
        Number of matching elements divided by ``len(y_pred)``, times 100.
    """
    matches = y_true.eq(y_pred)
    n_correct = matches.sum().item()
    return n_correct / len(y_pred) * 100

# The core function for training the CNN
def train_neural_net(epochs, model, loss_func, optimizer, train_batches, val_batches):
    """Train ``model`` and validate it after every epoch.

    Each epoch runs one optimisation pass over ``train_batches`` and one
    evaluation pass over ``val_batches``. The mean training loss, mean validation
    loss and validation accuracy are logged through the module-level ``writer``
    and printed with ``display_training_infos``. Training stops early once the
    validation accuracy reaches the module-level ``ACCURACY_THRESHOLD``.

    Args:
        epochs: Maximum number of epochs to run.
        model: Network to train; called as ``model(images)``.
        loss_func: Callable ``loss_func(predictions, labels)`` returning a tensor.
        optimizer: Optimizer updating the model's parameters.
        train_batches: Sized iterable of ``(images, labels)`` training batches.
        val_batches: Sized iterable of ``(images, labels)`` validation batches.

    Returns:
        The validation accuracy of the last epoch that ran (0 if none ran).
        It is the mean of the per-batch accuracies, so a smaller final batch
        carries the same weight as a full one.

    Raises:
        ValueError: If either loader contains no batch.
    """
    if len(train_batches) == 0 or len(val_batches) == 0:
        raise ValueError("train_batches and val_batches must each contain at least one batch")

    final_accuracy = 0
    for epoch in tqdm(range(epochs)):
        # training mode
        model.train()
        with torch.enable_grad():
            train_loss = 0
            for images, labels in train_batches:
                predictions = model(images)
                loss = loss_func(predictions, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Accumulate a detached copy: summing `loss` itself would keep every
                # batch's autograd graph alive until the end of the epoch.
                train_loss += loss.detach()
            train_loss /= len(train_batches)
            writer.add_scalar("training loss", train_loss, epoch)
        # evaluation mode
        val_loss, val_accuracy = 0, 0
        model.eval()
        with torch.inference_mode():
            for images, labels in val_batches:
                predictions = model(images)
                val_loss += loss_func(predictions, labels)
                val_accuracy += accuracy_fn(y_true=labels, y_pred=predictions.argmax(dim=1))
            val_loss /= len(val_batches)
            val_accuracy /= len(val_batches)
            writer.add_scalar("validation loss", val_loss, epoch)
            final_accuracy = val_accuracy
        display_training_infos(epoch+1, val_loss, train_loss, val_accuracy)
        writer.add_scalar("accuracy", val_accuracy, epoch)
        # stop as soon as the validation accuracy target is reached
        if val_accuracy >= ACCURACY_THRESHOLD:
            break
    return final_accuracy