In [None]:
%pip install torch torchvision --index-url https://download.pytorch.org/whl/cu127
%pip install matplotlib
%pip install numpy
%pip install tensorboard
%pip install scikit-learn

In [None]:
# Set Up
import os
import torch

from torchvision import transforms
import torchvision

import matplotlib.pyplot as plt
import numpy as np

from torch.utils.tensorboard import SummaryWriter

from DataLoader import DataLoader
from CNN import CNN
from Trainer import Trainer

def img_show(img):
    """Display a normalised image tensor with matplotlib.

    The tensor is mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalisation), copied to host memory, reordered from
    axis order (0, 1, 2) to (1, 2, 0) and handed to ``plt.imshow``.

    Args:
        img: a 3-D tensor; assumed to be channel-first (C, H, W) so that the
            reordering produces the (H, W, C) layout imshow expects.
    """
    unnormalised = 0.5 + img / 2  # undo Normalize(mean=0.5, std=0.5)
    channel_first = unnormalised.cpu().numpy()
    channel_last = channel_first.transpose(1, 2, 0)
    plt.imshow(channel_last)
    plt.show()

# Training pipeline: resize to 224x224, apply random augmentation (flips, rotation,
# sharpness adjustment), then convert to a tensor and normalise each channel with
# mean 0.5 / std 0.5.
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(180),
        transforms.RandomAdjustSharpness(0),  # Randomly Blur Image
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Evaluation pipeline: same resize / tensor conversion / normalisation, no augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

In [None]:
# Shared locations, both relative to the current working directory.
dataset_root = os.getcwd() + "/fire_dataset"
cnn_log_dir = os.getcwd() + "/CNN"

# Training loader: augmented transform, shuffled, read from the "/train" sub-directory.
train_set = DataLoader(data_dir=dataset_root, trans_width=224, trans_height=224).load(
    directory="/train",
    transform=train_transform,
    batch_size=32,
    shuffle=True,
    workers=2,
)

# NOTE(review): neither call below passes `directory`, so the validation and test loaders
# both use load()'s default directory and differ only in `shuffle` — confirm against
# DataLoader.load that they are meant to read the same data.
valid_set = DataLoader(data_dir=dataset_root, trans_width=224, trans_height=224).load(
    transform=test_transform,
    batch_size=32,
    shuffle=True,
    workers=2,
)
test_set = DataLoader(data_dir=dataset_root, trans_width=224, trans_height=224).load(
    transform=test_transform,
    batch_size=32,
    shuffle=False,
    workers=2,
)

# Model (moved to the selected device) and the trainer that will drive it.
model = CNN(log_dir=cnn_log_dir, lr=1e-4).to(device)
trainer = Trainer(log_dir=cnn_log_dir, n_epochs=2, device=device)

In [None]:
# TensorBoard event writer; its log directory is "<current working directory>/CNN".
writer = SummaryWriter(log_dir=os.getcwd()+"/CNN")

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard
# Hand the writer, model, training loader and validation loader to the project's Trainer,
# with the learning-rate scheduler option switched on.
# NOTE(review): presumably this runs the training loop for the trainer's configured n_epochs
# and logs through `writer` — confirm in Trainer.fit.
trainer.fit(writer=writer, model=model, data=train_set, valid=valid_set, use_lr_scheduler=True)

In [None]:
# Plot the four histories recorded on the trainer on one set of axes and save the figure.
plt.clf()

# (trainer attribute, legend label) pairs, drawn in this order.
history_curves = (
    ("avg_train_loss", "Avg. Training Loss"),
    ("avg_valid_loss", "Avg. Validation Loss"),
    ("training_accuracy", "Training Accuracy (x100)%"),
    ("validation_accuracy", "Validation Accuracy (x100)%"),
)
for curve_attr, curve_label in history_curves:
    plt.plot(getattr(trainer, curve_attr), label=curve_label)

plt.legend()
plt.savefig(os.getcwd() + "/CNN/CNN_Loss_Plot.jpg")

In [None]:
# Open the TensorBoard UI inline, reading event files from ./CNN/CNN_Arch_1.
# NOTE(review): the SummaryWriter in this notebook is created with log_dir "<cwd>/CNN", not this
# sub-directory — confirm that CNN_Arch_1 is where the run's event files are actually written.
%tensorboard --logdir=./CNN/CNN_Arch_1

In [None]:
# Qualitative check: show the first test batch and print the model's predicted class names.
# Assumes train_set is a torch DataLoader whose dataset exposes `classes` — TODO confirm.
classes = train_set.dataset.classes
dataiter = iter(test_set)
images, labels = next(dataiter)

images = images.to(device)

img_show(torchvision.utils.make_grid(images))
model.eval()
# Inference only: disable autograd so no graph is built for this forward pass.
# The output already lives on the model's device, so no extra .to(device) is needed.
with torch.no_grad():
    output = model(images)

if output.dim() == 1 or output.shape[1] == 1:
    # Single logit per sample (the usual shape with BCEWithLogitsLoss): an argmax over one
    # column is always 0, so threshold instead. logit > 0 is equivalent to sigmoid(logit) > 0.5.
    estimatedLabels = (output.reshape(-1) > 0).long()
else:
    # One score per class: take the index of the highest-scoring class.
    estimatedLabels = torch.max(output, 1).indices

print('Estimated Labels: ', ' '.join(f'{classes[estimatedLabels[j]]:5s}' for j in range(images.shape[0])))

As someone who works in the Fire Detection and Evacuation industry, I thought it would be interesting to train a model that can recognise whether there is a fire in a given image. So I decided to train a model to perform binary classification on images that either contain fire or do not. I chose binary classification because it is something I am familiar with, which lets me focus on the fine-tuning process without worrying too much about how to load the data and train the model.

As I had already built a data loader, a trainer, and a confusion matrix class in the first assignment, I brought them across and made the improvements and adjustments I needed. I also installed the libraries I required.

I have decided to move the image transforms out of the data loader, making it a bit more modular by passing in the transformation to use. I have also updated the transformation I use for training by introducing some data augmentation, such as random flips, rotation, and sharpness adjustment. I added the sharpness augmentation to simulate low-quality input images, such as screenshots of CCTV footage or equivalent, so that the model can be applied in real-world scenarios.

Originally I was planning to implement a DenseNet architecture to address potential issues with exploding or vanishing gradients. However, given the size of the dataset I have chosen, I think it would make the model too complex for the amount of data; a dataset of this size calls for a smaller model. For this reason, I have decided to experiment with some custom CNN architectures.

For my initial architecture, I have decided to use 3 blocks that consist of a convolutional layer, a ReLU activation and a max pool. I use ReLU as the activation function as it is the most commonly used activation function for modern neural networks. For the loss function, I have gone with torch's BCEWithLogitsLoss method. This method applies a Sigmoid activation to the outputs of the network and then calculates the Binary Cross Entropy Loss. According to torch's documentation, this is more stable than applying the Sigmoid manually and then calling BCELoss instead (https://docs.pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html).

An issue I came across early was with my data loader. The dataset I chose didn't come with pre-organised training, validation, and test sets, so I knew I would have to split them myself. Instinctively, I did this in the data loader's load method using the random_split method from torch. However, as I thought more deeply about it, I realised that if I needed to reload the dataset to continue training further, or restart training from a previous checkpoint, there would be no guarantee that I would get the same split, potentially causing the model to train on the test data. My solution was to split the dataset manually, which was feasible because the dataset is not too large.

I reorganised the dataset into 3 folders: one for training, one for validation, and one for testing. Inside each folder, the images are split into the 2 classes, fire and no fire. I used a 70/20/10 split for training, validation, and testing.