In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import os
from matplotlib import pyplot as plt

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over parallel, index-aligned image and label sequences.

    Each item is an ``(image, label)`` pair where ``label`` is a ``torch.long``
    tensor. With a ``transform`` the image is wrapped in a PIL image and passed
    through it; without one the raw array is returned as a float tensor with
    its last axis moved to the front.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        # The dataset length is driven by the image sequence.
        return len(self.images)

    def __getitem__(self, idx):
        raw_image = self.images[idx]
        target = torch.tensor(self.labels[idx], dtype=torch.long)

        if not self.transform:
            # No transform configured: plain float conversion (values are not
            # rescaled), with axis order (0, 1, 2) -> (2, 0, 1).
            return torch.from_numpy(raw_image).float().permute(2, 0, 1), target

        # torchvision-style transforms operate on PIL images.
        return self.transform(Image.fromarray(raw_image)), target

In [None]:
# Load and preprocess the data
def load_data(base_path, classes=43, image_size=(30, 30)):
    """Load a class-per-folder image dataset into NumPy arrays.

    Expects ``base_path`` to contain sub-directories named ``0`` ..
    ``classes - 1``, each holding the images of that class.

    Args:
        base_path: Directory containing the numbered class folders.
        classes: Number of class folders to read (defaults to 43).
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        ``(data, labels)`` where ``data`` is an array of RGB images and
        ``labels`` holds ``folder_index + 1`` for each image.

    Raises:
        FileNotFoundError: If a class folder is missing.
    """
    data = []
    labels = []
    for class_id in tqdm(range(classes)):
        class_dir = os.path.join(base_path, str(class_id))
        # Sorted so the sample order does not depend on the filesystem.
        for file_name in sorted(os.listdir(class_dir)):
            # os.path.join keeps this portable (the separator is not always '\').
            with Image.open(os.path.join(class_dir, file_name)) as image:
                # Force 3 channels so every sample has the same shape and the
                # list stacks into one regular array.
                pixels = np.array(image.convert('RGB').resize(image_size))
            data.append(pixels)
            # Labels are shifted by one relative to the folder name.
            # NOTE(review): presumably label 0 is reserved for the extra class
            # in the fine-tune dataset — confirm.
            labels.append(class_id + 1)

    return np.array(data), np.array(labels)

def load_custom_data(base_path, classes=44, image_size=(30, 30)):
    """Load the fine-tuning dataset (one folder per class) into NumPy arrays.

    Expects ``base_path`` to contain sub-directories named ``0`` ..
    ``classes - 1``. Unlike ``load_data``, the folder index is used as the
    label without any offset.

    Args:
        base_path: Directory containing the numbered class folders.
        classes: Number of class folders to read (defaults to 44).
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        ``(data, labels)`` where ``data`` is an array of RGB images and
        ``labels`` holds the folder index of each image.

    Raises:
        FileNotFoundError: If a class folder is missing.
    """
    data = []
    labels = []
    for class_id in tqdm(range(classes)):
        class_dir = os.path.join(base_path, str(class_id))
        # Sorted so the sample order does not depend on the filesystem.
        for file_name in sorted(os.listdir(class_dir)):
            # os.path.join keeps this portable (the separator is not always '\').
            with Image.open(os.path.join(class_dir, file_name)) as image:
                # Force 3 channels so every sample has the same shape and the
                # list stacks into one regular array.
                pixels = np.array(image.convert('RGB').resize(image_size))
            data.append(pixels)
            labels.append(class_id)

    return np.array(data), np.array(labels)

def load_test_data(base_path, image_size=(30, 30)):
    """Load the test images and their labels from ``base_path``.

    Labels come from ``GT-final_test.csv`` inside ``base_path``: a
    ``;``-separated file whose first column is the file name (with a ``.ppm``
    extension) and whose eighth column is the class id. Images on disk are
    ``.png`` files with the same stem.

    Args:
        base_path: Directory holding the ``.png`` images and the CSV file.
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        ``(data, labels)`` where ``data`` is an array of RGB images and
        ``labels`` holds ``class_id + 1`` for each image (the same shift that
        ``load_data`` applies).

    Raises:
        KeyError: If a ``.png`` file has no matching row in the CSV.
    """
    image_label_map = {}
    gt_path = os.path.join(base_path, 'GT-final_test.csv')
    # Context manager so the ground-truth file is closed deterministically.
    with open(gt_path) as gt_file:
        for line in gt_file:
            line = line.strip()
            # Skip the header row and any blank (e.g. trailing) lines.
            if not line or line.startswith('Filename'):
                continue
            parts = line.split(';')
            # NOTE(review): index 7 is presumably the ClassId column — confirm
            # against the CSV header.
            image_label_map[parts[0]] = int(parts[7]) + 1

    data = []
    labels = []
    # Sorted so the sample order does not depend on the filesystem.
    for image_name in tqdm(sorted(os.listdir(base_path))):
        stem, extension = os.path.splitext(image_name)
        if extension != '.png':
            continue
        with Image.open(os.path.join(base_path, image_name)) as image:
            # Force 3 channels so every sample has the same shape.
            data.append(np.array(image.convert('RGB').resize(image_size)))
        # The CSV lists the original .ppm names; swap only the extension.
        labels.append(image_label_map[stem + '.ppm'])

    return np.array(data), np.array(labels)

In [None]:
# Build dataset paths with os.path.join so the notebook also runs on systems
# whose path separator is not a backslash.
train_images, train_labels = load_data(os.path.join("GTSRBDataset", "Train"))
custom_train_images, custom_train_labels = load_custom_data("FineTuneDataset")
test_images, test_labels = load_test_data(os.path.join("GTSRBDataset", "Test"))

In [None]:
# Preprocessing shared by every dataset: convert the PIL image to a tensor,
# then normalise each of the three channels with mean 0.5 and std 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(channel_mean, channel_std)]
)

In [None]:
# Wrap each (images, labels) pair in a dataset, then batch it. Only the two
# training loaders shuffle; the test loader keeps a fixed order.
batch_size = 32

train_dataset = CustomDataset(train_images, train_labels, transform)
test_dataset = CustomDataset(test_images, test_labels, transform)
finetune_dataset = CustomDataset(custom_train_images, custom_train_labels, transform)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
finetune_dataloader = DataLoader(finetune_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Define the model
class CNNModel(nn.Module):
    """Small CNN classifier: two conv-conv-pool stages followed by two dense layers.

    The first dense layer expects ``64 * 7 * 7`` features per sample, which is
    what two 2x2 poolings produce from a 30x30 input (30 -> 15 -> 7).

    Args:
        num_classes: Size of the output layer (number of logits per sample).
            Defaults to 44.
    """

    def __init__(self, num_classes=44):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 256)
        self.dropout2 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to 7x7
                (the flattened features then no longer match ``fc1``).
        """
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x)
        x = self.dropout(x)
        x = torch.relu(self.conv3(x))
        x = torch.relu(self.conv4(x))
        x = self.pool(x)
        x = self.dropout(x)
        # Flatten per sample, keeping the batch axis fixed. Reshaping with
        # view(-1, 64 * 7 * 7) could silently regroup features across samples
        # when the input size is wrong; this makes fc1 raise instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

In [None]:
# Fresh network plus the loss and optimiser used for the first training run.
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# When True, the training cells evaluate on the test set after every epoch,
# print the epoch summary and record accuracy/loss for the plot further down;
# when False all of that is skipped.
visualize = False

In [None]:
# GTSRB: train the model on the GTSRB training set.
num_epochs = 10
accuracies = []  # test accuracy per epoch (only filled when `visualize` is True)
losses = []      # mean training loss per epoch (only filled when `visualize` is True)

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first epoch finishes.
os.makedirs("Models", exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_dataloader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    if visualize:
        # Per-epoch evaluation on the test set; eval mode disables dropout.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_dataloader:
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            accuracy = correct / total
            accuracies.append(accuracy)
            losses.append(running_loss/len(train_dataloader))

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss_Train: {running_loss/len(train_dataloader)}, Accuracy_Test: {accuracy}")

    # One checkpoint per epoch (the whole model object is pickled).
    torch.save(model, f"Models/classification_{epoch}.pth")


In [None]:
# Fine-tune dataset: continue training the same model on the fine-tune data
# with a fresh optimiser and a lower learning rate.
finetune_epochs = 10
optimizer = optim.Adam(model.parameters(), lr=0.0002)

# torch.save does not create missing parent directories.
os.makedirs("Models", exist_ok=True)

for epoch in range(finetune_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(finetune_dataloader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    if visualize:
        # Per-epoch evaluation on the test set; eval mode disables dropout.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_dataloader:
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            accuracy = correct / total
            accuracies.append(accuracy)
            # Average over the batches actually iterated this epoch: the
            # fine-tune loader, not the GTSRB training loader.
            epoch_loss = running_loss / len(finetune_dataloader)
            losses.append(epoch_loss)

        print(f"Epoch [{epoch+1}/{finetune_epochs}], Loss_Train: {epoch_loss}, Accuracy_Test: {accuracy}")

    # Checkpoint indices continue after the 10 written by the GTSRB run.
    torch.save(model, f"Models/classification_{10 + epoch}.pth")


In [None]:
if visualize:
    # One x position per recorded evaluation (GTSRB epochs followed by
    # fine-tune epochs, in the order they were appended).
    iterations = list(range(1, len(accuracies) + 1))

    # Accuracy and loss share the x-axis but get separate y-axes.
    fig, ax1 = plt.subplots()

    # Left axis: test accuracy
    color = 'tab:blue'
    ax1.set_xlabel('Iteration')
    ax1.set_ylabel('Accuracy', color=color)
    ax1.plot(iterations, accuracies, color=color, label='Accuracy')
    ax1.tick_params(axis='y', labelcolor=color)

    # Right axis: training loss
    ax2 = ax1.twinx()
    color = 'tab:red'
    ax2.set_ylabel('Loss', color=color)
    ax2.plot(iterations, losses, color=color, label='Loss')
    ax2.tick_params(axis='y', labelcolor=color)

    # Set the title before tight_layout so the layout pass reserves room for
    # it; a title added afterwards can be clipped at the top of the figure.
    ax1.set_title('Training Progress')
    fig.tight_layout()
    plt.show()

In [None]:
# Final accuracy of the trained model over the whole test loader.
model.eval()  # disables dropout for inference
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # Index of the largest score per sample is the predicted class.
        predicted = torch.max(model(images), dim=1).indices
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.shape[0]

print(f"Test Accuracy: {100 * correct / total}%")