# Imports

In [None]:
import torch
import random
import torch.nn as nn
import cv2
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
import time
import os
from torch.utils.tensorboard import SummaryWriter
from torchvision.models import ResNet50_Weights
from collections import OrderedDict

In [None]:
# Seed every random number generator this notebook draws from, for replication:
# `random` picks the images that get logged, NumPy is seeded as before, and
# torch drives weight initialisation and DataLoader shuffling.
random.seed(123)
np.random.seed(123)
torch.manual_seed(123)

# Checkpoints and the final model are written below ./models.
os.makedirs('./models', exist_ok=True)

# Helper functions

In [None]:
def plot_classes_preds(images, labels, preds, probs):
    """Plot up to four images of a batch with their predicted and true labels.

    Each title shows the prediction, its probability as a percentage and the
    true label; the title is green when prediction and label match, red
    otherwise.

    Args:
        images: Indexable batch of image tensors. Each image is transposed
            with axes (1, 2, 0) before display, so it is presumably stored
            channel-first (C, H, W) -- confirm against the caller.
        labels: Per-image true labels (tensor elements supporting ``.item()``).
        preds: Per-image predicted labels.
        probs: Per-image probability of the predicted label, in [0, 1].

    Returns:
        The matplotlib figure holding a 1x4 grid of subplots. When fewer than
        four images are given, the remaining slots are left empty.
    """
    # Never index past the end of a short batch.
    num_shown = min(4, len(images))

    fig = plt.figure(figsize=(15, 5))
    for idx in range(num_shown):
        ax = fig.add_subplot(1, 4, idx + 1, xticks=[], yticks=[])

        # Min-max rescale the (normalised) tensor to 0..255 so it can be shown.
        norm_img = cv2.normalize(images[idx].detach().cpu().numpy(), None, alpha=0, beta=255,
                                 norm_type=cv2.NORM_MINMAX)
        rgb_img = np.transpose(norm_img, (1, 2, 0)).astype(np.uint8)

        # Draw on the axes just created instead of relying on pyplot's
        # implicit "current axes".
        ax.imshow(rgb_img)

        is_correct = preds[idx] == labels[idx].item()
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            preds[idx],
            probs[idx] * 100.0,
            labels[idx]),
            color=("green" if is_correct else "red"))
    return fig


def summary(model, input_size, batch_size=-1, device="cuda"):
    """Print and return a per-layer table of output shapes and parameter counts.

    A forward hook is attached to every sub-module except ``nn.Sequential`` /
    ``nn.ModuleList`` containers and ``model`` itself. One forward pass is run
    on random input and the recorded shapes are formatted as text.

    Args:
        model: The ``nn.Module`` to inspect; it is called as ``model(*inputs)``.
        input_size: Shape of one sample without the batch dimension, as a
            tuple, or a list of such tuples when the model takes several
            inputs.
        batch_size: Value written into the batch dimension of the reported
            shapes and used by the size estimates (``-1`` by default).
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive). The probe input is
            placed on CUDA only when it is both requested and available.

    Returns:
        The formatted summary string (it is also printed).

    Raises:
        AssertionError: If ``device`` is neither ``"cuda"`` nor ``"cpu"``.
    """

    def count_elements(shape):
        # Number of elements described by a shape, or by a list of shapes for
        # layers that return several tensors. abs() absorbs the -1 batch
        # placeholder so every layer contributes a positive amount.
        if shape and isinstance(shape[0], (list, tuple)):
            return sum(count_elements(sub_shape) for sub_shape in shape)
        return abs(int(np.prod(shape)))

    layer_info = OrderedDict()
    hooks = []

    def register_hook(module):

        def hook(module, inputs, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            m_key = "%s-%i" % (class_name, len(layer_info) + 1)

            info = OrderedDict()
            info["input_shape"] = list(inputs[0].size())
            info["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                info["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                info["output_shape"] = list(output.size())
                info["output_shape"][0] = batch_size

            # Count parameters as plain ints so the totals below never depend
            # on tensor arithmetic (and work when a model has no parameters).
            params = 0
            weight = getattr(module, "weight", None)
            if hasattr(weight, "size"):
                params += weight.numel()
                info["trainable"] = weight.requires_grad
            bias = getattr(module, "bias", None)
            if hasattr(bias, "size"):
                params += bias.numel()
            info["nb_params"] = params

            layer_info[m_key] = info

        is_container = isinstance(module, (nn.Sequential, nn.ModuleList))
        if not is_container and module is not model:
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    use_cuda = device == "cuda" and torch.cuda.is_available()
    run_device = torch.device("cuda" if use_cuda else "cpu")

    # multiple inputs to the network
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # batch_size of 2 for batchnorm
    x = [torch.rand(2, *in_size, dtype=torch.float32).to(run_device) for in_size in input_size]

    model.apply(register_hook)
    try:
        # NOTE(review): the probe pass runs in the model's current mode; in
        # training mode, layers such as BatchNorm will presumably still update
        # their running statistics -- call model.eval() first if that matters.
        with torch.no_grad():  # only shapes are needed, no autograd graph
            model(*x)
    finally:
        # Always detach the hooks, even if the forward pass raised.
        for h in hooks:
            h.remove()

    pieces = [
        "----------------------------------------------------------------\n",
        "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #\n"),
        "================================================================\n",
    ]

    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, info in layer_info.items():
        pieces.append("{:>20}  {:>25} {:>15}\n".format(
            layer,
            str(info["output_shape"]),
            "{0:,}".format(info["nb_params"]),
        ))
        total_params += info["nb_params"]
        total_output += count_elements(info["output_shape"])
        if info.get("trainable"):
            trainable_params += info["nb_params"]

    # assume 4 bytes/number (float on cuda).
    bytes_per_mb = 1024 ** 2.
    # Several inputs add up; they are not multiplied together.
    input_elements = sum(int(np.prod(in_size)) for in_size in input_size)
    total_input_size = abs(input_elements * batch_size * 4. / bytes_per_mb)
    total_output_size = abs(2. * total_output * 4. / bytes_per_mb)  # x2 for gradients
    total_params_size = abs(total_params * 4. / bytes_per_mb)
    total_size = total_params_size + total_output_size + total_input_size

    pieces.extend([
        "================================================================\n",
        "Total params: {0:,}\n".format(total_params),
        "Trainable params: {0:,}\n".format(trainable_params),
        "Non-trainable params: {0:,}\n".format(total_params - trainable_params),
        "----------------------------------------------------------------\n",
        "Input size (MB): %0.2f\n" % total_input_size,
        "Forward/backward pass size (MB): %0.2f\n" % total_output_size,
        "Params size (MB): %0.2f\n" % total_params_size,
        "Estimated Total Size (MB): %0.2f\n" % total_size,
        "----------------------------------------------------------------\n",
    ])

    output_str = "".join(pieces)
    print(output_str)
    return output_str

### Loading the train dataset

In [None]:
train_dir = './dataset/GTSRB/train'

# Every training image goes through the preprocessing pipeline shipped with
# the ImageNet V2 ResNet50 weights, so the inputs match the backbone.
train_dataset = datasets.ImageFolder(
    root=train_dir,
    transform=ResNet50_Weights.IMAGENET1K_V2.transforms(),
)
class_names = train_dataset.classes
train_size = len(train_dataset)

print('Train size:', train_size)
print('Class names:', class_names)

### Loading the test dataset

In [None]:
test_dir = './dataset/GTSRB/test'

# The test images use the same preprocessing as the training images (the
# transforms shipped with the ImageNet V2 ResNet50 weights).
test_dataset = datasets.ImageFolder(test_dir, ResNet50_Weights.IMAGENET1K_V2.transforms())
test_size = len(test_dataset)
class_names = test_dataset.classes

# Report the size of the test split (this previously printed the train size).
print('Test size:', test_size)
print('Class names:', class_names)

# Defining the training phase

In [None]:
def train_model(device, model, criterion, optimizer, scheduler, train_loader, val_loader, num_epochs=25,
                model_name='trained_model'):
    """Train ``model`` and validate it after every epoch, logging to TensorBoard.

    Each epoch runs a training phase over ``train_loader`` followed by a
    validation phase over ``val_loader``. Per-batch losses, the mean
    prediction entropy and the learning rate are logged through the
    module-level ``writer``; roughly eight times per phase a progress line is
    printed and a figure from ``plot_classes_preds`` is logged. The model is
    checkpointed to ``./models`` about five times over the run.

    Args:
        device: Device the batches are moved to before the forward pass.
        model: Network to optimise; it is modified in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per training batch.
        scheduler: Scheduler stepped with the validation loss once per epoch
            (``scheduler.step(epoch_loss)``).
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        num_epochs: Number of epochs to run.
        model_name: Tag embedded in the checkpoint file names.

    Returns:
        The trained model (the same object that was passed in).
    """
    since = time.time()
    time_train = 0  # global step of the per-batch training logs
    time_val = 0  # global step of the per-batch validation logs
    epsilon = 1e-10  # Small epsilon value to avoid log2(0) in the entropy
    num_preview = 4  # images shown in each logged prediction figure
    loaders = {'train': train_loader, 'val': val_loader}

    for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            data_loader = loaders[phase]
            data_total_steps = len(data_loader)
            # About eight reports per phase; the max() keeps the interval at
            # least 1 so loaders with fewer than eight batches do not divide
            # by zero below.
            log_every = max(data_total_steps // 8, 1)

            model.train(is_train)  # training mode for 'train', eval mode for 'val'

            running_loss = 0.0
            running_corrects = 0

            for i, (images, labels) in enumerate(data_loader):
                images = images.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(images)
                    softmax_outputs = F.softmax(outputs, dim=1)
                    probs, preds = torch.max(softmax_outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Statistics, kept as plain Python numbers
                batch_loss = loss.item()
                running_loss += batch_loss * images.size(0)
                running_corrects += torch.sum(preds == labels).item()

                # Periodic progress report
                if (i + 1) % log_every == 0:
                    print(
                        f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{data_total_steps}], Loss: {batch_loss:.4f}')

                    # Log image predictions; skipped for batches too small to
                    # fill the preview figure.
                    if len(images) >= num_preview:
                        selected_indices = random.sample(range(len(images)), num_preview)
                        figure = plot_classes_preds(images[selected_indices], labels[selected_indices],
                                                    preds[selected_indices], probs[selected_indices])
                        if is_train:
                            writer.add_figure('Training/Training Predictions', figure,
                                              global_step=time_train)
                        else:
                            writer.add_figure('Training/Validation Predictions', figure,
                                              global_step=time_val)

                # Log scalars
                if is_train:
                    # Mean entropy (in bits) of the predicted distributions;
                    # detached because it is only logged, never optimised.
                    batch_probs = softmax_outputs.detach()
                    entropy = -torch.sum(batch_probs * torch.log2(batch_probs + epsilon), dim=1).mean()

                    writer.add_scalar('Training/Training Loss',
                                      batch_loss,
                                      time_train)
                    writer.add_scalar('Policy/Entropy',
                                      entropy.item(),
                                      time_train)
                    # Read the rate from the optimiser: a single float that
                    # does not depend on the scheduler exposing get_last_lr().
                    writer.add_scalar('Policy/Learning Rate',
                                      optimizer.param_groups[0]['lr'],
                                      time_train)
                    time_train += 1
                else:
                    writer.add_scalar('Training/Validation Loss',
                                      batch_loss,
                                      time_val)
                    time_val += 1

            num_samples = len(data_loader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            if is_train:
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Train phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Training Accuracy',
                                  epoch_acc,
                                  epoch)
                if (epoch + 1) % max(int(num_epochs / 5), 1) == 0:  # checkpoint the model
                    print("----> model checkpoint...")
                    torch.save(model, f'./models/trained_model_{model_name}_epoch_{epoch + 1}.pth')
            else:
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Validation phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Validation Accuracy',
                                  epoch_acc,
                                  epoch)
                # The scheduler watches the validation loss.
                scheduler.step(epoch_loss)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model


def create_dynamic_network(num_features, num_classes, neuron_list=None, dropout_values=None):
    """Build a fully connected classification head as an ``nn.Sequential``.

    Every entry of ``neuron_list`` adds ``Linear -> ReLU`` followed by a
    ``Dropout`` when the matching dropout value is non-zero. A final
    ``Linear`` layer maps to ``num_classes`` logits; no softmax is appended
    because the cross entropy criterion works on raw logits.

    Args:
        num_features: Size of the input feature vector.
        num_classes: Number of output logits.
        neuron_list: Widths of the hidden layers. ``None`` or an empty list
            yields a single ``Linear(num_features, num_classes)``.
        dropout_values: Dropout probability per hidden layer (``0`` disables
            dropout for that layer). ``None`` means no dropout anywhere.
            Entries beyond ``len(neuron_list)`` are ignored.

    Returns:
        The assembled ``nn.Sequential`` module.

    Raises:
        ValueError: If ``dropout_values`` has fewer entries than
            ``neuron_list``.
    """
    hidden_sizes = [] if neuron_list is None else list(neuron_list)

    if dropout_values is None:
        # The default used to be indexed directly, which raised a TypeError.
        dropout_values = [0] * len(hidden_sizes)
    elif len(dropout_values) < len(hidden_sizes):
        raise ValueError(
            "dropout_values needs one entry per hidden layer: got {} for {} layers".format(
                len(dropout_values), len(hidden_sizes)))

    layers = []
    in_features = num_features
    # zip() stops at the shorter sequence, so surplus dropout values are ignored.
    for width, drop_prob in zip(hidden_sizes, dropout_values):
        layers.append(nn.Linear(in_features, width))
        layers.append(nn.ReLU())
        if drop_prob != 0:
            layers.append(nn.Dropout(drop_prob))
        in_features = width

    # Always include the final specified layer
    layers.append(nn.Linear(in_features, num_classes))

    return nn.Sequential(*layers)

## Training Setup

In [None]:
# Setting device for the computation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
hyperparams = {
    "num_epochs": 15,
    "batch_size": 100,
    #optimizer
    "learning_rate": 0.001,
    "momentum": 0.0,
    "alpha": 0.99,
    "beta1": 0.9,
    "beta2": 0.999,
    "epsilon": 1e-07,
    "weight_decay": 0,
    #scheduler
    "decay_rate": 0.5,
    #nnet
    # None layers means no hidden layers, just one layer from conv to classes: conv -> layer -> softmax
    # Multiple layers can just be se as a list [500,400,300]
    "neuron_layer_list": [512, 256, 128],
    "dropout_values": [0.25, 0.25, 0.5],
}

### Setting up the model using ResNet50 as backbone

In [None]:
# default `log_dir` is "runs" - we'll be more specific here
model_name = 'testfix'
writer = SummaryWriter(f'runs/{model_name}')

# Convert config dictionary to a formatted string
hyper_str = "\n".join(f"{key}: {value}\n" for key, value in hyperparams.items())
writer.add_text('Configuration', hyper_str)

# Create DataLoader instances for training and validation
train_loader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=True)

# Model initialization
model = torchvision.models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
for param in model.parameters():
    param.requires_grad = False
# Define the layers you want to add
model.fc = create_dynamic_network(model.fc.in_features, num_classes=43, neuron_list=hyperparams["neuron_layer_list"],
                                  dropout_values=hyperparams["dropout_values"])
model = model.to(device)
writer.add_text("Model Summary", summary(model, input_size=(3, 224, 224)))

# Define loss function, optimizer, etc.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=hyperparams["learning_rate"])
# optimizer = torch.optim.SGD(model.fc.parameters(), lr=hyperparams["learning_rate"], momentum=hyperparams["momentum"],
#                             weight_decay=hyperparams["weight_decay"])
# optimizer = torch.optim.SGD(model.fc.parameters(), lr=hyperparams["learning_rate"], momentum=hyperparams["momentum"],
#                             weight_decay=hyperparams["weight_decay"], nesterov=True)
# optimizer = torch.optim.RMSprop(model.fc.parameters(), lr=hyperparams["learning_rate"],
#                                 weight_decay=hyperparams["weight_decay"], alpha=hyperparams["alpha"],
#                                 eps=hyperparams["epsilon"])
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=1000, factor=hyperparams["decay_rate"], min_lr=1e-4,
                                           mode='min', threshold=1e-4)

### Train the model

In [None]:
# Train model
trained_model = train_model(device=device, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler,
                            train_loader=train_loader, val_loader=val_loader, num_epochs=hyperparams["num_epochs"],
                            model_name=model_name)

# Saving the trained model

In [None]:
print('Finished Training')

# Persist the whole trained module (not just its state dict) next to the
# per-epoch checkpoints.
PATH = f'./models/trained_model_{model_name}_final.pth'
torch.save(obj=trained_model, f=PATH)