# Imports

In [10]:
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import copy
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision.datasets import ImageFolder
import tqdm
import torch.nn.functional as F
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.tensorboard import SummaryWriter
from torchvision.models import ResNet152_Weights
from torchvision.models import ResNet50_Weights
from torchvision.transforms import InterpolationMode

import torch_directml

In [11]:
np.random.seed(123)  # for replication
os.makedirs('./models', exist_ok=True)

# Helper functions

In [12]:
def plot_classes_preds(images, labels, preds, probs):
    # plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(15, 5))
    for idx in np.arange(4):
        ax = fig.add_subplot(1, 4, idx + 1, xticks=[], yticks=[])
        plt.imshow(np.transpose(images[idx].cpu().numpy(), (1, 2, 0)))  # because is a tensor 
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            preds[idx],
            probs[idx] * 100.0,
            labels[idx]),
            color=("green" if preds[idx] == labels[idx].item() else "red"))
    return fig

### Loading the train dataset

In [13]:
# Useless now since we use the required transformations for the ResNet50
# train_transform = transforms.Compose([
#     #naive transformation
#     transforms.Resize((224, 224)),
#     transforms.ToTensor()
# ])
train_dir = './dataset/GTSRB/train'

train_dataset = datasets.ImageFolder(train_dir, ResNet50_Weights.IMAGENET1K_V2.transforms())
train_size = len(train_dataset)
class_names = train_dataset.classes

print('Train size:', train_size)
print('Class names:', class_names)

Train size: 39209
Class names: ['00000', '00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00013', '00014', '00015', '00016', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042']


### Loading the test dataset

In [14]:
# Useless now since we use the required transformations for the ResNet50
# test_transform = transforms.Compose([
#     #naive transformation
#     transforms.Resize((224, 224)),
#     transforms.ToTensor()
# ])

test_dir = './dataset/GTSRB/test'

test_dataset = datasets.ImageFolder(test_dir, ResNet50_Weights.IMAGENET1K_V2.transforms())
test_size = len(test_dataset)
class_names = test_dataset.classes

print('Test size:', train_size)
print('Class names:', class_names)

Test size: 39209
Class names: ['00000', '00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00013', '00014', '00015', '00016', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042']


# Defining the training phase

In [15]:
def train_model(device, model, criterion, optimizer, scheduler, train_loader, val_loader, num_epochs=25,
                model_name='trained_model'):
    since = time.time()
    time_train = 0
    time_val = 0
    
    # Save the initial model
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:

            # Choose the appropriate data loader
            if phase == 'train':
                model.train()  # Set model to training mode
                data_total_steps = len(train_loader)
                data_loader = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                data_total_steps = len(val_loader)
                data_loader = val_loader
                
            running_loss = 0.0
            running_corrects = 0
            
            for i, (images, labels) in enumerate(data_loader):
                # time_t = epoch * len(data_loader) * i + i

                images = images.to(device)
                labels = labels.to(device)
                
                # Zero the parameter gradients
                optimizer.zero_grad()
                
                # Forward pass
                # Track history if only in train    
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images)
                    _, preds = torch.max(outputs, 1)
                    probs = [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, outputs)]
                    loss = criterion(outputs, labels)
                    
                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # Statistics
                running_loss += loss.item() * images.size(0)
                running_corrects += torch.sum(preds == labels.data)
                

                # Calculate entropy with epsilon
                softmax_outputs = F.softmax(outputs, dim=1)
                epsilon = 1e-10  # Small epsilon value to avoid zero probabilities
                entropy = -torch.sum(softmax_outputs * torch.log2(softmax_outputs + epsilon), dim=1).mean()

                #prints the stats every 20 steps (20 batches performed)
                if (i + 1) % int(data_total_steps / 8) == 0:
                    print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{data_total_steps}], Loss: {loss.item():.4f}')

                    # Log image predictions
                    selected_indices = random.sample(range(len(images)), 4)  # Select 4 random indices
                    selected_images = images[selected_indices]
                    selected_labels = labels[selected_indices]
                    selected_preds = preds[selected_indices]
                    selected_probs = [probs[i] for i in selected_indices]
                    if phase == 'train':
                        writer.add_figure('Training/Training Predictions',
                                          plot_classes_preds(selected_images, selected_labels, selected_preds,
                                                             selected_probs),
                                          global_step=time_train)
                    if phase == 'val':
                        writer.add_figure('Training/Validation Predictions',
                                          plot_classes_preds(selected_images, selected_labels, selected_preds,
                                                             selected_probs),
                                          global_step=time_val)

                # Log scalars
                if phase == 'train':
                    writer.add_scalar('Training/Training Loss',
                                      loss.item(),
                                      time_train)
                    writer.add_scalar('Policy/Entropy',
                                      entropy.item(),
                                      time_train)
                    writer.add_scalar('Policy/Learning Rate',
                                      np.array(optimizer.param_groups[0]["lr"]),
                                      time_train)
                    time_train += 1
                if phase == 'val':
                    writer.add_scalar('Training/Validation Loss',
                                      loss.item(),
                                      time_val)
                    time_val += 1

            epoch_loss = running_loss / len(data_loader.dataset)
            epoch_acc = running_corrects.double() / len(data_loader.dataset)             
            
            if phase == 'train':
                scheduler.step(epoch_loss)
                
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Train phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Training Accuracy',
                                  epoch_acc,
                                  epoch)
                if (epoch + 1) % max(int(num_epochs / 5), 1) == 0:  # checkpoint the model
                    print("----> model checkpoint...")
                    torch.save(model, f'./models/trained_model_{model_name}_epoch_{epoch + 1}.pth')
            if phase =='val':
                # If the best accuracy is reached then save this model
                if epoch_acc>best_acc:
                    best_acc = epoch_acc
                    best_model_wts=copy.deepcopy(model.state.dict())
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Validation phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Validation Accuracy',
                                  epoch_acc,
                                  epoch)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    
    print('Best val Acc:{:.4f}'.format(best_acc))
    # Return the model with the best accuracy in the validation
    model.load_state_dict(best_model_wts)
    return model


def create_dynamic_network(num_features, num_classes, num_layers=0, num_neurons=1):
    layers = []
    # Input layer to first hidden layer
    if num_layers > 0:
        layers.append(nn.Linear(num_features, num_neurons))
        layers.append(nn.ReLU())

    # Additional hidden layers
    for _ in range(1, num_layers):
        layers.append(nn.Linear(num_neurons, num_neurons))
        layers.append(nn.ReLU())

    # Always include the final specified layer
    layers.append(nn.Linear(num_neurons if num_layers > 0 else num_features, num_classes))

    return nn.Sequential(*layers)

def kaggle_nn(num_features,num_classes):
    layers = [nn.Flatten(),
              
              nn.Linear(num_features, 512),
              nn.ReLU(),
              
              nn.Linear(512, 256),
              nn.ReLU(),
              
              nn.Linear(256, 128),
              nn.ReLU(),
              
              nn.Linear(128, num_classes)]

    return nn.Sequential(*layers)

## Training Setup

In [16]:
# Setting device for the computation
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch_directml.device()

# Hyperparameters
hyperparams = {
    "num_epochs": 10,
    "batch_size": 100,
    #optimizer
    "learning_rate": 1e-4,
    "momentum": 0.9,
    "alpha": 0.99,
    "beta1": 0.9,
    "beta2": 0.999,
    "epsilon": 1e-08,
    "weight_decay": 0,
    #scheduler
    "decay_rate": 0.5,
    #nnet
    "num_layers": 1,  # 0 layers means no hidden layers, just one layer from conv to classes: conv -> layer -> softmax
    "num_neurons": 400,
}

### Setting up the model using ResNet50 as backbone

In [17]:
# default `log_dir` is "runs" - we'll be more specific here
model_name = 'RMSprop'
writer = SummaryWriter(f'runs/{model_name}')

# Convert config dictionary to a formatted string
hyper_str = "\n".join(f"{key}: {value}\n" for key, value in hyperparams.items())
writer.add_text('Configuration', hyper_str)

# Create DataLoader instances for training and validation
train_loader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=True)

# Model initialization
model = torchvision.models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

for param in model.parameters():
    param.requires_grad = False
# Define the layers you want to add
#model.fc = create_dynamic_network(model.fc.in_features, 43, num_layers=hyperparams["num_layers"],
#                                  num_neurons=hyperparams["num_neurons"])
model.fc = kaggle_nn(model.fc.in_features,43)

model = model.to(device)

# Define loss function, optimizer, etc.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.fc.parameters(),lr = hyperparams["learning_rate"]) 
#optimizer = torch.optim.Adam(model.fc.parameters(), lr=hyperparams["learning_rate"],
#                             betas=(hyperparams["beta1"], hyperparams["beta2"]),
#                             weight_decay=hyperparams["weight_decay"], eps=hyperparams["epsilon"])
# optimizer = torch.optim.SGD(model.fc.parameters(), lr=hyperparams["learning_rate"], momentum=hyperparams["momentum"],
#                             weight_decay=hyperparams["weight_decay"])
# optimizer = torch.optim.SGD(model.fc.parameters(), lr=hyperparams["learning_rate"], momentum=hyperparams["momentum"],
#                             weight_decay=hyperparams["weight_decay"], nesterov=True)
# optimizer = torch.optim.RMSprop(model.fc.parameters(), lr=hyperparams["learning_rate"],
#                                 weight_decay=hyperparams["weight_decay"], alpha=hyperparams["alpha"],
#                                 eps=hyperparams["epsilon"])
scheduler = lr_scheduler.LinearLR(optimizer)

### Train the model

In [None]:
# Train model
trained_model = train_model(device=device, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler,
                            train_loader=train_loader, val_loader=val_loader, num_epochs=hyperparams["num_epochs"],
                            model_name=model_name)

----------
Epoch 1/10
----------


# Saving the trained model

In [None]:
print('Finished Training')
PATH = f'./models/trained_model_{model_name}_final.pth'
torch.save(trained_model, PATH)