# Imports

In [41]:
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import copy
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision.datasets import ImageFolder
import tqdm
import torch.nn.functional as F
import torchvision.utils as vutils
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.tensorboard import SummaryWriter
from torchvision.models import ResNet152_Weights

# Helper functions

In [42]:
def plot_classes_preds(images, labels, preds, probs):
    # plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(15, 5))
    for idx in np.arange(4):
        ax = fig.add_subplot(1, 4, idx + 1, xticks=[], yticks=[])
        plt.imshow(np.transpose(images[idx].cpu().numpy(), (1, 2, 0)))  # because is a tensor 
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            preds[idx],
            probs[idx] * 100.0,
            labels[idx]),
            color=("green" if preds[idx] == labels[idx].item() else "red"))
    return fig

### Loading the train dataset

In [43]:
train_transform = transforms.Compose([
    #TODO:think a better transformation pipeline
    #naive transformation
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dir = './dataset/GTSRB/train'

train_dataset = datasets.ImageFolder(train_dir, train_transform)
train_size = len(train_dataset)
class_names = train_dataset.classes

print('Train size:', train_size)
print('Class names:', class_names)

Train size: 39209
Class names: ['00000', '00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00013', '00014', '00015', '00016', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042']


### Loading the test dataset

In [44]:
test_transform = transforms.Compose([
    #TODO:think a better transformation pipeline
    #naive transformation
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

test_dir = './dataset/GTSRB/test'

test_dataset = datasets.ImageFolder(test_dir, test_transform)
test_size = len(test_dataset)
class_names = test_dataset.classes

print('Test size:', train_size)
print('Class names:', class_names)

Test size: 39209
Class names: ['00000', '00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00013', '00014', '00015', '00016', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042']


# Defining the training phase

In [45]:
def train_model(device, model, criterion, optimizer, scheduler, train_loader, val_loader, num_epochs=25):
    since = time.time()

    for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            running_loss = 0.0
            running_corrects = 0

            # Choose the appropriate data loader
            if phase == 'train':
                model.train()  # Set model to training mode
                data_total_steps = len(train_loader)
                data_loader = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                data_total_steps = len(val_loader)
                data_loader = val_loader

            for i, (images, labels) in enumerate(data_loader):
                images = images.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                # Track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images)
                    _, preds = torch.max(outputs, 1)
                    probs = [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, outputs)]
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * images.size(0)
                running_corrects += torch.sum(preds == labels.data)

                # Calculate entropy with epsilon
                softmax_outputs = F.softmax(outputs, dim=1)
                epsilon = 1e-10  # Small epsilon value to avoid zero probabilities
                entropy = -torch.sum(softmax_outputs * torch.log2(softmax_outputs + epsilon), dim=1).mean()

                # Log scalars
                if phase == 'train':
                    writer.add_scalar('Training/Training Loss',
                                      loss.item(),
                                      epoch * len(data_loader) + i)
                    writer.add_scalar('Policy/Entropy',
                                      entropy.item(),
                                      epoch * len(data_loader) + i)
                    writer.add_scalar('Policy/Learning Rate',
                                      np.array(scheduler.get_last_lr()),
                                      epoch * len(data_loader) + i)
                else:
                    writer.add_scalar('Training/Validation Loss',
                                      loss.item(),
                                      epoch * len(data_loader) + i)

                #prints the stats every 20 steps (20 batches performed)
                if (i + 1) % int(data_total_steps / 8) == 0:
                    print(
                        f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{data_total_steps}], Loss: {loss.item():.4f}')

                    # Log image predictions
                    selected_indices = random.sample(range(len(images)), 4)  # Select 4 random indices
                    selected_images = images[selected_indices]
                    selected_labels = labels[selected_indices]
                    selected_preds = preds[selected_indices]
                    selected_probs = [probs[i] for i in selected_indices]
                    if phase == 'train':
                        writer.add_figure('Training/Training Predictions',
                                          plot_classes_preds(selected_images, selected_labels, selected_preds,
                                                             selected_probs),
                                          global_step=epoch * len(data_loader) + i)
                    else:
                        writer.add_figure('Training/Validation Predictions',
                                          plot_classes_preds(selected_images, selected_labels, selected_preds,
                                                             selected_probs),
                                          global_step=epoch * len(data_loader) + i)

            epoch_loss = running_loss / len(data_loader.dataset)
            epoch_acc = running_corrects.double() / len(data_loader.dataset)

            if phase == 'train':
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Train phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Training Accuracy',
                                  running_corrects.double() / len(data_loader.dataset),
                                  epoch * len(data_loader))
                scheduler.step()
            else:
                print('{} Epoch {} Loss: {:.4f} Acc: {:.4f}'.format(
                    'Validation phase - ', epoch + 1, epoch_loss, epoch_acc))
                writer.add_scalar('Training/Validation Accuracy',
                                  running_corrects.double() / len(data_loader.dataset),
                                  epoch * len(data_loader))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model


def create_dynamic_network(num_features, num_classes, num_layers=0, num_neurons=1):
    layers = []
    # Input layer to first hidden layer
    if num_layers > 0:
        layers.append(nn.Linear(num_features, num_neurons))
        layers.append(nn.ELU())

    # Additional hidden layers
    for _ in range(1, num_layers):
        layers.append(nn.Linear(num_neurons, num_neurons))
        layers.append(nn.ELU())

    # Always include the final specified layer
    layers.append(nn.Linear(num_neurons if num_layers > 0 else num_features, num_classes))

    return nn.Sequential(*layers)

## Training Setup

In [46]:
# Setting device for the computation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 32
batch_size = 100
learning_rate = 0.05
step_size = 2  # After how many epochs to apply the decay rate
decay_rate = 0.9  # new_lr = Decay rate * learning rate

num_layers = 1  # 0 layers means no hidden layers, just one layer from conv to classes: conv -> layer -> softmax
num_neurons = 100

### Setting up the model using ResNet152 as backbone

In [47]:
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/TSR-SGD')

# Assuming train_dataset is your full dataset
indices = list(range(train_size))
split = int(np.floor(0.7 * train_size))  # 70% train, 30% validation
np.random.shuffle(indices)  # Shuffle the indices if needed

# Split indices into training and validation sets
train_indices, val_indices = indices[:split], indices[split:]

# Create sampler objects using the subset indices
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Create DataLoader instances for training and validation
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)

# Model initialization
model = torchvision.models.resnet152(weights=ResNet152_Weights.IMAGENET1K_V2)
for param in model.parameters():
    param.requires_grad = False
# Define the layers you want to add
model.fc = create_dynamic_network(model.fc.in_features, 43, num_layers=num_layers, num_neurons=num_neurons)
model = model.to(device)

# Define loss function, optimizer, etc.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=decay_rate)

### Train the model

In [48]:
# Train model
trained_model = train_model(device=device, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler,
                            train_loader=train_loader, val_loader=val_loader, num_epochs=num_epochs)

----------
Epoch 1/32
----------
Epoch [1/32], Step [34/275], Loss: 3.1675
Epoch [1/32], Step [68/275], Loss: 2.6375
Epoch [1/32], Step [102/275], Loss: 2.2682
Epoch [1/32], Step [136/275], Loss: 2.2304
Epoch [1/32], Step [170/275], Loss: 2.1089
Epoch [1/32], Step [204/275], Loss: 1.6610
Epoch [1/32], Step [238/275], Loss: 1.5974
Epoch [1/32], Step [272/275], Loss: 1.5289
Train phase -  Epoch 1 Loss: 1.5789 Acc: 0.3060
Epoch [1/32], Step [14/118], Loss: 1.5068
Epoch [1/32], Step [28/118], Loss: 1.4412
Epoch [1/32], Step [42/118], Loss: 1.6295
Epoch [1/32], Step [56/118], Loss: 1.6437
Epoch [1/32], Step [70/118], Loss: 1.2916
Epoch [1/32], Step [84/118], Loss: 1.7792
Epoch [1/32], Step [98/118], Loss: 1.4294
Epoch [1/32], Step [112/118], Loss: 1.4893
Validation phase -  Epoch 1 Loss: 0.4685 Acc: 0.1738
----------
Epoch 2/32
----------
Epoch [2/32], Step [34/275], Loss: 1.3856
Epoch [2/32], Step [68/275], Loss: 1.3685
Epoch [2/32], Step [102/275], Loss: 1.3270
Epoch [2/32], Step [136/275

# Saving the trained model

In [49]:
print('Finished Training')
os.makedirs('./models', exist_ok=True)
PATH = './models/trained_model.pth'
torch.save(trained_model, PATH)

Finished Training


# Loading the model

In [50]:
trained_model = torch.load('./models/trained_model.pth', map_location=device)

# Evaluating the model

In [51]:
def test_model(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    running_corrects = 0

    # Disable gradient calculation to speed up the process and reduce memory usage
    with torch.no_grad():
        for i, (images, labels) in enumerate(dataloader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass to get output/logits
            outputs = model(images)

            # Get predictions from the maximum value
            _, preds = torch.max(outputs, 1)

            # Increment the correct predictions count
            running_corrects += torch.sum(preds == labels.data)

            # Optionally print progress every 250 batches
            if (i + 1) % 250 == 0:
                print(f'Evaluating: [{i + 1}/{len(dataloader)}],  Correct classified: {running_corrects}/{i + 1}')

    # Calculate the accuracy by dividing the number of correct predictions by the dataset size
    test_acc = running_corrects.double() / len(dataloader)
    print(f'Test Acc: {test_acc:.4f}, Correct classified: {running_corrects}/{len(dataloader)}')

    return test_acc

In [52]:
test_loader = DataLoader(test_dataset, shuffle=True)

test_model(trained_model, test_loader, device)

Evaluating: [250/12630],  Correct classified: 162/250
Evaluating: [500/12630],  Correct classified: 329/500
Evaluating: [750/12630],  Correct classified: 483/750
Evaluating: [1000/12630],  Correct classified: 625/1000
Evaluating: [1250/12630],  Correct classified: 781/1250
Evaluating: [1500/12630],  Correct classified: 916/1500
Evaluating: [1750/12630],  Correct classified: 1079/1750
Evaluating: [2000/12630],  Correct classified: 1216/2000
Evaluating: [2250/12630],  Correct classified: 1355/2250
Evaluating: [2500/12630],  Correct classified: 1502/2500
Evaluating: [2750/12630],  Correct classified: 1648/2750
Evaluating: [3000/12630],  Correct classified: 1803/3000
Evaluating: [3250/12630],  Correct classified: 1941/3250
Evaluating: [3500/12630],  Correct classified: 2073/3500
Evaluating: [3750/12630],  Correct classified: 2236/3750
Evaluating: [4000/12630],  Correct classified: 2382/4000
Evaluating: [4250/12630],  Correct classified: 2524/4250
Evaluating: [4500/12630],  Correct classifi

tensor(0.5960, device='cuda:0', dtype=torch.float64)