# Pipeline for training and evaluating a CNN model for traffic sign recognition
Notebook contains the following steps:
1. Data loading and preprocessing
2. Model definition
3. Training and evaluation
4. Saving results and model to Google Cloud Storage
5. Hyperparameter tuning


In [1]:
# required for logging
!pip install tensorboard

Collecting tensorboard
  Using cached tensorboard-2.16.2-py3-none-any.whl.metadata (1.6 kB)
Collecting markdown>=2.6.8 (from tensorboard)
  Using cached Markdown-3.6-py3-none-any.whl.metadata (7.0 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)
  Using cached tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)
Collecting werkzeug>=1.0.1 (from tensorboard)
  Using cached werkzeug-3.0.3-py3-none-any.whl.metadata (3.7 kB)
Using cached tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
Using cached Markdown-3.6-py3-none-any.whl (105 kB)
Using cached tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)
Using cached werkzeug-3.0.3-py3-none-any.whl (227 kB)
Installing collected packages: werkzeug, tensorboard-data-server, markdown, tensorboard
Successfully installed markdown-3.6 tensorboard-2.16.2 tensorboard-data-server-0.7.2 werkzeug-3.0.3


In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split, DataLoader, Subset, ConcatDataset
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import numpy as np
from sklearn.metrics import confusion_matrix
from google.cloud import storage
import random
import math
from torch.optim.lr_scheduler import StepLR
import datetime

In [3]:
def set_seed(seed=42):
    """Sets the seed for reproducibility."""
    # Python RNG
    random.seed(seed)
    
    # PyTorch RNGs
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
    # Numpy RNG
    np.random.seed(seed)
    
    # OS RNG
    os.environ['PYTHONHASHSEED'] = str(seed)

def worker_init_fn(worker_id):    
    """Ensure that the data loading process is deterministic."""
    np.random.seed(np.random.get_state()[1][0] + worker_id)

set_seed(42)

In [4]:
project_id = 'deep-learning-420208'

In [5]:
# Check for GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


## Data loading and preprocessing
AugmentationTrafficSignLoader class is used to load the augmented datasets and calculate the mean and standard deviation of the training data. Can load multiple augmentation datasets and calculate the mean and standard deviation for each dataset.

In [6]:
class AugmentationTrafficSignLoader:
    def __init__(self, root):
        self.root = root
        self.augmentations = [folder for folder in os.listdir(root)
                              if os.path.isdir(os.path.join(root, folder))]

    def calculate_mean_and_variance(self, training_root, percentage_of_whole):
    
        transform = transforms.Compose([
                transforms.Resize((96, 96)),
                transforms.ToTensor()
            ])

        test_dataset = datasets.ImageFolder(root=training_root, transform=transform)

        indices = random.sample(population=list(range(len(test_dataset))), k=math.floor(len(test_dataset)*percentage_of_whole))
        sample = Subset(test_dataset, indices)
        loader = DataLoader(sample)

        mean = 0.0
        variance = 0.0
        total_images = 0

        for images, _ in loader:
            # Rearrange batch to be the shape of [B, C, W * H]
            images = images.view(images.size(0), images.size(1), -1)
            # Update total_images
            total_images += images.size(0)
            # Compute mean and variance here
            mean += images.mean(2).sum(0) 
            variance += images.var(2).sum(0)

        # Final mean and variance
        mean /= total_images
        variance /= total_images

        return mean, variance.sqrt()
    
    def augmentation_generator(self):
        while self.augmentations:
            current_aug = self.augmentations.pop()
            current_aug_path = os.path.join(self.root, current_aug)
            mean, std = self.calculate_mean_and_variance(current_aug_path, 0.25)
            #print(mean, std)
            transform = transforms.Compose([
                transforms.Resize((96, 96)),
                transforms.ToTensor(),
                transforms.Normalize(mean=mean, std=std)
            ])
            yield os.path.basename(current_aug), datasets.ImageFolder(root=current_aug_path, transform=transform), mean, std

## Model definition
The CNN1 class defines the architecture of the convolutional neural network. The model consists of three convolutional layers followed by max pooling and batch normalization. The convolutional layers are followed by three fully connected layers.

In [7]:
class CNN1(nn.Module):
    def __init__(self, n_classes=0):
        super(CNN1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=100, kernel_size=3, padding=1, bias=False)  # Output size: 96 x 96 x 100
        self.norm1 = nn.BatchNorm2d(num_features=100)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: 48 x 48 x 100
        self.conv2 = nn.Conv2d(in_channels=100, out_channels=150, kernel_size=3, padding=1, bias=False)  # Output size: 48 x 48 x 150
        self.norm2 = nn.BatchNorm2d(num_features=150)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: 24 x 24 x 150
        self.conv3 = nn.Conv2d(in_channels=150, out_channels=250, kernel_size=3, padding=1, bias=False)  # Output size: 24 x 24 x 250
        self.norm3 = nn.BatchNorm2d(num_features=250)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: 12 x 12 x 250
        # Calculate the total number of features in the flattened feature map
        flat_features = 12 * 12 * 250
        self.fc1 = nn.Linear(in_features=flat_features, out_features=4000)  # Output: 4000 units
        self.fc2 = nn.Linear(in_features=4000, out_features=200)  # Output: 200 units
        self.fc3 = nn.Linear(in_features=200, out_features=n_classes)  #33 Output: 46 units

    def forward(self, x):
        x = self.pool1(self.norm1(F.relu(self.conv1(x))))
        x = self.pool2(self.norm2(F.relu(self.conv2(x))))
        x = self.pool3(self.norm3(F.relu(self.conv3(x))))
        x = torch.flatten(x, start_dim=1)  # Flatten starting from dimension 1
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## Helper functions

In [8]:
def upload_directory_to_gcs(bucket_name, source_directory, destination_blob_prefix):
    """Uploads a local directory and its subdirectories to a GCS bucket."""
    client = storage.Client(project=project_id)
    bucket = client.bucket(bucket_name)

    for dirpath, dirnames, filenames in os.walk(source_directory):
        for filename in filenames:
            local_file_path = os.path.join(dirpath, filename)
            relative_path = os.path.relpath(local_file_path, source_directory)
            blob_path = os.path.join(destination_blob_prefix, relative_path)
            blob = bucket.blob(blob_path)
            blob.upload_from_filename(local_file_path)
            #print(f"File {local_file_path} uploaded to {blob_path}.")

In [9]:
def calculate_accuracy(model, data_loader, device):
    """Calculates accuracy on given dataset"""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

def evaluate_and_save_results(model, loader, device, classes, bucket_name, prefix, aug_name):
    """Evaluates the model and saves results and the model itself to Google Cloud Storage under a specific subfolder."""
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Compute the confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=np.arange(len(classes)))

    # Folder prefix including augmentation name
    full_prefix = f"{prefix}/results/{aug_name}"
    os.makedirs(full_prefix)

    # Save confusion matrix
    cm_path = f"{full_prefix}/confusion_matrix_{aug_name}.npy"
    np.save(cm_path, cm)

    # Save predictions
    preds_path = f"{full_prefix}/predictions_{aug_name}.npy"
    np.save(preds_path, np.array(all_preds))

    # Save the model
    model_path = f"{full_prefix}/model_{aug_name}.pth"
    torch.save(model.state_dict(), model_path)

    return cm

def train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, scheduler, device, writer, prefix, epochs=5, patience=10, min_delta=0.001):
    best_val_loss = float('inf')
    best_val_acc = 0
    epochs_no_improve = 0
    early_stop = False
    # Path to save the best model
    os.makedirs(prefix)
    best_model_path = f'{prefix}/best_model.pth'

    for epoch in tqdm(range(epochs)):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
        scheduler.step()  # Update the learning rate    

        train_loss = running_loss / len(train_loader.dataset)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
        val_loss /= len(val_loader.dataset)

        train_accuracy = calculate_accuracy(model, train_loader, device)
        val_accuracy = calculate_accuracy(model, val_loader, device)
        test_accuracy = calculate_accuracy(model, test_loader, device)

        writer.add_scalars('Loss', {'Train': train_loss, 'Validation': val_loss}, epoch)
        writer.add_scalars('Accuracy', {'Train': train_accuracy, 'Validation': val_accuracy}, epoch)

        if val_loss < best_val_loss - min_delta:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), best_model_path)  # Save the best model checkpoint
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch + 1} epochs.")
                early_stop = True
                break

        print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%, Test Acc: {test_accuracy:.2f}%')

    model.load_state_dict(torch.load(best_model_path, map_location=device))
    test_accuracy = calculate_accuracy(model, test_loader, device)
    print(test_accuracy)
    writer.add_scalar('Accuracy/test', test_accuracy, 1)
    writer.close()
    return test_accuracy


# Training and evaluation loop
This loop trains and evaluates the model for each augmentation dataset. The results are saved to Google Cloud Storage.

In [None]:
runs = [
    ['data/synthetic', 'data/test_data']
]
for TRAINING_DATA_ROOT, TEST_DATA_ROOT in runs:
    BUCKET_NAME = 'sign-recognition-metrics'
    PREFIX = 'metrics/' + datetime.now().strftime('%Y%m%d-%H%M%S')

    train_augmentation_generator = AugmentationTrafficSignLoader(root=TRAINING_DATA_ROOT).augmentation_generator()
    for aug_name, aug_variant, mean, std in train_augmentation_generator:
        # Ensure GPU memory is clean before starting the setup
        torch.cuda.empty_cache()
        print(aug_name)

        # Data loading setup
        num_classes = len(aug_variant.classes)
        #print(num_classes)
        #Model initialization and setup
        model = CNN1(num_classes)
        model = nn.DataParallel(model)
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.001) #, weight_decay=0.005)
        # Setup scheduler
        scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # Adjusts the learning rate every 10 epochs by halving it
        criterion = nn.CrossEntropyLoss()


        #print(aug_variant.classes)
        train_size = int(0.8 * len(aug_variant))
        val_size = len(aug_variant) - train_size
        train_dataset, val_dataset = random_split(aug_variant, [train_size, val_size])
        train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, num_workers=16, worker_init_fn=worker_init_fn)
        val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)

        # Load the test dataset
        transform = transforms.Compose([
                    transforms.Resize((96, 96)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=mean, std=std)
                ])
        test_dataset = datasets.ImageFolder(root=TEST_DATA_ROOT, transform=transform)
        idx_to_class = {v: k for k, v in test_dataset.class_to_idx.items()}
        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)
        #print(mean, std)

        # TensorBoard Setup
        writer_dir = os.path.join(PREFIX, 'runs', aug_name)
        writer = SummaryWriter(writer_dir)

        model_dir = f"{PREFIX}/models/{aug_name}"
        # Training and Evaluation
        test_accuracy = train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, scheduler, device, writer, model_dir, epochs=40)
        cm = evaluate_and_save_results(model, test_loader, device, idx_to_class, BUCKET_NAME, PREFIX, aug_name)

        # Upload results
        upload_directory_to_gcs(BUCKET_NAME, PREFIX, PREFIX)

        # Clear memory
        torch.cuda.empty_cache()

8


  2%|▎         | 1/40 [03:50<2:29:42, 230.31s/it]

Epoch 1, Train Loss: 1.0730, Val Loss: 0.1695, Train Acc: 94.73%, Val Acc: 94.04%, Test Acc: 67.25%


  5%|▌         | 2/40 [05:27<1:36:19, 152.08s/it]

Epoch 2, Train Loss: 0.0446, Val Loss: 0.0809, Train Acc: 97.87%, Val Acc: 97.60%, Test Acc: 68.63%


  8%|▊         | 3/40 [07:05<1:18:30, 127.31s/it]

Epoch 3, Train Loss: 0.0311, Val Loss: 0.0201, Train Acc: 99.66%, Val Acc: 99.42%, Test Acc: 71.06%


 10%|█         | 4/40 [08:37<1:07:56, 113.25s/it]

Epoch 4, Train Loss: 0.0112, Val Loss: 0.0752, Train Acc: 97.93%, Val Acc: 97.87%, Test Acc: 67.82%


 12%|█▎        | 5/40 [10:08<1:01:33, 105.52s/it]

Epoch 5, Train Loss: 0.0169, Val Loss: 0.0206, Train Acc: 99.59%, Val Acc: 99.42%, Test Acc: 64.58%


 15%|█▌        | 6/40 [11:40<57:05, 100.75s/it]  

Epoch 6, Train Loss: 0.0128, Val Loss: 0.0108, Train Acc: 99.87%, Val Acc: 99.71%, Test Acc: 68.52%


 18%|█▊        | 7/40 [13:17<54:47, 99.63s/it] 

Epoch 7, Train Loss: 0.0052, Val Loss: 0.0082, Train Acc: 99.93%, Val Acc: 99.75%, Test Acc: 72.69%


 20%|██        | 8/40 [14:55<52:44, 98.91s/it]

Epoch 8, Train Loss: 0.0024, Val Loss: 0.0057, Train Acc: 99.96%, Val Acc: 99.80%, Test Acc: 73.96%


 22%|██▎       | 9/40 [16:26<49:52, 96.54s/it]

Epoch 9, Train Loss: 0.0022, Val Loss: 0.0076, Train Acc: 99.96%, Val Acc: 99.80%, Test Acc: 73.26%


 25%|██▌       | 10/40 [17:57<47:29, 94.99s/it]

Epoch 10, Train Loss: 0.0160, Val Loss: 0.0128, Train Acc: 99.84%, Val Acc: 99.61%, Test Acc: 70.72%


 28%|██▊       | 11/40 [19:35<46:16, 95.74s/it]

Epoch 11, Train Loss: 0.0045, Val Loss: 0.0063, Train Acc: 99.99%, Val Acc: 99.79%, Test Acc: 77.43%


 30%|███       | 12/40 [21:06<44:02, 94.37s/it]

Epoch 12, Train Loss: 0.0011, Val Loss: 0.0055, Train Acc: 100.00%, Val Acc: 99.83%, Test Acc: 76.50%


 32%|███▎      | 13/40 [22:38<42:03, 93.46s/it]

Epoch 13, Train Loss: 0.0002, Val Loss: 0.0046, Train Acc: 100.00%, Val Acc: 99.85%, Test Acc: 77.08%


 35%|███▌      | 14/40 [24:09<40:14, 92.88s/it]

Epoch 14, Train Loss: 0.0001, Val Loss: 0.0046, Train Acc: 100.00%, Val Acc: 99.86%, Test Acc: 76.97%


 38%|███▊      | 15/40 [25:40<38:30, 92.43s/it]

Epoch 15, Train Loss: 0.0001, Val Loss: 0.0044, Train Acc: 100.00%, Val Acc: 99.87%, Test Acc: 77.08%


 40%|████      | 16/40 [27:18<37:32, 93.86s/it]

Epoch 16, Train Loss: 0.0001, Val Loss: 0.0045, Train Acc: 100.00%, Val Acc: 99.88%, Test Acc: 77.66%


 42%|████▎     | 17/40 [28:49<35:41, 93.12s/it]

Epoch 17, Train Loss: 0.0002, Val Loss: 0.1832, Train Acc: 98.68%, Val Acc: 98.37%, Test Acc: 70.02%


 45%|████▌     | 18/40 [30:20<33:56, 92.55s/it]

Epoch 18, Train Loss: 0.0389, Val Loss: 0.0099, Train Acc: 99.92%, Val Acc: 99.69%, Test Acc: 64.93%


 48%|████▊     | 19/40 [31:52<32:15, 92.18s/it]

Epoch 19, Train Loss: 0.0031, Val Loss: 0.0080, Train Acc: 99.97%, Val Acc: 99.75%, Test Acc: 61.69%


 50%|█████     | 20/40 [33:23<30:37, 91.88s/it]

Epoch 20, Train Loss: 0.0006, Val Loss: 0.0054, Train Acc: 100.00%, Val Acc: 99.85%, Test Acc: 63.19%


 52%|█████▎    | 21/40 [34:54<29:04, 91.82s/it]

Epoch 21, Train Loss: 0.0002, Val Loss: 0.0038, Train Acc: 100.00%, Val Acc: 99.90%, Test Acc: 63.43%


 55%|█████▌    | 22/40 [36:26<27:31, 91.77s/it]

Epoch 22, Train Loss: 0.0001, Val Loss: 0.0043, Train Acc: 100.00%, Val Acc: 99.89%, Test Acc: 63.43%


 57%|█████▊    | 23/40 [37:58<26:00, 91.77s/it]

Epoch 23, Train Loss: 0.0005, Val Loss: 0.0040, Train Acc: 100.00%, Val Acc: 99.90%, Test Acc: 70.72%


 60%|██████    | 24/40 [39:29<24:26, 91.68s/it]

Epoch 24, Train Loss: 0.0001, Val Loss: 0.0044, Train Acc: 100.00%, Val Acc: 99.89%, Test Acc: 71.88%


 62%|██████▎   | 25/40 [41:01<22:55, 91.69s/it]

Epoch 25, Train Loss: 0.0000, Val Loss: 0.0043, Train Acc: 100.00%, Val Acc: 99.89%, Test Acc: 71.30%


 62%|██████▎   | 25/40 [42:33<25:31, 102.12s/it]

Early stopping triggered after 26 epochs.





77.66203703703704
3


  2%|▎         | 1/40 [03:21<2:11:11, 201.82s/it]

Epoch 1, Train Loss: 0.5800, Val Loss: 0.0132, Train Acc: 99.69%, Val Acc: 99.65%, Test Acc: 36.57%


  5%|▌         | 2/40 [04:58<1:28:33, 139.83s/it]

Epoch 2, Train Loss: 0.0079, Val Loss: 0.3751, Train Acc: 94.13%, Val Acc: 93.89%, Test Acc: 37.04%


  8%|▊         | 3/40 [06:35<1:14:10, 120.29s/it]

Epoch 3, Train Loss: 0.0231, Val Loss: 0.0026, Train Acc: 99.98%, Val Acc: 99.95%, Test Acc: 39.35%


 10%|█         | 4/40 [08:06<1:05:19, 108.86s/it]

Epoch 4, Train Loss: 0.0013, Val Loss: 0.0011, Train Acc: 99.99%, Val Acc: 99.96%, Test Acc: 38.77%


 12%|█▎        | 5/40 [09:43<1:01:03, 104.66s/it]

Epoch 5, Train Loss: 0.0009, Val Loss: 0.0008, Train Acc: 99.99%, Val Acc: 99.97%, Test Acc: 39.81%


 15%|█▌        | 6/40 [11:15<56:45, 100.15s/it]  

Epoch 6, Train Loss: 0.0009, Val Loss: 0.4810, Train Acc: 97.39%, Val Acc: 97.22%, Test Acc: 33.56%


 18%|█▊        | 7/40 [12:46<53:30, 97.29s/it] 

Epoch 7, Train Loss: 0.1759, Val Loss: 0.0372, Train Acc: 99.31%, Val Acc: 99.32%, Test Acc: 30.67%


 20%|██        | 8/40 [14:17<50:50, 95.34s/it]

Epoch 8, Train Loss: 0.0079, Val Loss: 0.0074, Train Acc: 99.98%, Val Acc: 99.93%, Test Acc: 34.61%


 22%|██▎       | 9/40 [15:49<48:35, 94.06s/it]

Epoch 9, Train Loss: 0.0047, Val Loss: 0.0034, Train Acc: 99.99%, Val Acc: 99.97%, Test Acc: 36.00%


 25%|██▌       | 10/40 [17:20<46:36, 93.22s/it]

Epoch 10, Train Loss: 0.0002, Val Loss: 0.1105, Train Acc: 98.50%, Val Acc: 98.47%, Test Acc: 36.00%


 28%|██▊       | 11/40 [18:52<44:51, 92.83s/it]

Epoch 11, Train Loss: 0.0072, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 34.03%


 30%|███       | 12/40 [20:23<43:07, 92.43s/it]

Epoch 12, Train Loss: 0.0037, Val Loss: 0.0005, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 36.81%


 32%|███▎      | 13/40 [21:55<41:27, 92.12s/it]

Epoch 13, Train Loss: 0.0002, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.96%, Test Acc: 36.57%


 35%|███▌      | 14/40 [23:27<39:54, 92.08s/it]

Epoch 14, Train Loss: 0.0035, Val Loss: 0.0015, Train Acc: 99.99%, Val Acc: 99.96%, Test Acc: 35.42%


 35%|███▌      | 14/40 [24:59<46:24, 107.09s/it]

Early stopping triggered after 15 epochs.





39.81481481481482
5


  2%|▎         | 1/40 [03:22<2:11:44, 202.68s/it]

Epoch 1, Train Loss: 0.5919, Val Loss: 0.0397, Train Acc: 99.04%, Val Acc: 98.80%, Test Acc: 40.51%


  5%|▌         | 2/40 [05:00<1:29:16, 140.97s/it]

Epoch 2, Train Loss: 0.0312, Val Loss: 0.6231, Train Acc: 96.12%, Val Acc: 96.13%, Test Acc: 47.69%


  8%|▊         | 3/40 [06:38<1:14:49, 121.33s/it]

Epoch 3, Train Loss: 0.0306, Val Loss: 0.0032, Train Acc: 99.93%, Val Acc: 99.89%, Test Acc: 49.31%


 10%|█         | 4/40 [08:10<1:05:58, 109.95s/it]

Epoch 4, Train Loss: 0.0022, Val Loss: 0.0060, Train Acc: 99.86%, Val Acc: 99.80%, Test Acc: 48.38%


 12%|█▎        | 5/40 [09:42<1:00:21, 103.47s/it]

Epoch 5, Train Loss: 0.0024, Val Loss: 0.0024, Train Acc: 99.90%, Val Acc: 99.92%, Test Acc: 48.03%


 15%|█▌        | 6/40 [11:14<56:26, 99.59s/it]   

Epoch 6, Train Loss: 0.0025, Val Loss: 0.0012, Train Acc: 99.98%, Val Acc: 99.96%, Test Acc: 47.11%


 18%|█▊        | 7/40 [12:47<53:25, 97.15s/it]

Epoch 7, Train Loss: 0.0003, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 100.00%, Test Acc: 48.61%


 20%|██        | 8/40 [14:19<50:58, 95.58s/it]

Epoch 8, Train Loss: 0.0002, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 48.61%


 22%|██▎       | 9/40 [15:52<48:56, 94.72s/it]

Epoch 9, Train Loss: 0.0001, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 48.84%


 25%|██▌       | 10/40 [17:24<46:58, 93.95s/it]

Epoch 10, Train Loss: 0.0000, Val Loss: 0.0002, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 48.96%


 28%|██▊       | 11/40 [18:56<45:11, 93.49s/it]

Epoch 11, Train Loss: 0.0000, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 48.03%


 30%|███       | 12/40 [20:28<43:25, 93.05s/it]

Epoch 12, Train Loss: 0.0000, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 48.61%


 30%|███       | 12/40 [22:01<51:22, 110.09s/it]

Early stopping triggered after 13 epochs.





49.30555555555556
2


  2%|▎         | 1/40 [02:38<1:42:47, 158.13s/it]

Epoch 1, Train Loss: 0.7391, Val Loss: 0.4293, Train Acc: 95.73%, Val Acc: 95.54%, Test Acc: 30.09%
Epoch 2, Train Loss: 0.0622, Val Loss: 0.0025, Train Acc: 99.97%, Val Acc: 99.92%, Test Acc: 43.63%


  8%|▊         | 3/40 [05:50<1:07:55, 110.15s/it]

Epoch 3, Train Loss: 0.0010, Val Loss: 0.0011, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 44.21%


 10%|█         | 4/40 [07:20<1:01:21, 102.25s/it]

Epoch 4, Train Loss: 0.0007, Val Loss: 0.0008, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 44.10%


 12%|█▎        | 5/40 [08:56<58:21, 100.05s/it]  

Epoch 5, Train Loss: 0.0005, Val Loss: 0.0005, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 44.33%


 15%|█▌        | 6/40 [10:27<54:49, 96.75s/it] 

Epoch 6, Train Loss: 0.0005, Val Loss: 0.0018, Train Acc: 99.99%, Val Acc: 99.94%, Test Acc: 43.87%


 18%|█▊        | 7/40 [11:57<52:02, 94.62s/it]

Epoch 7, Train Loss: 0.0003, Val Loss: 0.0005, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 43.40%


 20%|██        | 8/40 [13:28<49:45, 93.31s/it]

Epoch 8, Train Loss: 0.0002, Val Loss: 0.0028, Train Acc: 99.97%, Val Acc: 99.90%, Test Acc: 44.21%


 22%|██▎       | 9/40 [14:58<47:44, 92.41s/it]

Epoch 9, Train Loss: 0.0013, Val Loss: 0.0012, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 42.48%


 25%|██▌       | 10/40 [16:28<45:51, 91.73s/it]

Epoch 10, Train Loss: 0.0001, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 43.17%


 28%|██▊       | 11/40 [17:58<44:07, 91.29s/it]

Epoch 11, Train Loss: 0.0001, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 43.52%


 30%|███       | 12/40 [19:28<42:25, 90.90s/it]

Epoch 12, Train Loss: 0.0000, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 43.63%


 32%|███▎      | 13/40 [20:59<40:50, 90.75s/it]

Epoch 13, Train Loss: 0.0000, Val Loss: 0.0048, Train Acc: 99.86%, Val Acc: 99.82%, Test Acc: 44.10%


 35%|███▌      | 14/40 [22:29<39:14, 90.56s/it]

Epoch 14, Train Loss: 0.0029, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 41.32%


 35%|███▌      | 14/40 [23:59<44:33, 102.82s/it]

Early stopping triggered after 15 epochs.





44.3287037037037
7


  2%|▎         | 1/40 [03:17<2:08:04, 197.04s/it]

Epoch 1, Train Loss: 0.9853, Val Loss: 0.0632, Train Acc: 97.59%, Val Acc: 97.58%, Test Acc: 56.71%


  5%|▌         | 2/40 [04:54<1:27:39, 138.41s/it]

Epoch 2, Train Loss: 0.0298, Val Loss: 0.0649, Train Acc: 97.79%, Val Acc: 97.69%, Test Acc: 58.68%


  8%|▊         | 3/40 [06:26<1:12:12, 117.09s/it]

Epoch 3, Train Loss: 0.0183, Val Loss: 0.0142, Train Acc: 99.73%, Val Acc: 99.55%, Test Acc: 57.64%


 10%|█         | 4/40 [07:57<1:04:15, 107.09s/it]

Epoch 4, Train Loss: 0.0042, Val Loss: 0.0079, Train Acc: 99.92%, Val Acc: 99.78%, Test Acc: 53.36%


 12%|█▎        | 5/40 [09:29<59:14, 101.55s/it]  

Epoch 5, Train Loss: 0.0062, Val Loss: 0.0084, Train Acc: 99.90%, Val Acc: 99.69%, Test Acc: 54.98%


 15%|█▌        | 6/40 [11:07<56:46, 100.19s/it]

Epoch 6, Train Loss: 0.0096, Val Loss: 0.0602, Train Acc: 98.52%, Val Acc: 98.21%, Test Acc: 59.38%


 18%|█▊        | 7/40 [12:39<53:40, 97.60s/it] 

Epoch 7, Train Loss: 0.0089, Val Loss: 0.0247, Train Acc: 99.51%, Val Acc: 99.35%, Test Acc: 58.56%


 20%|██        | 8/40 [14:17<52:06, 97.70s/it]

Epoch 8, Train Loss: 0.0054, Val Loss: 0.0074, Train Acc: 99.96%, Val Acc: 99.84%, Test Acc: 64.70%


 22%|██▎       | 9/40 [15:49<49:35, 95.98s/it]

Epoch 9, Train Loss: 0.0008, Val Loss: 0.0067, Train Acc: 99.99%, Val Acc: 99.85%, Test Acc: 64.35%


 25%|██▌       | 10/40 [17:21<47:22, 94.74s/it]

Epoch 10, Train Loss: 0.0002, Val Loss: 0.0054, Train Acc: 99.99%, Val Acc: 99.89%, Test Acc: 64.35%


 28%|██▊       | 11/40 [18:55<45:43, 94.61s/it]

Epoch 11, Train Loss: 0.0002, Val Loss: 0.0050, Train Acc: 100.00%, Val Acc: 99.90%, Test Acc: 63.66%


 30%|███       | 12/40 [20:27<43:45, 93.76s/it]

Epoch 12, Train Loss: 0.0000, Val Loss: 0.0047, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 62.62%


 32%|███▎      | 13/40 [21:59<41:55, 93.16s/it]

Epoch 13, Train Loss: 0.0000, Val Loss: 0.0046, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 62.96%


 35%|███▌      | 14/40 [23:31<40:12, 92.81s/it]

Epoch 14, Train Loss: 0.0000, Val Loss: 0.0049, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 63.89%


 38%|███▊      | 15/40 [25:03<38:33, 92.56s/it]

Epoch 15, Train Loss: 0.0000, Val Loss: 0.0046, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 62.85%


 40%|████      | 16/40 [26:35<36:56, 92.36s/it]

Epoch 16, Train Loss: 0.0000, Val Loss: 0.0048, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 62.38%


 42%|████▎     | 17/40 [28:07<35:20, 92.21s/it]

Epoch 17, Train Loss: 0.0000, Val Loss: 0.0048, Train Acc: 100.00%, Val Acc: 99.91%, Test Acc: 62.85%


 42%|████▎     | 17/40 [29:38<40:06, 104.64s/it]

Early stopping triggered after 18 epochs.





64.69907407407408
6


  2%|▎         | 1/40 [03:23<2:12:21, 203.64s/it]

Epoch 1, Train Loss: 0.7326, Val Loss: 0.0221, Train Acc: 99.41%, Val Acc: 99.25%, Test Acc: 48.50%


  5%|▌         | 2/40 [04:55<1:27:14, 137.75s/it]

Epoch 2, Train Loss: 0.0112, Val Loss: 0.8276, Train Acc: 90.52%, Val Acc: 90.41%, Test Acc: 39.12%


  8%|▊         | 3/40 [06:27<1:12:06, 116.93s/it]

Epoch 3, Train Loss: 0.0194, Val Loss: 0.0056, Train Acc: 99.93%, Val Acc: 99.84%, Test Acc: 46.30%


 10%|█         | 4/40 [08:05<1:05:39, 109.44s/it]

Epoch 4, Train Loss: 0.0041, Val Loss: 0.0032, Train Acc: 99.97%, Val Acc: 99.92%, Test Acc: 50.58%


 12%|█▎        | 5/40 [09:37<1:00:10, 103.16s/it]

Epoch 5, Train Loss: 0.0037, Val Loss: 0.0086, Train Acc: 99.81%, Val Acc: 99.68%, Test Acc: 50.12%


 15%|█▌        | 6/40 [11:09<56:18, 99.37s/it]   

Epoch 6, Train Loss: 0.0017, Val Loss: 0.0020, Train Acc: 100.00%, Val Acc: 99.95%, Test Acc: 46.99%


 18%|█▊        | 7/40 [12:41<53:22, 97.04s/it]

Epoch 7, Train Loss: 0.0012, Val Loss: 0.0037, Train Acc: 99.99%, Val Acc: 99.92%, Test Acc: 48.73%


 20%|██        | 8/40 [14:13<50:52, 95.39s/it]

Epoch 8, Train Loss: 0.0005, Val Loss: 0.0022, Train Acc: 100.00%, Val Acc: 99.95%, Test Acc: 46.88%


 22%|██▎       | 9/40 [15:45<48:43, 94.32s/it]

Epoch 9, Train Loss: 0.0001, Val Loss: 0.0210, Train Acc: 99.36%, Val Acc: 99.35%, Test Acc: 46.30%


 25%|██▌       | 10/40 [17:17<46:47, 93.60s/it]

Epoch 10, Train Loss: 0.1109, Val Loss: 0.0407, Train Acc: 99.28%, Val Acc: 99.16%, Test Acc: 48.38%


 28%|██▊       | 11/40 [18:55<45:50, 94.84s/it]

Epoch 11, Train Loss: 0.0079, Val Loss: 0.0023, Train Acc: 99.96%, Val Acc: 99.92%, Test Acc: 53.82%


 30%|███       | 12/40 [20:27<43:53, 94.06s/it]

Epoch 12, Train Loss: 0.0016, Val Loss: 0.0021, Train Acc: 99.98%, Val Acc: 99.89%, Test Acc: 53.24%


 32%|███▎      | 13/40 [21:59<42:03, 93.46s/it]

Epoch 13, Train Loss: 0.0010, Val Loss: 0.0024, Train Acc: 99.97%, Val Acc: 99.93%, Test Acc: 52.31%


 35%|███▌      | 14/40 [23:31<40:17, 93.00s/it]

Epoch 14, Train Loss: 0.0003, Val Loss: 0.0013, Train Acc: 100.00%, Val Acc: 99.95%, Test Acc: 52.78%


 38%|███▊      | 15/40 [25:03<38:36, 92.65s/it]

Epoch 15, Train Loss: 0.0002, Val Loss: 0.0010, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 52.43%


 40%|████      | 16/40 [26:35<36:58, 92.43s/it]

Epoch 16, Train Loss: 0.0001, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 53.36%


 42%|████▎     | 17/40 [28:07<35:22, 92.30s/it]

Epoch 17, Train Loss: 0.0001, Val Loss: 0.0008, Train Acc: 100.00%, Val Acc: 99.96%, Test Acc: 53.12%


 45%|████▌     | 18/40 [29:39<33:48, 92.22s/it]

Epoch 18, Train Loss: 0.0000, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 52.55%


 48%|████▊     | 19/40 [31:11<32:14, 92.13s/it]

Epoch 19, Train Loss: 0.0001, Val Loss: 0.0008, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 53.01%


 50%|█████     | 20/40 [32:42<30:40, 92.04s/it]

Epoch 20, Train Loss: 0.0000, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.96%, Test Acc: 52.66%


 50%|█████     | 20/40 [34:14<34:14, 102.74s/it]

Early stopping triggered after 21 epochs.





53.81944444444444
1


  2%|▎         | 1/40 [02:32<1:39:18, 152.79s/it]

Epoch 1, Train Loss: 0.5434, Val Loss: 0.0029, Train Acc: 99.96%, Val Acc: 99.93%, Test Acc: 32.99%


  5%|▌         | 2/40 [04:02<1:13:22, 115.86s/it]

Epoch 2, Train Loss: 0.0014, Val Loss: 0.2484, Train Acc: 97.35%, Val Acc: 97.25%, Test Acc: 32.06%


  8%|▊         | 3/40 [05:38<1:05:53, 106.86s/it]

Epoch 3, Train Loss: 0.0092, Val Loss: 0.0010, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 33.33%


 10%|█         | 4/40 [07:16<1:01:53, 103.15s/it]

Epoch 4, Train Loss: 0.0009, Val Loss: 0.0002, Train Acc: 100.00%, Val Acc: 100.00%, Test Acc: 34.49%


 12%|█▎        | 5/40 [08:47<57:33, 98.67s/it]   

Epoch 5, Train Loss: 0.0001, Val Loss: 0.0001, Train Acc: 100.00%, Val Acc: 100.00%, Test Acc: 34.38%


 15%|█▌        | 6/40 [10:17<54:21, 95.91s/it]

Epoch 6, Train Loss: 0.0001, Val Loss: 0.3399, Train Acc: 96.44%, Val Acc: 96.52%, Test Acc: 33.22%


 18%|█▊        | 7/40 [11:48<51:45, 94.11s/it]

Epoch 7, Train Loss: 0.2766, Val Loss: 8.6946, Train Acc: 73.92%, Val Acc: 74.27%, Test Acc: 25.23%


 20%|██        | 8/40 [13:18<49:30, 92.83s/it]

Epoch 8, Train Loss: 0.1823, Val Loss: 0.0018, Train Acc: 99.97%, Val Acc: 99.92%, Test Acc: 29.75%


 22%|██▎       | 9/40 [14:48<47:31, 92.00s/it]

Epoch 9, Train Loss: 0.0007, Val Loss: 0.0006, Train Acc: 99.99%, Val Acc: 99.98%, Test Acc: 30.32%


 25%|██▌       | 10/40 [16:18<45:43, 91.45s/it]

Epoch 10, Train Loss: 0.0002, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 30.79%


 28%|██▊       | 11/40 [17:48<43:59, 91.01s/it]

Epoch 11, Train Loss: 0.0000, Val Loss: 0.0007, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 30.56%


 30%|███       | 12/40 [19:18<42:18, 90.65s/it]

Epoch 12, Train Loss: 0.0000, Val Loss: 0.0007, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 30.21%


 32%|███▎      | 13/40 [20:48<40:43, 90.51s/it]

Epoch 13, Train Loss: 0.0000, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.98%, Test Acc: 30.44%


 32%|███▎      | 13/40 [22:18<46:20, 102.98s/it]

Early stopping triggered after 14 epochs.





34.49074074074074
4


  2%|▎         | 1/40 [03:32<2:17:50, 212.07s/it]

Epoch 1, Train Loss: 0.8526, Val Loss: 0.1758, Train Acc: 97.55%, Val Acc: 97.36%, Test Acc: 39.47%


  5%|▌         | 2/40 [05:09<1:31:46, 144.91s/it]

Epoch 2, Train Loss: 0.0212, Val Loss: 0.0033, Train Acc: 99.96%, Val Acc: 99.90%, Test Acc: 45.37%


  8%|▊         | 3/40 [06:41<1:14:26, 120.71s/it]

Epoch 3, Train Loss: 0.0018, Val Loss: 0.0042, Train Acc: 99.89%, Val Acc: 99.86%, Test Acc: 45.14%


 10%|█         | 4/40 [08:19<1:07:00, 111.68s/it]

Epoch 4, Train Loss: 0.0050, Val Loss: 0.0009, Train Acc: 99.99%, Val Acc: 99.97%, Test Acc: 45.60%


 12%|█▎        | 5/40 [09:51<1:00:57, 104.49s/it]

Epoch 5, Train Loss: 0.0014, Val Loss: 0.1586, Train Acc: 97.41%, Val Acc: 97.15%, Test Acc: 45.25%


 15%|█▌        | 6/40 [11:23<56:46, 100.20s/it]  

Epoch 6, Train Loss: 0.0050, Val Loss: 0.3750, Train Acc: 95.73%, Val Acc: 95.85%, Test Acc: 44.10%


 18%|█▊        | 7/40 [12:55<53:34, 97.42s/it] 

Epoch 7, Train Loss: 0.0239, Val Loss: 0.0024, Train Acc: 99.96%, Val Acc: 99.95%, Test Acc: 39.81%


 20%|██        | 8/40 [14:26<50:59, 95.62s/it]

Epoch 8, Train Loss: 0.0020, Val Loss: 0.0010, Train Acc: 100.00%, Val Acc: 99.96%, Test Acc: 40.39%


 22%|██▎       | 9/40 [15:58<48:47, 94.45s/it]

Epoch 9, Train Loss: 0.0004, Val Loss: 0.0005, Train Acc: 99.99%, Val Acc: 99.99%, Test Acc: 40.05%


 25%|██▌       | 10/40 [17:30<46:50, 93.67s/it]

Epoch 10, Train Loss: 0.0003, Val Loss: 0.0003, Train Acc: 100.00%, Val Acc: 99.99%, Test Acc: 41.55%


 28%|██▊       | 11/40 [19:02<45:01, 93.14s/it]

Epoch 11, Train Loss: 0.0001, Val Loss: 0.0009, Train Acc: 100.00%, Val Acc: 99.96%, Test Acc: 41.44%


 30%|███       | 12/40 [20:34<43:19, 92.84s/it]

Epoch 12, Train Loss: 0.0000, Val Loss: 0.0006, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 41.20%


 32%|███▎      | 13/40 [22:06<41:39, 92.59s/it]

Epoch 13, Train Loss: 0.0000, Val Loss: 0.0004, Train Acc: 100.00%, Val Acc: 99.97%, Test Acc: 41.32%


 32%|███▎      | 13/40 [23:38<49:06, 109.12s/it]

Early stopping triggered after 14 epochs.





45.601851851851855


## Batch size hyperparameter tuning

In [None]:
batches = [512, 256, 128, 64, 32, 16]
TRAINING_DATA_ROOT = 'data/synthetic/8'
TEST_DATA_ROOT = 'data/test_data'


BUCKET_NAME = 'sign-recognition-metrics'
PREFIX = 'metrics/batch_' + datetime.now().strftime('%Y%m%d-%H%M%S')
best_batch_accuracy = 0
best_batch_size = 512

train_augmentation_generator = AugmentationTrafficSignLoader(root=TRAINING_DATA_ROOT).augmentation_generator()
for aug_name, aug_variant, mean, std in train_augmentation_generator:
    for BATCH_SIZE in batches:
        print(BATCH_SIZE)
        # Ensure GPU memory is clean before starting the setup
        torch.cuda.empty_cache()
        print(aug_name)

        # Data loading setup
        num_classes = len(aug_variant.classes)
        #print(num_classes)
        #Model initialization and setup
        model = CNN1(num_classes)
        model = nn.DataParallel(model)
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.001) #, weight_decay=0.005)
        # Setup scheduler
        scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # Adjusts the learning rate every 10 epochs by halving it
        criterion = nn.CrossEntropyLoss()


        #print(aug_variant.classes)
        train_size = int(0.8 * len(aug_variant))
        val_size = len(aug_variant) - train_size
        train_dataset, val_dataset = random_split(aug_variant, [train_size, val_size])
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=16, worker_init_fn=worker_init_fn)
        val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)

        # Load the test dataset
        transform = transforms.Compose([
                    transforms.Resize((96, 96)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=mean, std=std)
                ])
        test_dataset = datasets.ImageFolder(root=TEST_DATA_ROOT, transform=transform)
        idx_to_class = {v: k for k, v in test_dataset.class_to_idx.items()}
        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)
        #print(mean, std)

        # TensorBoard Setup
        writer_dir = os.path.join(PREFIX, 'runs', f'{BATCH_SIZE}')
        writer = SummaryWriter(writer_dir)

        model_dir = f"{PREFIX}/models/{BATCH_SIZE}"
        # Training and Evaluation
        test_accuracy = train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, scheduler, device, writer, model_dir, epochs=40)
        if test_accuracy > best_batch_accuracy:
            best_batch_accuracy = test_accuracy
            best_batch_size = BATCH_SIZE
        cm = evaluate_and_save_results(model, test_loader, device, idx_to_class, BUCKET_NAME, PREFIX, aug_name)

        # Upload results
        upload_directory_to_gcs(BUCKET_NAME, PREFIX, PREFIX)

        # Clear memory
        torch.cuda.empty_cache()

## Dropout and L2 regularization hyperparameter tuning
With best batch size found in the previous step, we will now tune the dropout hyperparameters. Uncomment decay in the optimizer initialization to enable L2 regularization.

In [None]:
class CNN1(nn.Module):
    def __init__(self, n_classes=35):
        super(CNN1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=100, kernel_size=3, padding=1, bias=False)
        self.norm1 = nn.BatchNorm2d(num_features=100)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.5)  # Dropout after first pooling

        self.conv2 = nn.Conv2d(in_channels=100, out_channels=150, kernel_size=3, padding=1, bias=False)
        self.norm2 = nn.BatchNorm2d(num_features=150)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.5)  # Dropout after second pooling

        self.conv3 = nn.Conv2d(in_channels=150, out_channels=250, kernel_size=3, padding=1, bias=False)
        self.norm3 = nn.BatchNorm2d(num_features=250)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout3 = nn.Dropout(0.5)  # Dropout after third pooling

        flat_features = 12 * 12 * 250
        self.fc1 = nn.Linear(in_features=flat_features, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=n_classes)
        self.dropout4 = nn.Dropout(0.8)  # Dropout before the fully connected layers

    def forward(self, x):
        x = self.dropout1(self.pool1(self.norm1(F.relu(self.conv1(x)))))
        x = self.dropout2(self.pool2(self.norm2(F.relu(self.conv2(x)))))
        x = self.dropout3(self.pool3(self.norm3(F.relu(self.conv3(x)))))
        x = torch.flatten(x, start_dim=1)
        x = self.dropout4(F.relu(self.fc1(x)))
        x = self.dropout4(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [None]:
BUCKET_NAME = 'sign-recognition-metrics'
PREFIX = 'metrics/drop_' + datetime.now().strftime('%Y%m%d-%H%M%S')
train_augmentation_generator = AugmentationTrafficSignLoader(root=TRAINING_DATA_ROOT).augmentation_generator()
for aug_name, aug_variant, mean, std in train_augmentation_generator:
    # Ensure GPU memory is clean before starting the setup
    torch.cuda.empty_cache()
    print(aug_name)

    # Data loading setup
    num_classes = len(aug_variant.classes)
    #print(num_classes)
    #Model initialization and setup
    model = CNN1(num_classes)
    model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001) #, weight_decay=0.005)
    # Setup scheduler
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # Adjusts the learning rate every 10 epochs by halving it
    criterion = nn.CrossEntropyLoss()


    #print(aug_variant.classes)
    train_size = int(0.8 * len(aug_variant))
    val_size = len(aug_variant) - train_size
    train_dataset, val_dataset = random_split(aug_variant, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True, num_workers=16, worker_init_fn=worker_init_fn)
    val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)

    # Load the test dataset
    transform = transforms.Compose([
                transforms.Resize((96, 96)),
                transforms.ToTensor(),
                transforms.Normalize(mean=mean, std=std)
            ])
    test_dataset = datasets.ImageFolder(root=TEST_DATA_ROOT, transform=transform)
    idx_to_class = {v: k for k, v in test_dataset.class_to_idx.items()}
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=16, worker_init_fn=worker_init_fn)
    #print(mean, std)

    # TensorBoard Setup
    writer_dir = os.path.join(PREFIX, 'runs', aug_name)
    writer = SummaryWriter(writer_dir)

    model_dir = f"{PREFIX}/models/{aug_name}"
    # Training and Evaluation
    test_accuracy = train_and_evaluate(model, train_loader, val_loader, test_loader, criterion, optimizer, scheduler, device, writer, model_dir, epochs=40)
    if test_accuracy > best_batch_accuracy:
        best_batch_accuracy = test_accuracy
        best_batch_size = BATCH_SIZE
    cm = evaluate_and_save_results(model, test_loader, device, idx_to_class, BUCKET_NAME, PREFIX, aug_name)

    # Upload results
    upload_directory_to_gcs(BUCKET_NAME, PREFIX, PREFIX)

    # Clear memory
    torch.cuda.empty_cache()