#Introduction

The code provided here is exactly the same as in In-class Exercise 1. This time, we are going to implement random search.

Note: Again, while MLPs can work for image classification, they do not utilize the spatial structure of images and typically perform poorly on image datasets like CIFAR-10. Convolutional Neural Networks (CNNs) are better suited for these tasks. However, today we will focus on using this example to explore hyperparameter optimization (HPO).

In [1]:
#Chung Ki Ki 11443843
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

#New libraries for search space
import random
import torch.optim as optim

In [2]:
# Define the search space for rondomly select the parameters
search_space = {
    "learning_rate": [0.001, 0.05, 0.1],
    "hidden_size1": [256, 512, 1024],
    "hidden_size2": [128, 256, 512],
    "dropout_prob": [0.1, 0.2, 0.3],
    "batch_size": [32, 64, 128]
}

In [3]:
# Number of random samples to try and initial for searching
num_trials = 3
best_loss = float('inf')
best_acc =0
best_params = None

In [4]:
# Define transformations: Normalize images and apply basic augmentations
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize RGB channels
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize RGB channels
])

# Load CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform_test, download=True)

# Split training dataset into train and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Check dataset
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Number of training samples: 40000
Number of validation samples: 10000
Number of test samples: 10000


In [5]:
# Define the MLP model with dropout
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, dropout_prob=0.5):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_prob)  # Dropout layer

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout after activation
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # No activation in the output layer
        return x

# Model parameters
input_size = 32 * 32 * 3  # CIFAR-10 images are 32x32x3
output_size = 10  # 10 classes in CIFAR-10



In [6]:
def initialize_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')  # He Initialization
        nn.init.zeros_(m.bias)



In [7]:
# Check if CUDA (GPU) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move the model to GPU (if available)


Using device: cuda


In [8]:
# Loss function
criterion = nn.CrossEntropyLoss()



In [9]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

In [10]:
# Training loop with early stopping
num_epochs = 5
early_stopping = EarlyStopping(patience=5, delta=0.01)

# Traing based on random search
for _ in range(num_trials):

    # Randomly sample hyperparameters
    params = {key: random.choice(values) for key, values in search_space.items()}
    # Instantiate model by random search
    model = MLP(input_size, params["hidden_size1"], params["hidden_size2"], output_size, params["dropout_prob"])
    # Apply weight initialization
    model.apply(initialize_weights)

    #Call GPU
    model = model.to(device)
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])

    #Show the detail of trial
    print(f"Trial {_+1} Parameters: {params}")
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            # Move data to GPU
            images = images.view(-1, 32 * 32 * 3).to(device)  # Flatten images
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        correct, total = 0, 0
        # Validation step
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.view(-1, 32 * 32 * 3).to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        #Get the accuracy
        acc = correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Validation Loss: {val_loss/len(val_loader):.4f}")
        # Track best hyperparameters
        if acc > best_acc:
            best_acc = acc
            best_loss = (val_loss/len(val_loader))
            best_params = params
        # Check early stopping
        early_stopping(val_loss / len(val_loader))
        if early_stopping.early_stop:
            print("Early stopping triggered")

            break



Trial 1 Parameters: {'learning_rate': 0.001, 'hidden_size1': 512, 'hidden_size2': 128, 'dropout_prob': 0.3, 'batch_size': 64}
Epoch [1/5], Train Loss: 2.0741, Validation Loss: 1.8631
Epoch [2/5], Train Loss: 1.9153, Validation Loss: 1.7870
Epoch [3/5], Train Loss: 1.8479, Validation Loss: 1.7663
Epoch [4/5], Train Loss: 1.8132, Validation Loss: 1.7225
Epoch [5/5], Train Loss: 1.7861, Validation Loss: 1.6932
Trial 2 Parameters: {'learning_rate': 0.05, 'hidden_size1': 256, 'hidden_size2': 512, 'dropout_prob': 0.1, 'batch_size': 32}
Epoch [1/5], Train Loss: 4.7384, Validation Loss: 2.3127
Epoch [2/5], Train Loss: 2.3186, Validation Loss: 2.3086
Epoch [3/5], Train Loss: 2.3334, Validation Loss: 2.3091
Epoch [4/5], Train Loss: 2.3070, Validation Loss: 2.3037
Epoch [5/5], Train Loss: 2.3067, Validation Loss: 2.3110
Early stopping triggered
Trial 3 Parameters: {'learning_rate': 0.05, 'hidden_size1': 256, 'hidden_size2': 256, 'dropout_prob': 0.1, 'batch_size': 128}
Epoch [1/5], Train Loss: 4.4

In [11]:
print(f"Best Parameters: {best_params}, Best Validatiion Accuracy: {best_acc:.4f}, Best Validation Loss: {best_loss:.4f},")

Best Parameters: {'learning_rate': 0.001, 'hidden_size1': 512, 'hidden_size2': 128, 'dropout_prob': 0.3, 'batch_size': 64}, Best Validatiion Accuracy: 0.3845, Best Validation Loss: 1.6932,
