In [1]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from bayes_opt import BayesianOptimization

  warn(


In [2]:
# Pick the compute device: the first CUDA GPU when PyTorch reports one as
# available, otherwise the CPU. (Assign torch.device("cpu") here to force CPU.)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Running on {device}")

Running on cuda:0


In [3]:
# Preprocessing applied to both splits: convert each image to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits, stored under ./data (download requested if needed).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

# Mini-batch loaders: only the training split is shuffled.
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class TimeStopping:
    """Wall-clock time budget for a long-running loop.

    Call `start()` when the timed work begins, then poll `should_stop()`
    (for example once per batch or epoch) to learn whether more than
    `seconds` seconds have elapsed since the timer was started.
    """

    def __init__(self, seconds):
        """Store the time budget.

        Args:
            seconds: Maximum allowed elapsed time, in seconds.
        """
        self.seconds = seconds
        # time.time() timestamp of the most recent start; None until started.
        self.start_time = None

    def start(self):
        """Start (or restart) the timer at the current time."""
        self.start_time = time.time()

    def should_stop(self):
        """Report whether the time budget has been exceeded.

        If the timer has not been started yet, it is started by this call
        rather than failing with a TypeError on the unset start time.

        Returns:
            True (after printing a message) when strictly more than
            `self.seconds` seconds have elapsed since the timer started,
            otherwise False.
        """
        if self.start_time is None:
            # start() was never called: begin timing from this first check.
            self.start()
        elapsed_time = time.time() - self.start_time
        if elapsed_time > self.seconds:
            print(f"\nStopping training: elapsed time {elapsed_time:.2f}s > limit {self.seconds}s")
            return True
        return False

In [5]:
class MLP(nn.Module):
    """Single-hidden-layer perceptron classifier.

    Pipeline: flatten -> linear -> ReLU -> batch norm -> linear (logits).
    The default sizes correspond to 32x32 three-channel images and 10 classes.
    """

    def __init__(self, num_units=512, in_features=32 * 32 * 3, num_classes=10):
        """Build the layers.

        Args:
            num_units: Width of the hidden layer.
            in_features: Number of values per sample after flattening.
            num_classes: Number of output logits.
        """
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features, num_units)
        self.bn = nn.BatchNorm1d(num_units)
        self.fc2 = nn.Linear(num_units, num_classes)

    def forward(self, x):
        """Return unnormalized class scores for a batch.

        Args:
            x: Batch of inputs; every sample must flatten to `in_features` values.

        Returns:
            Tensor with one row per sample and `num_classes` columns.
        """
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        # Batch norm is applied to the post-ReLU activations.
        x = self.bn(x)
        x = self.fc2(x)
        return x

In [None]:
# Grid search: every (learning rate, batch size, hidden width) combination
# below trains a fresh MLP with Adam for `num_epochs` epochs.
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1]
batch_sizes = [16, 32]  # start with smaller batches
num_units_values = [512, 1024, 1536, 2048]  # start with smaller model sizes

num_epochs = 10

# One (lr, batch size, num units, mean training loss of the last epoch) tuple
# is appended per combination.
results = []

# Flattened grid, listed in the same order as nested lr -> bs -> nu loops.
search_grid = [
    (lr, bs, nu)
    for lr in learning_rates
    for bs in batch_sizes
    for nu in num_units_values
]

for lr, bs, nu in search_grid:
    # Batch size is a searched hyperparameter, so the loader is rebuilt each time.
    trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
    model = MLP(num_units=nu).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        running_loss = 0.0  # sum of per-batch losses within this epoch
        for batch in trainloader:
            inputs = batch[0].to(device)
            labels = batch[1].to(device)
            optimizer.zero_grad()  # drop gradients left over from the previous step
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"Epoch: {epoch+1}, Learning Rate: {lr}, Batch Size: {bs}, Num Units: {nu}, Loss: {running_loss / len(trainloader)}")
    # Only the final epoch's mean loss is recorded for the combination.
    results.append((lr, bs, nu, running_loss / len(trainloader)))

# Rank combinations by recorded loss (ascending) and report the lowest.
# NOTE(review): the recorded loss is computed on the training batches only; no
# held-out data is evaluated in this cell.
results_by_loss = list(results)
results_by_loss.sort(key=lambda entry: entry[3])
best_combination = results_by_loss[0]
print("Best Combination: Learning Rate: {}, Batch Size: {}, Num Units: {}, Loss: {}".format(*best_combination))

Epoch: 1, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.7308477506637574
Epoch: 2, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.5623120458030701
Epoch: 3, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.4797582215118408
Epoch: 4, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.414804999294281
Epoch: 5, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.3598065354537965
Epoch: 6, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.3152550214958192
Epoch: 7, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.2790527084827423
Epoch: 8, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.2335727715301514
Epoch: 9, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.201054294614792
Epoch: 10, Learning Rate: 0.001, Batch Size: 16, Num Units: 512, Loss: 1.1693300477027893
Epoch: 1, Learning Rate: 0.001, Batch Size: 16, Num Units: 1024, Loss: 1.7905997957801818
Epoch: 2, Learning Ra