In [3]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split
from torch.nn import functional as F
import torchvision
from torchvision import datasets,transforms
import torchvision.transforms as transforms
import optuna
import os

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
CLASSES = 10
DIR = os.getcwd()
EPOCHS = 10


train_dataset = torchvision.datasets.MNIST('classifier_data', train=True, download=True)
m=len(train_dataset)

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

train_dataset.transform=transform

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
class ConvNet(nn.Module): 
    def __init__(self, trial): 
        super(ConvNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)

        dropout_rate = trial.suggest_float("dropout_rate", 0, 0.5,step=0.1)
        self.drop1 = nn.Dropout2d(p=dropout_rate)

        fc2_input_dim = trial.suggest_int("fc2_input_dim", 32, 128, 32)
        self.fc1 = nn.Linear(32*7*7, fc2_input_dim)

        dropout_rate2 = trial.suggest_float("dropout_rate", 0, 0.3,step=0.1)
        self.drop2 = nn.Dropout2d(p=dropout_rate2)

        self.fc2 = nn.Linear(fc2_input_dim, 10)

    def forward(self, x): 
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = self.drop1(x)

        # Reshape the data for fully connected layers
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)

        return x



# Function to get training/validation sets based on the batch size
def get_mnist(train_dataset, batch_size): 
    train_data, val_data = random_split(train_dataset, [int(m-m*0.2), int(m*0.2)])
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
    valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)

    return train_loader, valid_loader


In [5]:
# Optimization - uses objective function to choose hyperparameter sampling 
def objective(trial): 

    model = ConvNet(trial).to(DEVICE)
    
    # Use Optuna for optimizer hyperparams 
    optimizer_name = trial.suggest_categorical("optimizer", ["RMSprop", "SGD"])
    momentum = trial.suggest_float("momentum", 0.0, 1.0)
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    batch_size = trial.suggest_int("batch_size", 64, 256, step=64)

    criterion = nn.CrossEntropyLoss()

    # Get the dataset for this trial 
    train_loader, valid_loader = get_mnist(train_dataset, batch_size)

    for epoch in range(EPOCHS): 
        # Sets model to training mode (enable dropout layers while training)
        model.train()

        for batch_idx, (images, labels) in enumerate(train_loader): 
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

        # Set model to evaluation mode 
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (images, labels) in enumerate(valid_loader): 
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                output = model(images)

                # Get the index of the max log-probability
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(labels.view_as(pred)).sum().item()

        accuracy = correct / len(valid_loader.dataset)

        trial.report(accuracy, epoch)

        if trial.should_prune(): 
            raise optuna.exceptions.TrialPruned()

    return accuracy



In [6]:
# Create an optuna study to maximize accuracy based on hyper parameters - takes too long on my laptop
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

trial = study.best_trial

print("Accuracy: ", trial.value)
print("Best hyperparameters: ", trial.params)

[32m[I 2023-03-26 16:44:40,008][0m A new study created in memory with name: no-name-d7201f40-e5e2-49c3-99ab-ea406c996778[0m
[32m[I 2023-03-26 16:48:49,898][0m Trial 0 finished with value: 0.9789166666666667 and parameters: {'dropout_rate': 0.4, 'fc2_input_dim': 64, 'optimizer': 'RMSprop', 'momentum': 0.35822649683562435, 'lr': 0.00012533061356338708, 'batch_size': 64}. Best is trial 0 with value: 0.9789166666666667.[0m
[33m[W 2023-03-26 16:49:33,375][0m Trial 1 failed with parameters: {'dropout_rate': 0.5, 'fc2_input_dim': 128, 'optimizer': 'SGD', 'momentum': 0.5475510469501521, 'lr': 0.004312867385752242, 'batch_size': 192} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/Users/rishubhanda/.virtualenvs/tutorials/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/ms/7qgbbtls7n53d594y_8v14140000gn/T/ipykernel_17933/3476983115.py", line 37, in o

KeyboardInterrupt: 

In [None]:
df = study.trials_dataframe().drop(['state','datetime_start','datetime_complete','duration','number'], axis=1)
df.tail(5)

In [None]:
optuna.visualization.plot_optimization_history(study)
optuna.visualization.plot_contour(study, params=['batch_size', 'lr'])
optuna.visualization.plot_parallel_coordinate(study)
optuna.visualization.plot_param_importances(study)

