In [4]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import optuna
from optuna.trial import TrialState

from torch.utils.data import DataLoader
from torchvision import datasets

In [5]:
class CNN(nn.Module):
    def __init__(self, trial, num_conv_layers, num_dense_layers, num_filters, num_neurons):
        super(CNN, self).__init__()
        input_size = 32
        kernel_size = 3

        # define the convolutional layers
        self.conv_layers = nn.ModuleList([nn.Conv2d(3, num_filters[0], kernel_size=(3, 3))])
        out_size = input_size - kernel_size + 1
        for i in range(1, num_conv_layers):
            conv = nn.Conv2d(in_channels=num_filters[i-1], out_channels=num_filters[i], kernel_size=(3, 3))
            self.conv_layers.append(conv)

            out_size = out_size - kernel_size + 1

        # size of flattened features from convs
        self.out_feature = num_filters[num_conv_layers-1] * out_size * out_size

        # define the dense layers
        self.dense_layers = nn.ModuleList([nn.Linear(self.out_feature, num_neurons[0])])
        for i in range(1, num_dense_layers):
            dense = nn.Linear(num_neurons[i-1], num_neurons[i])
            self.dense_layers.append(dense)


    def forward(self, x):
        # applying conv layers
        for i, conv_i in enumerate(self.conv_layers):
            x = F.relu(conv_i(x))

        # flatten the cnn features
        x = x.view(-1, self.out_feature)

        # applying dense layers
        for i, dense_i in enumerate(self.dense_layers):
            if i == len(self.dense_layers) - 1: # last layer: log_softmax
                x = F.log_softmax(dense_i(x), dim=1)
            else: # non-last layers: ReLU
                x = F.relu(dense_i(x))

        return x

In [6]:
def objective(trial):
    # model hyperparams value ranges
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 4)
    num_dense_layers = trial.suggest_int('num_dense_layers', 1, 3)
    num_filters = [int(trial.suggest_float(f'num_filter_conv{i}', 8, 64, step=8)) for i in range(num_conv_layers)]
    num_neurons = [int(trial.suggest_float(f'num_neuron_dense{i}', 8, 64, step=8)) for i in range(num_dense_layers-1)]
    num_neurons += [100]

    # optimizer hyperparams value ranges
    optimizer_name = trial.suggest_categorical('optimizer_name', ['Adam', 'RMSprop', 'SGD'])
    lr = trial.suggest_float('lr', 1e-3, 1e-2, log=True)

    # init optimizer and model based on suggested params
    model = CNN(trial, num_conv_layers, num_dense_layers, num_filters, num_neurons)
    model = model.to(device)

    match optimizer_name:
        case 'Adam':
            optimizer = torch.optim.Adam(model.parameters(), lr)
        case 'RMSprop':
            optimizer = torch.optim.RMSprop(model.parameters(), lr)
        case 'SGD':
            optimizer = torch.optim.SGD(model.parameters(), lr)


    # training loop
    for epoch in range(n_epochs):
        model.train() # set model to training mode

        for X, y in train_loader: # load a training batch
            # clear prev batch grads
            optimizer.zero_grad()
            # send X and y to suitable device for model
            X = X.to(device)
            y = y.to(device)
            # forward
            p = model(X)
            # compute and propagate loss through network
            loss = F.nll_loss(p, y)
            loss.backward()
            optimizer.step()

        # set model to evaluation mode
        model.eval()
        num_correct = 0
        # turn off gradient calculation
        with torch.no_grad():
            for X, y in test_loader:  # load a testing batch
                # send X and y to suitable device for model
                X = X.to(device)
                y = y.to(device)
                # forward
                p = model(X)
                # finding max value in each row, return indexes of max values
                pred = p.data.max(1, keepdim=True)[1]
                # count correct predictions
                num_correct += pred.eq(y.data.view_as(pred)).sum()

        # calculating accuracy
        acc = num_correct / len(test_loader.dataset)
        # for pruning (stops trial early if not promising)
        trial.report(acc, epoch)
        # handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return acc

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

n_epochs = 10
time_out = 1800
bs_train = 64
bs_test = 1024

cuda


In [7]:
# loading (downloading) cifar100 datasets
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))
])
train_loader = DataLoader(
    torchvision.datasets.CIFAR100('.', train=True, download=True, transform=transform),
    batch_size=bs_train, shuffle=True
)
test_loader = DataLoader(
    torchvision.datasets.CIFAR100('.', train=False, download=True, transform=transform),
    batch_size=bs_test, shuffle=False
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:05<00:00, 28648412.55it/s]


Extracting ./cifar-100-python.tar.gz to .
Files already downloaded and verified


In [9]:
# create an optuna study to maximize test accuracy
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=None, timeout=time_out, show_progress_bar=True, gc_after_trial=True)

[I 2023-07-01 16:47:34,424] A new study created in memory with name: no-name-bf60752c-137b-4134-a4ce-ea4cc5420a57


   0%|          | 00:00/30:00

[I 2023-07-01 16:51:02,871] Trial 0 finished with value: 0.25029999017715454 and parameters: {'num_conv_layers': 1, 'num_dense_layers': 1, 'num_filter_conv0': 16.0, 'optimizer_name': 'Adam', 'lr': 0.0011957945174085854}. Best is trial 0 with value: 0.25029999017715454.
[I 2023-07-01 16:54:23,344] Trial 1 finished with value: 0.10260000079870224 and parameters: {'num_conv_layers': 2, 'num_dense_layers': 1, 'num_filter_conv0': 16.0, 'num_filter_conv1': 8.0, 'optimizer_name': 'Adam', 'lr': 0.005241387279134828}. Best is trial 0 with value: 0.25029999017715454.
[I 2023-07-01 16:57:42,793] Trial 2 finished with value: 0.1030999943614006 and parameters: {'num_conv_layers': 1, 'num_dense_layers': 3, 'num_filter_conv0': 32.0, 'num_neuron_dense0': 24.0, 'num_neuron_dense1': 16.0, 'optimizer_name': 'RMSprop', 'lr': 0.002992343901890578}. Best is trial 0 with value: 0.25029999017715454.
[I 2023-07-01 17:01:03,006] Trial 3 finished with value: 0.21679998934268951 and parameters: {'num_conv_layers'

In [None]:
# Find number of pruned and completed trials
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

In [None]:
# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

trial = study.best_trial
print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Save results to csv file
df = study.trials_dataframe().drop(['datetime_start', 'datetime_complete', 'duration'], axis=1)  # Exclude columns
df = df.loc[df['state'] == 'COMPLETE']        # Keep only results that did not prune
df = df.drop('state', axis=1)                 # Exclude state column
df = df.sort_values('value')                  # Sort based on accuracy
df.to_csv('optuna_results.csv', index=False)  # Save to csv file

# Display results in a dataframe
print("\nOverall Results (ordered by accuracy):\n {}".format(df))

# Find the most important hyperparameters
most_important_parameters = optuna.importance.get_param_importances(study, target=None)

# Display the most important hyperparameters
print('\nMost important hyperparameters:')
for key, value in most_important_parameters.items():
    print('  {}:{}{:.2f}%'.format(key, (15-len(key))*' ', value*100))

# Part A
without prune

In [8]:
def objective(trial):
    # model hyperparams value ranges
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 4)
    num_dense_layers = trial.suggest_int('num_dense_layers', 1, 3)
    num_filters = [int(trial.suggest_float(f'num_filter_conv{i}', 8, 64, step=8)) for i in range(num_conv_layers)]
    num_neurons = [int(trial.suggest_float(f'num_neuron_dense{i}', 8, 64, step=8)) for i in range(num_dense_layers-1)]
    num_neurons += [100]

    # optimizer hyperparams value ranges
    optimizer_name = trial.suggest_categorical('optimizer_name', ['Adam', 'RMSprop', 'SGD'])
    lr = trial.suggest_float('lr', 1e-3, 1e-2, log=True)

    # init optimizer and model based on suggested params
    model = CNN(trial, num_conv_layers, num_dense_layers, num_filters, num_neurons)
    model = model.to(device)

    match optimizer_name:
        case 'Adam':
            optimizer = torch.optim.Adam(model.parameters(), lr)
        case 'RMSprop':
            optimizer = torch.optim.RMSprop(model.parameters(), lr)
        case 'SGD':
            optimizer = torch.optim.SGD(model.parameters(), lr)


    # training loop
    for epoch in range(n_epochs):
        model.train() # set model to training mode

        for X, y in train_loader: # load a training batch
            # clear prev batch grads
            optimizer.zero_grad()
            # send X and y to suitable device for model
            X = X.to(device)
            y = y.to(device)
            # forward
            p = model(X)
            # compute and propagate loss through network
            loss = F.nll_loss(p, y)
            loss.backward()
            optimizer.step()

        # set model to evaluation mode
        model.eval()
        num_correct = 0
        # turn off gradient calculation
        with torch.no_grad():
            for X, y in test_loader:  # load a testing batch
                # send X and y to suitable device for model
                X = X.to(device)
                y = y.to(device)
                # forward
                p = model(X)
                # finding max value in each row, return indexes of max values
                pred = p.data.max(1, keepdim=True)[1]
                # count correct predictions
                num_correct += pred.eq(y.data.view_as(pred)).sum()

        # calculating accuracy
        acc = num_correct / len(test_loader.dataset)
        # for pruning (stops trial early if not promising)
        # trial.report(acc, epoch)
        # # handle pruning based on the intermediate value.
        # if trial.should_prune():
        #     raise optuna.exceptions.TrialPruned()

    return acc

In [9]:
# create an optuna study to maximize test accuracy
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=None, timeout=time_out, show_progress_bar=True, gc_after_trial=True)

[I 2023-07-01 17:54:49,869] A new study created in memory with name: no-name-d55d1176-9c49-4766-a697-f52e7844575a


   0%|          | 00:00/30:00

[I 2023-07-01 17:58:00,189] Trial 0 finished with value: 0.18310000002384186 and parameters: {'num_conv_layers': 2, 'num_dense_layers': 2, 'num_filter_conv0': 40.0, 'num_filter_conv1': 56.0, 'num_neuron_dense0': 64.0, 'optimizer_name': 'SGD', 'lr': 0.002596746208671124}. Best is trial 0 with value: 0.18310000002384186.
[I 2023-07-01 18:00:59,033] Trial 1 finished with value: 0.009999999776482582 and parameters: {'num_conv_layers': 4, 'num_dense_layers': 1, 'num_filter_conv0': 32.0, 'num_filter_conv1': 56.0, 'num_filter_conv2': 40.0, 'num_filter_conv3': 24.0, 'optimizer_name': 'RMSprop', 'lr': 0.0033632320562460966}. Best is trial 0 with value: 0.18310000002384186.
[I 2023-07-01 18:04:02,244] Trial 2 finished with value: 0.009999999776482582 and parameters: {'num_conv_layers': 4, 'num_dense_layers': 1, 'num_filter_conv0': 16.0, 'num_filter_conv1': 8.0, 'num_filter_conv2': 56.0, 'num_filter_conv3': 32.0, 'optimizer_name': 'Adam', 'lr': 0.003871012680026735}. Best is trial 0 with value: 0

In [10]:
# Find number of pruned and completed trials
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

In [11]:
# Display the study statistics
print("\nStudy statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

trial = study.best_trial
print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  0
  Number of complete trials:  10
Best trial:
  Value:  0.2678000032901764
  Params: 
    num_conv_layers: 1
    num_dense_layers: 1
    num_filter_conv0: 40.0
    optimizer_name: SGD
    lr: 0.008202030880803337


In [12]:
# Save results to csv file
df = study.trials_dataframe().drop(['datetime_start', 'datetime_complete', 'duration'], axis=1)  # Exclude columns
df = df.loc[df['state'] == 'COMPLETE']        # Keep only results that did not prune
df = df.drop('state', axis=1)                 # Exclude state column
df = df.sort_values('value')                  # Sort based on accuracy
df.to_csv('optuna_results.csv', index=False)  # Save to csv file

# Display results in a dataframe
print("\nOverall Results (ordered by accuracy):\n {}".format(df))

# Find the most important hyperparameters
most_important_parameters = optuna.importance.get_param_importances(study, target=None)

# Display the most important hyperparameters
print('\nMost important hyperparameters:')
for key, value in most_important_parameters.items():
    print('  {}:{}{:.2f}%'.format(key, (15-len(key))*' ', value*100))


Overall Results (ordered by accuracy):
    number   value  params_lr  params_num_conv_layers  params_num_dense_layers  \
1       1  0.0100   0.003363                       4                        1   
2       2  0.0100   0.003871                       4                        1   
3       3  0.0100   0.005127                       2                        1   
5       5  0.0100   0.002329                       4                        3   
6       6  0.0100   0.001210                       1                        3   
9       9  0.0100   0.008666                       4                        3   
7       7  0.1609   0.002364                       3                        2   
0       0  0.1831   0.002597                       2                        2   
8       8  0.2003   0.001631                       1                        2   
4       4  0.2678   0.008202                       1                        1   

   params_num_filter_conv0  params_num_filter_conv1  params_num_fil