Train a CNN-based classification model and tune its hyperparameters with the Optuna library on the dataset loaded below (MNIST). Perform 100 trials.

Reference:

https://github.com/elena-ecn/optuna-optimization-for-PyTorch-CNN/blob/main/optuna_optimization.py


Install the Optuna library

In [None]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.1.1-py3-none-any.whl (365 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.10.3-py3-none-any.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.3/212.3 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
Collecting Mako
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.10.3 cmaes-0.9.1 colorlog-6.7.0 optuna-3.1.1


Import all required libraries

In [None]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import optuna
from optuna.trial import TrialState
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

Set hyperparameters

In [None]:
# Size of the model's output layer (used by the final Linear layer).
CLASSES = 10
# NOTE: not referenced by the cells visible here; the epoch count is tuned per trial.
EPOCHS = 10
# Mini-batch size shared by the training and validation loaders.
Batch_size = 128
# Per-epoch sample budgets: 30 batches for training, 10 batches for validation.
N_training_samples = 30 * Batch_size
N_validation_samples = 10 * Batch_size
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Define the CNN model

In [None]:
def define_model(trial):
    """Build a CNN whose depth is chosen by the given Optuna trial.

    The network is a stack of ``n_layers`` (3 to 5, suggested by ``trial``)
    unpadded 3x3 convolutions, each followed by ReLU. The channel count starts
    at 16 and doubles with every layer. A flatten + linear head maps the
    features to ``CLASSES`` log-probabilities.

    The input geometry is hard-coded below: 1 input channel and a 28x28 image.

    Args:
        trial: Optuna trial used to sample the ``n_layers`` hyperparameter.

    Returns:
        An ``nn.Sequential`` model ending in ``LogSoftmax``.
    """
    # Depth of the convolutional stack is the only architectural hyperparameter.
    n_layers = trial.suggest_int("n_layers", 3, 5)

    modules = []
    channels_in = 1
    channels_out = 16
    side = 28
    for _ in range(n_layers):
        modules += [nn.Conv2d(channels_in, channels_out, 3), nn.ReLU()]
        # A 3x3 convolution without padding trims one pixel from every border.
        side -= 2
        # The next layer consumes this layer's channels and doubles them.
        channels_in, channels_out = channels_out, channels_out * 2

    # Classification head: flatten the feature maps and emit log-probabilities.
    modules += [
        nn.Flatten(),
        nn.Linear(channels_in * side * side, CLASSES),
        nn.LogSoftmax(dim=1),
    ]
    return nn.Sequential(*modules)

Load dataset

In [None]:
# MNIST train/test splits, downloaded to ./data on first use and converted to tensors.
trainset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Shuffled mini-batch loaders; the MNIST test split serves as the validation set.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=2,
)
valid_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=Batch_size,
    shuffle=True,
    num_workers=2,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 123006154.62it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 111542996.15it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 43753464.69it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4708484.62it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Train function

In [None]:
def train_function(model, train_loader, optimizer, criterion=None):
    """Train ``model`` for one (truncated) pass over ``train_loader``.

    Training stops once ``N_training_samples`` samples have been consumed, so
    each call processes at most ``N_training_samples / Batch_size`` batches.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates the model parameters.
        criterion: Optional loss callable ``criterion(output, target)``.
            Defaults to ``F.nll_loss``, the loss this function always used.
            The parameter exists so callers that pass a loss as a fourth
            argument no longer raise ``TypeError``.
    """
    if criterion is None:
        criterion = F.nll_loss

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Cap the work per epoch to keep each trial cheap.
        if batch_idx * Batch_size >= N_training_samples:
            break

        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

Test function

In [None]:
def test_function(model, valid_loader, criterion=None):
    """Return the classification accuracy of ``model`` on ``valid_loader``.

    Evaluation stops once ``N_validation_samples`` samples have been scored.
    Accuracy is computed over the samples actually evaluated, so a short last
    batch or a small dataset cannot skew the ratio.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        valid_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Unused. It is optional so that both two-argument and
            three-argument calls work.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when no sample
        was evaluated.
    """
    model.eval()
    correct = 0  # correctly classified samples
    evaluated = 0  # samples actually scored
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            # Cap the evaluation cost per epoch.
            if batch_idx * Batch_size >= N_validation_samples:
                break

            data, target = data.to(device), target.to(device)
            output = model(data)
            # The predicted class is the index of the largest score.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            evaluated += target.size(0)

    if evaluated == 0:
        return 0.0
    return correct / evaluated

Objective function to run the model

In [None]:
def objective(trial):
    """Optuna objective: train a sampled CNN and return its validation accuracy.

    The trial samples the model depth (inside ``define_model``), the number of
    epochs (10 to 50) and the Adam learning rate (1e-4 to 1e-1, log scale).
    After every epoch the validation accuracy is reported to Optuna so that
    unpromising trials can be pruned early.

    Args:
        trial: Optuna trial supplying the hyperparameter suggestions.

    Returns:
        Validation accuracy measured after the last completed epoch.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner stops the trial.
    """
    model = define_model(trial).to(device)
    num_epochs = trial.suggest_int('num_epochs', 10, 50)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)

    # Optimizer and loss object for this trial.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    accuracy = 0.0
    for epoch in range(num_epochs):
        # train_function takes (model, loader, optimizer); the previous call
        # passed a fourth argument and raised TypeError.
        train_function(model, train_loader, optimizer)
        # test_function takes (model, loader, criterion); the previous call
        # omitted criterion and raised TypeError.
        accuracy = test_function(model, valid_loader, criterion)

        # Report the intermediate value so the pruner can stop weak trials.
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

Driver code

In [None]:
# Run 100 Optuna trials, maximizing the value returned by the objective.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

# Split the trials by outcome: completed vs. stopped early by the pruner.
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])

# Summary of the study.
print("\nStudy statistics: ")
for label, count in (
    ("  Number of finished trials: ", len(study.trials)),
    ("  Number of pruned trials: ", len(pruned_trials)),
    ("  Number of complete trials: ", len(complete_trials)),
):
    print(label, count)

# Best trial and the hyperparameters that produced it.
trial = study.best_trial
print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2023-04-22 19:05:23,452][0m A new study created in memory with name: no-name-4ac71109-cbdd-49e8-ac4f-f436fd67315b[0m
[32m[I 2023-04-22 19:05:53,562][0m Trial 0 finished with value: 0.9875 and parameters: {'n_layers': 3, 'num_epochs': 25, 'learning_rate': 0.0016415001469332474}. Best is trial 0 with value: 0.9875.[0m
[32m[I 2023-04-22 19:06:16,937][0m Trial 1 finished with value: 0.94921875 and parameters: {'n_layers': 3, 'num_epochs': 20, 'learning_rate': 0.00010191922414885857}. Best is trial 0 with value: 0.9875.[0m
[32m[I 2023-04-22 19:07:03,465][0m Trial 2 finished with value: 0.97734375 and parameters: {'n_layers': 5, 'num_epochs': 36, 'learning_rate': 0.004198283595653838}. Best is trial 0 with value: 0.9875.[0m
[32m[I 2023-04-22 19:08:00,545][0m Trial 3 finished with value: 0.93984375 and parameters: {'n_layers': 4, 'num_epochs': 47, 'learning_rate': 0.016232497204092512}. Best is trial 0 with value: 0.9875.[0m
[32m[I 2023-04-22 19:09:02,466][0m Trial 4 f


Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.9921875
  Params: 
    n_layers: 3
    num_epochs: 38
    learning_rate: 0.002936918342296688
