# Combined version of Madgrad and AdaBelief for Image Classification on CIFAR10

### Import relevant librairies

In [1]:
! pip install madgrad
! pip install adabelief-pytorch==0.2.0
! pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting madgrad
  Downloading madgrad-1.2-py3-none-any.whl (11 kB)
Installing collected packages: madgrad
Successfully installed madgrad-1.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting adabelief-pytorch==0.2.0
  Downloading adabelief_pytorch-0.2.0-py3-none-any.whl (5.7 kB)
Collecting colorama>=0.4.0
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: colorama, adabelief-pytorch
Successfully installed adabelief-pytorch-0.2.0 colorama-0.4.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-2.10.1-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 17.2 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloadin

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from adabelief_pytorch import AdaBelief
from madgrad import MADGRAD
import numpy as np
import time

### Link to the Drive

In [3]:
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/MyDrive/EPFL/OptML/')

Mounted at /content/drive


### Configure the GPU

The second line is only valid if a GPU is available.

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.get_device_name(0)

'Tesla T4'

### Load CIFAR10 dataset

In [5]:
# Transforamtion of the datasets into normalized tensors
transform = transforms.Compose(
[transforms.ToTensor(),
 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset= torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True)

testset= torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


### Define the model and train it

In this section, we define the model and we train it for 5 different seeds. At the end of each seed, we save the data in the drive.

Here we change the optimizer from Madgrad to AdaBelief at switching time T.

In [7]:
# Run the training 5 times
for i in range(5):
    print(i) 
    start_time = time.time()
    torch.manual_seed(i)
    
    # Define the ResNet18 model 
    model = models.resnet18()

    # Define the loss
    criterion = nn.CrossEntropyLoss()

    # Hyper-parameter for the switching time
    T = 26

    # Configure the optimizer with default hyperparameters
    optimizer = MADGRAD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0, eps=1e-6, decouple_decay=False)
    
    model.to(device)
    num_epochs = 100
    total_step = len(trainloader)
    loss_trial = []
    acc_trial = []
    
    # Train the model
    for epoch in range(num_epochs):

        # Check if we switch the optimizer to AdaBelief
        if epoch == T :
            optimizer = AdaBelief(model.parameters(), lr=0.001, betas=(0.9,0.999), eps=1e-16, weight_decouple=False, rectify=False)

        total_loss = 0
        model.train()
        
        # Training for one epoch
        for j, (images, labels) in enumerate(trainloader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Backward and optimize
            loss.backward()
            optimizer.step()

        print ("Epoch {}/{}, Loss: {:.4f}"
                      .format(epoch+1, num_epochs, loss.item()))
        
        # Store the loss at the end of the epoch
        loss_trial.append(total_loss/total_step)

        # Test the model at the end of each epoch
        model.eval()
        with torch.no_grad():
            
            # Count the number of good predictions
            correct = 0
            total = 0
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                # Check if the prediction matches the exact label value
                correct += (predicted == labels).sum().item()
        
        # Store the accuracy at the end of the epoch
        acc_trial.append(100 * correct/total)

    # Total time for the training
    train_time = time.time() - start_time
    
    # Print relevant results
    print(train_time)
    print('Accuracy on the test images: {} %'.format(100 * correct / total))
    print(loss_trial[-1])
    
    # Save the results externally
    np.save('/content/drive/MyDrive/EPFL/OptML/loss_resnet18_combined_T{}_episode_{}.npy'.format(T, i), loss_trial)
    np.save('/content/drive/MyDrive/EPFL/OptML/acc_resnet18_combined_T{}_episode_{}.npy'.format(T, i), acc_trial)
    

0
Epoch 1/100, Loss: 1.3364
Epoch 2/100, Loss: 0.7776
Epoch 3/100, Loss: 0.9733
Epoch 4/100, Loss: 0.6601
Epoch 5/100, Loss: 0.6816
Epoch 6/100, Loss: 0.4947
Epoch 7/100, Loss: 0.4411
Epoch 8/100, Loss: 0.4857
Epoch 9/100, Loss: 0.3892
Epoch 10/100, Loss: 0.3789
Epoch 11/100, Loss: 0.3288
Epoch 12/100, Loss: 0.3617
Epoch 13/100, Loss: 0.1003
Epoch 14/100, Loss: 0.3243
Epoch 15/100, Loss: 0.1239
Epoch 16/100, Loss: 0.1683
Epoch 17/100, Loss: 0.1085
Epoch 18/100, Loss: 0.2198
Epoch 19/100, Loss: 0.1015
Epoch 20/100, Loss: 0.0793
Epoch 21/100, Loss: 0.0506
Epoch 22/100, Loss: 0.0849
Epoch 23/100, Loss: 0.1140
Epoch 24/100, Loss: 0.0369
Epoch 25/100, Loss: 0.1106
Epoch 26/100, Loss: 0.0655
[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False          

KeyboardInterrupt: ignored