# Combined version of Madgrad and AdaBelief for Image Classification on CIFAR10

### Import relevant librairies

In [8]:
! pip install madgrad
! pip install adabelief-pytorch==0.2.0
! pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [9]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from adabelief_pytorch import AdaBelief
from madgrad import MADGRAD
import numpy as np
import time

### Link to the Drive

In [10]:
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/MyDrive/EPFL/OptML/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Configure the GPU

The second line is only valid if a GPU is available.

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.get_device_name(0)

'Tesla T4'

### Load CIFAR10 dataset

In [12]:
# Transforamtion of the datasets into normalized tensors
transform = transforms.Compose(
[transforms.ToTensor(),
 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset= torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True)

testset= torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


### Define the model and train it

In this section, we define the model and we train it for 5 different seeds. At the end of each seed, we save the data in the drive.

Here we change the optimizer from Madgrad to AdaBelief at switching time T.

In [13]:
# Run the training 5 times
for i in range(5):
    print(i) 
    start_time = time.time()
    torch.manual_seed(i)
    
    # Define the ResNet18 model 
    model = models.resnet18()

    # Define the loss
    criterion = nn.CrossEntropyLoss()

    # Hyper-parameter for the switching time
    T = 26

    # Configure the optimizer with default hyperparameters
    optimizer = MADGRAD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0, eps=1e-6, decouple_decay=False)
    
    model.to(device)
    num_epochs = 100
    total_step = len(trainloader)
    loss_trial = []
    acc_trial = []
    
    # Train the model
    for epoch in range(num_epochs):

        # Check if we switch the optimizer to AdaBelief
        if epoch == T :
            optimizer = AdaBelief(model.parameters(), lr=0.001, betas=(0.9,0.999), eps=1e-16, weight_decouple=False, rectify=False)

        total_loss = 0
        model.train()
        
        # Training for one epoch
        for j, (images, labels) in enumerate(trainloader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Backward and optimize
            loss.backward()
            optimizer.step()

        print ("Epoch {}/{}, Loss: {:.4f}"
                      .format(epoch+1, num_epochs, loss.item()))
        
        # Store the loss at the end of the epoch
        loss_trial.append(total_loss/total_step)

        # Test the model at the end of each epoch
        model.eval()
        with torch.no_grad():
            
            # Count the number of good predictions
            correct = 0
            total = 0
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                # Check if the prediction matches the exact label value
                correct += (predicted == labels).sum().item()
        
        # Store the accuracy at the end of the epoch
        acc_trial.append(100 * correct/total)

    # Total time for the training
    train_time = time.time() - start_time
    
    # Print relevant results
    print(train_time)
    print('Accuracy on the test images: {} %'.format(100 * correct / total))
    print(loss_trial[-1])
    
    # Save the results externally
    np.save('/content/drive/MyDrive/EPFL/OptML/loss_resnet18_combined_T{}_episode_{}.npy'.format(T, i), loss_trial)
    np.save('/content/drive/MyDrive/EPFL/OptML/acc_resnet18_combined_T{}_episode_{}.npy'.format(T, i), acc_trial)
    

0
Epoch 1/100, Loss: 1.3803
Epoch 2/100, Loss: 0.6371
Epoch 3/100, Loss: 0.9246
Epoch 4/100, Loss: 0.6219
Epoch 5/100, Loss: 0.7822
Epoch 6/100, Loss: 0.5588
Epoch 7/100, Loss: 0.5485
Epoch 8/100, Loss: 0.4295
Epoch 9/100, Loss: 0.6224
Epoch 10/100, Loss: 0.3011
Epoch 11/100, Loss: 0.3463
Epoch 12/100, Loss: 0.5159
Epoch 13/100, Loss: 0.2475
Epoch 14/100, Loss: 0.1441
Epoch 15/100, Loss: 0.1788
Epoch 16/100, Loss: 0.2145
Epoch 17/100, Loss: 0.1242
Epoch 18/100, Loss: 0.1352
Epoch 19/100, Loss: 0.0555
Epoch 20/100, Loss: 0.2055
Epoch 21/100, Loss: 0.1121
Epoch 22/100, Loss: 0.1489
Epoch 23/100, Loss: 0.0659
Epoch 24/100, Loss: 0.0781
Epoch 25/100, Loss: 0.0477
Epoch 26/100, Loss: 0.1535
[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False          