Key steps for assignment:
- Get the data for the comparison
- Get the models used for the comparison
- Train the models with suggested hyperparameters (compare the test results with a given number of epochs)
- OR train the models until convergence and compare the results (number of steps and test results)
- Hyperparameter tuning for a fixed number of trials
- Compare the improvements with tuning

Tasks:
- image classification (CIFAR10, CIFAR100) -> ResNet18, ResNet34, ResNet50 (but very large)
- image recognition (ImageNet (maybe)) -> ResNet18 or ResNet50 (ImageNet is much harder to download)
- Denoising (SIDD dataset) -> UNet or ResUnet
- GAN (CIFAR10) -> WassersteinGAN
- Language modelling (Penn TreeBank)

In [1]:
! pip install madgrad
! pip install adabelief-pytorch==0.2.0
! pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting madgrad
  Downloading madgrad-1.2-py3-none-any.whl (11 kB)
Installing collected packages: madgrad
Successfully installed madgrad-1.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting adabelief-pytorch==0.2.0
  Downloading adabelief_pytorch-0.2.0-py3-none-any.whl (5.7 kB)
Collecting colorama>=0.4.0
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: colorama, adabelief-pytorch
Successfully installed adabelief-pytorch-0.2.0 colorama-0.4.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 12.8 MB/s 
[?25hCollecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81

Import relevant libraries

In [2]:
from google.colab import drive
import sys

# Mount Google Drive; the project folder on it is used both for imports and
# for persisting results.
drive.mount('/content/drive')

# Make the project folder importable. The membership check keeps sys.path free
# of duplicate entries when this cell is re-run in the same session.
project_dir = '/content/drive/MyDrive/EPFL/OptML/'
if project_dir not in sys.path:
    sys.path.append(project_dir)

Mounted at /content/drive


In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from adabelief_pytorch import AdaBelief
from madgrad import MADGRAD
import numpy as np
import time

In [4]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Querying the CUDA device name raises on a CPU-only runtime, so only ask for
# it when CUDA is actually present.
device_name = torch.cuda.get_device_name(0) if device.type == 'cuda' else 'cpu'
device_name

'Tesla T4'

### DATA

In [6]:
# Select the dataset to use (uncomment exactly one line).
# data = "CIFAR10"
data = "CIFAR100"
# data = "ImageNet"

# Shared per-channel normalisation: (x - 0.5) / 0.5 on each RGB channel.
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

if data in ("CIFAR10", "CIFAR100"):
    # Both CIFAR variants share the same constructor signature, so only the
    # dataset class differs.
    if data == "CIFAR10":
        dataset_cls = torchvision.datasets.CIFAR10
    else:
        dataset_cls = torchvision.datasets.CIFAR100
    transform = transforms.Compose([transforms.ToTensor(), normalize])
    trainset = dataset_cls(root='./data', train=True,
                           download=True, transform=transform)
    testset = dataset_cls(root='./data', train=False,
                          download=True, transform=transform)
elif data == "ImageNet":
    # For now not working. Need to download the data locally first:
    # torchvision cannot download ImageNet, the archives must already be
    # present under `root`.
    # ImageNet images come in varying sizes, so they are resized and cropped
    # to a fixed 224x224 input before batching.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    # ImageNet is selected via `split` ('train' / 'val'), not `train=`.
    trainset = torchvision.datasets.ImageNet(root='./data', split='train',
                                             transform=transform)
    testset = torchvision.datasets.ImageNet(root='./data', split='val',
                                            transform=transform)
else:
    # Fail loudly instead of leaving the loaders undefined.
    raise ValueError("Unknown dataset: {!r}".format(data))

# Training batches are reshuffled every epoch; the test order stays fixed.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Choose the model to use (uncomment the line)
# The network is created with torchvision's default arguments, i.e. randomly
# initialised weights are requested by not passing any pretrained weights.
# NOTE(review): no `num_classes` is passed, so the classifier head keeps
# torchvision's default size rather than matching the selected dataset
# (10 / 100 classes for CIFAR) -- confirm this is intended.

model = models.resnet18()
# model = models.resnet34()

In [None]:
# Loss function and optimizer, using the hyperparameters suggested for CIFAR.
# Keep exactly one `optimizer = ...` assignment active; the alternatives are
# left commented out so they can be swapped in for the comparison.
criterion = nn.CrossEntropyLoss()

# optimizer = AdaBelief(model.parameters(), lr=0.001, betas=(0.9,0.999), eps=1e-16, weight_decouple=False, rectify=False) #fixed_decay=False, amsgrad=False, weight_decay=5e-4
optimizer = MADGRAD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0,
    eps=1e-6,
    decouple_decay=False,
)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

Train the model

In [None]:
def _train_one_epoch(model, loader, criterion, optimizer, device,
                     epoch, num_epochs, log_every=100):
    """Run one optimisation pass over `loader` and return the mean batch loss.

    `epoch` (0-based) and `num_epochs` are only used for the progress message,
    which is printed every `log_every` steps.
    """
    model.train()
    total_step = len(loader)
    total_loss = 0
    for j, (images, labels) in enumerate(loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (j + 1) % log_every == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch + 1, num_epochs, j + 1, total_step, loss.item()))
    return total_loss / total_step


def _evaluate(model, loader, device):
    """Return the top-1 accuracy (in percent) of `model` over `loader`."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            # Index of the largest logit per sample is the predicted class.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


# Experiment configuration. The model and optimizer are selected by name and
# the same names are used to build the output file names, so the saved results
# can never be labelled with a different architecture/optimizer than the one
# that was actually trained.
model_name = 'resnet34'          # 'resnet18' or 'resnet34'
optimizer_name = 'adabelief'     # 'adabelief', 'madgrad', 'adam' or 'sgd'
results_dir = '/content/drive/MyDrive/EPFL/OptML'
num_seeds = 5
num_epochs = 100

model_factories = {
    'resnet18': models.resnet18,
    'resnet34': models.resnet34,
}

# Optimizers with suggested hyperparameters for CIFAR. Each entry is a factory
# so that only the selected optimizer is ever constructed.
# AdaBelief options left at their defaults: fixed_decay=False, amsgrad=False
# (weight_decay=5e-4 is an untried alternative).
optimizer_factories = {
    'adabelief': lambda params: AdaBelief(params, lr=0.001, betas=(0.9, 0.999), eps=1e-16,
                                          weight_decouple=False, rectify=False),
    'madgrad': lambda params: MADGRAD(params, lr=0.001, momentum=0.9, weight_decay=0,
                                      eps=1e-6, decouple_decay=False),
    'adam': lambda params: torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08,
                                            weight_decay=0, amsgrad=False),
    'sgd': lambda params: torch.optim.SGD(params, lr=0.01, momentum=0.9),
}

# One independent training run per seed.
for i in range(num_seeds):
    print(i)
    start_time = time.time()
    torch.manual_seed(i)

    # Move the model to the target device before building the optimizer, as
    # recommended by the torch.optim documentation.
    model = model_factories[model_name]()
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer_factories[optimizer_name](model.parameters())

    # Train the model for image classification, testing after every epoch.
    loss_trial = []   # mean training loss per epoch
    acc_trial = []    # test accuracy (%) per epoch
    for epoch in range(num_epochs):
        loss_trial.append(_train_one_epoch(model, trainloader, criterion, optimizer,
                                           device, epoch, num_epochs))
        acc_trial.append(_evaluate(model, testloader, device))

    # Wall-clock time of the whole run, including the per-epoch evaluation.
    train_time = time.time() - start_time
    print(train_time)

    np.save('{}/loss_{}_{}_episode_{}.npy'.format(results_dir, model_name, optimizer_name, i), loss_trial)
    np.save('{}/acc_{}_{}_episode_{}.npy'.format(results_dir, model_name, optimizer_name, i), acc_trial)
    print('Accuracy of the model on the test images: {} %'.format(acc_trial[-1]))

0
[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False              False
>=0.1.0 (Current 0.2.0)  1e-16  True               True
[34mSGD better than Adam (e.g. CNN for Image Classification)    Adam better than SGD (e.g. Transformer, GAN)
----------------------------------------------------------  ----------------------------------------------
Recommended eps = 1e-8                                      Recommended eps = 1e-16
[34mFor a complete table of recommended hyperparameters, see
[34mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[32mYou can disable the log message by setting "print_change_log = False", though it is recommended to keep as a reminder.
[0m
Epoch [1/100], Step [100/391] Loss: 3.9122
Epoch [1/100], Step [200/391] Los

In [None]:
# Persist the loss / accuracy histories of a ResNet18 + SGD run to Google Drive.
# NOTE(review): `losses` and `accuracies` are not assigned in any visible cell
# (the training cell produces `loss_trial` / `acc_trial`) -- presumably left
# over from an earlier version of the notebook; confirm before running.
np.save('/content/drive/MyDrive/EPFL/OptML/losses_resnet18_sgd.npy', losses)
np.save('/content/drive/MyDrive/EPFL/OptML/accs_resnet18_sgd.npy', accuracies)

In [None]:
# Evaluate the current `model` on `testloader` and report its top-1 accuracy.
model.eval()  # switch layers such as batch norm to inference behaviour
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch_images, batch_labels in testloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_images)
        # The predicted class is the index of the largest logit per sample.
        predicted = logits.argmax(dim=1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

Accuracy of the model on the test images: 77.62 %
