# Training AdaMM

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import torch


%load_ext autoreload
%autoreload 2

from run import main, experiments
from utils import read_json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Load one stored result file (ZO-AdaMM run; the file name carries the value 0.111111).
f = read_json("results/result_ZO-AdaMM_0.111111.json")

In [8]:
# Display the "values" entry of the loaded result.
f["values"]

[{'epoch_time': [7.47142767906189,
   10.247822046279907,
   13.860043048858643,
   15.669544219970703,
   15.639132738113403,
   15.749696493148804,
   15.782460927963257,
   15.702332019805908,
   15.723284244537354,
   15.807753324508667,
   15.703550815582275,
   16.017969369888306,
   19.223718404769897,
   19.289368629455566,
   19.191837549209595,
   19.175705432891846,
   19.493349313735962,
   19.094457149505615,
   19.25118637084961,
   19.387230157852173,
   18.907305479049683,
   19.42698097229004,
   19.243716955184937,
   19.319130182266235,
   19.290332555770874,
   19.368346214294434,
   19.40012240409851,
   17.545212507247925,
   15.606734275817871,
   15.666735887527466,
   15.58454179763794,
   15.55439019203186,
   15.761765241622925,
   15.545779466629028,
   15.640367031097412,
   15.608924150466919,
   15.63880729675293,
   15.78151535987854,
   15.677872657775879,
   15.856916666030884,
   15.731034994125366,
   15.755399703979492,
   15.68358588218689,
   15.5

In [None]:
def multiple_trainings_subplots(title, train_losses, validation_losses, validation_accuracies, epoch_time):
    """Show a one-row, three-panel summary figure for a single run.

    Panels, left to right: both loss curves, the accuracy curve, and a
    30-bin histogram of ``epoch_time`` with a red vertical line at its mean.

    Args:
        title: Text used as the figure super-title.
        train_losses: Series drawn with the legend label "Train Loss".
        validation_losses: Series drawn with the legend label "Test Loss".
        validation_accuracies: Series drawn with the legend label "Accuracy".
        epoch_time: Values to histogram; their mean is marked on the panel.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    fig, axs = plt.subplots(1, 3)
    loss_ax, acc_ax, time_ax = axs.flat
    fig.suptitle(title, fontsize=16)

    # Left panel: the two loss curves share one set of axes.
    for series, legend_name in ((train_losses, "Train Loss"), (validation_losses, "Test Loss")):
        loss_ax.plot(series, label=legend_name)
    loss_ax.set_title("Losses")
    loss_ax.legend()

    # Middle panel: accuracy curve.
    acc_ax.plot(validation_accuracies, label="Accuracy")
    acc_ax.set_title("Accuracy")
    acc_ax.legend()

    # Right panel: distribution of the timing values plus their mean.
    time_ax.hist(epoch_time, label="Time", bins=30)
    mean = np.asarray(epoch_time).mean()
    time_ax.axvline(mean, color='red', lw=3, label=f"mean: {np.round(mean, 3)}")
    time_ax.set_title("Execution time")
    time_ax.legend()

    # The figure created above is the current pyplot figure, so resize it directly.
    fig.set_size_inches(18, 5)

    plt.show()

## EfficientNet B0

In [None]:
# Shared run configuration; later cells update it in place before each call to `main`.
config = dict(
    seed=23,
    batch_size=128,
    net='b0',
    optimizer='AdaMM',
    epochs=10,
    verbose=True,
)

In [None]:
# Run `main` with the configuration as defined above and keep its four result series.
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B0",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry of the shared config, then run again.
config.update(optimizer='Our-AdaMM')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B0",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)


In [None]:
# Reconfigure the shared `config` in place for the ZO-AdaMM run.
# NOTE(review): the keys written here remain in `config` for every later cell
# that reuses it — confirm that carry-over is intended.
config.update({
    'optimizer': 'ZO-AdaMM',
    'lr': 1e-4,
    'beta1': 0.8,
    'beta2': 0.95,
    'mu': 1e-4,
    'epochs': 20,
})

# The whole run executes with autograd gradient tracking disabled.
with torch.no_grad():
    (train_losses,
     validation_losses,
     validation_accuracies,
     epoch_time) = main(False, config=config, zo_optim=True)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="ZO-AdaMM and EfficientNet B0",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# When the notebook is run top to bottom, the result variables at this point
# hold the ZO-AdaMM run produced by the preceding training cell, so the figure
# is titled to match the data it actually shows.
multiple_trainings_subplots("ZO-AdaMM and EfficientNet B0",train_losses, validation_losses, validation_accuracies, epoch_time)


In [None]:
# Same ZO-AdaMM settings as the earlier ZO-AdaMM cell, written into the shared `config` again.
# NOTE(review): the keys written here remain in `config` for every later cell
# that reuses it — confirm that carry-over is intended.
config.update({
    'optimizer': 'ZO-AdaMM',
    'lr': 1e-4,
    'beta1': 0.8,
    'beta2': 0.95,
    'mu': 1e-4,
    'epochs': 20,
})

# The whole run executes with autograd gradient tracking disabled.
with torch.no_grad():
    (train_losses,
     validation_losses,
     validation_accuracies,
     epoch_time) = main(False, config=config, zo_optim=True)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="ZO-AdaMM and EfficientNet B0",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B1

In [None]:
# Select AdaMM on the 'b1' network and run.
# NOTE(review): when cells are run top to bottom, `config` still holds the
# lr/beta1/beta2/mu values and epochs=20 assigned in the ZO-AdaMM cells —
# confirm that carry-over is intended for this and the following runs.
config.update(optimizer='AdaMM', net='b1')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B1",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B1",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B2

In [None]:
# Select AdaMM on the 'b2' network and run.
config.update(optimizer='AdaMM', net='b2')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B2",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B2",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B3

In [None]:
# Select AdaMM on the 'b3' network and run.
config.update(optimizer='AdaMM', net='b3')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B3",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B3",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B4

In [None]:
# Select AdaMM on the 'b4' network and run.
config.update(optimizer='AdaMM', net='b4')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B4",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B4",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B5

In [None]:
# Select AdaMM on the 'b5' network and run.
config.update(optimizer='AdaMM', net='b5')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B5",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B5",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B6

In [None]:
# Select AdaMM on the 'b6' network and run.
config.update(optimizer='AdaMM', net='b6')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B6",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B6",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

## EfficientNet B7

In [None]:
# Select AdaMM on the 'b7' network and run.
config.update(optimizer='AdaMM', net='b7')
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="AdaMM and EfficientNet B7",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

In [None]:
# Switch only the optimizer entry (network unchanged) and run again.
config.update(optimizer="Our-AdaMM")
(train_losses,
 validation_losses,
 validation_accuracies,
 epoch_time) = main(False, config=config)

In [None]:
# Summary figure for the most recent run.
multiple_trainings_subplots(
    title="Our-AdaMM and EfficientNet B7",
    train_losses=train_losses,
    validation_losses=validation_losses,
    validation_accuracies=validation_accuracies,
    epoch_time=epoch_time,
)

# Experiments

In [1]:
# Base configuration handed to `experiments` below.
# NOTE(review): this rebinds the name `config` used by the training cells earlier in the notebook.
config = dict(
    seed=42,
    batch_size=128,
    net="scalable",
    scale=1.0 / 9,
    opt_params=[1e-3, 0.9, 0.999, 1e-8],
    optimizer='ZO-AdaMM',
    epochs=50,
    dataset="mnist",
    zo_optim=True,
    mu=1e-3,
    use_scheduler=True,
    verbose=True,
)

In [12]:
# Model scales to sweep: 50 evenly spaced values from 1/9 to 120 - 1/9,
# converted from a tensor to a plain Python list.
scales = torch.linspace(start=1.0 / 9, end=120 - 1 / 9, steps=50).tolist()

In [13]:
import pandas as pd

# Write the list of scales to the file 'sdas' as a single CSV column without the index.
scales_series = pd.Series(scales)
scales_series.to_csv('sdas', index=False)

In [11]:
# Launch the sweep over `scales` with the base config, passing "results" as the second argument.
# NOTE(review): the final argument (10) presumably sets the number of seeds/repetitions per
# scale — confirm against `run.experiments`.
experiments(config, "results", scales, 10)

Scale set to : 0.1111111119389534
Running configuration:
    seed : 42
    batch_size : 128
    net : scalable
    scale : 0.1111111119389534
    opt_params : [0.001, 0.9, 0.999, 1e-08]
    optimizer : ZO-AdaMM
    epochs : 3
    dataset : mnist
    zo_optim : True
    mu : 0.001
    use_scheduler : True
    verbose : True
Seed set to : 42
Device used:  cuda 

d= 1400, sqrt(d)= 37.416573867739416


100%|██████████| 469/469 [00:06<00:00, 69.38it/s]


Epoch: 1/3 |train loss: 2.2978 |test loss: 2.2828 |acc: 0.1616 |time: 7.8134


100%|██████████| 469/469 [00:06<00:00, 69.94it/s]


Epoch: 2/3 |train loss: 2.2455 |test loss: 2.1789 |acc: 0.1512 |time: 7.6073


100%|██████████| 469/469 [00:06<00:00, 71.45it/s]


Epoch: 3/3 |train loss: 2.0850 |test loss: 1.9683 |acc: 0.2359 |time: 7.4779
Running configuration:
    seed : 43
    batch_size : 128
    net : scalable
    scale : 0.1111111119389534
    opt_params : [0.001, 0.9, 0.999, 1e-08]
    optimizer : ZO-AdaMM
    epochs : 3
    dataset : mnist
    zo_optim : True
    mu : 0.001
    use_scheduler : True
    verbose : True
Seed set to : 43
Device used:  cuda 

d= 1400, sqrt(d)= 37.416573867739416


100%|██████████| 469/469 [00:06<00:00, 72.42it/s]


Epoch: 1/3 |train loss: 2.3036 |test loss: 2.2914 |acc: 0.1124 |time: 7.4406


100%|██████████| 469/469 [00:06<00:00, 73.66it/s]


Epoch: 2/3 |train loss: 2.2780 |test loss: 2.2479 |acc: 0.2400 |time: 7.2536


100%|██████████| 469/469 [00:06<00:00, 72.93it/s]


Epoch: 3/3 |train loss: 2.1588 |test loss: 2.0024 |acc: 0.3137 |time: 7.3201
Scale set to : 1.3333333730697632
Running configuration:
    seed : 42
    batch_size : 128
    net : scalable
    scale : 1.3333333730697632
    opt_params : [0.001, 0.9, 0.999, 1e-08]
    optimizer : ZO-AdaMM
    epochs : 3
    dataset : mnist
    zo_optim : True
    mu : 0.001
    use_scheduler : True
    verbose : True
Seed set to : 42
Device used:  cuda 

d= 14384, sqrt(d)= 119.9333148045196


100%|██████████| 469/469 [00:06<00:00, 68.12it/s]


Epoch: 1/3 |train loss: 2.2533 |test loss: 2.1351 |acc: 0.3212 |time: 7.7829


100%|██████████| 469/469 [00:07<00:00, 65.80it/s]


Epoch: 2/3 |train loss: 1.8391 |test loss: 1.5511 |acc: 0.5243 |time: 8.1261


100%|██████████| 469/469 [00:06<00:00, 68.56it/s]


Epoch: 3/3 |train loss: 1.3846 |test loss: 1.2411 |acc: 0.6200 |time: 7.7913
Running configuration:
    seed : 43
    batch_size : 128
    net : scalable
    scale : 1.3333333730697632
    opt_params : [0.001, 0.9, 0.999, 1e-08]
    optimizer : ZO-AdaMM
    epochs : 3
    dataset : mnist
    zo_optim : True
    mu : 0.001
    use_scheduler : True
    verbose : True
Seed set to : 43
Device used:  cuda 

d= 14384, sqrt(d)= 119.9333148045196


 54%|█████▍    | 255/469 [00:03<00:03, 69.07it/s]


KeyboardInterrupt: 