# Training AdaMM

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch


%load_ext autoreload
%autoreload 2

from run import main, experiments
from utils import read_json

In [None]:
def multiple_trainings_subplots(title, train_losses, validation_losses, validation_accuracies, epoch_time):
    """Show a three-panel summary figure for one training run.

    Panels, left to right: the two loss curves, the accuracy curve, and a
    30-bin histogram of ``epoch_time`` with a vertical line at its mean.

    Args:
        title: Text used as the figure-level title.
        train_losses: Sequence drawn as the "Train Loss" curve.
        validation_losses: Sequence drawn as the "Test Loss" curve.
        validation_accuracies: Sequence drawn as the "Accuracy" curve.
        epoch_time: Values to histogram. May be empty, in which case the
            mean marker is omitted (the mean of no values is NaN).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Size the figure when it is created instead of resizing it afterwards.
    fig, axs = plt.subplots(1, 3, figsize=(18, 5))
    fig.suptitle(title, fontsize=16)
    loss_ax, acc_ax, time_ax = axs.flat

    # Curves are drawn against their index; presumably one value per epoch.
    loss_ax.plot(train_losses, label="Train Loss")
    loss_ax.plot(validation_losses, label="Test Loss")
    loss_ax.set_title("Losses")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend()

    acc_ax.plot(validation_accuracies, label="Accuracy")
    acc_ax.set_title("Accuracy")
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.legend()

    time_ax.hist(epoch_time, label="Time", bins=30)
    times = np.asarray(epoch_time)
    if times.size:
        # Only mark the mean when there is at least one measurement.
        mean = times.mean()
        time_ax.axvline(mean, color='red', lw=3, label=f"mean: {np.round(mean, 3)}")
    time_ax.set_title("Execution time")
    time_ax.set_xlabel("Time per epoch")
    time_ax.set_ylabel("Count")
    time_ax.legend()

    plt.show()

## EfficientNet B0

In [None]:
# Shared run configuration; later cells update this dict in place before each run.
config = dict(
    seed=23,
    batch_size=128,
    net='b0',
    optimizer='AdaMM',
    epochs=10,
    verbose=True,
)

In [None]:
# Train with the current config and keep the four returned series for plotting.
# The first positional argument (False) is interpreted by run.main, which is not shown here.
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B0",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the 'Our-AdaMM' optimizer (all other keys unchanged) and train.
config.update(optimizer='Our-AdaMM')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B0",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)


In [None]:
# Build a dedicated config for the ZO-AdaMM run instead of mutating the shared
# `config`. Writing these keys into `config` would make the ZO-specific settings
# (lr, beta1, beta2, mu and the 20-epoch budget) carry over into every later
# AdaMM / Our-AdaMM run, which would then no longer match the B0 runs above.
zo_config = {
    **config,
    'optimizer': 'ZO-AdaMM',
    'lr': 1e-4,
    'beta1': 0.8,
    'beta2': 0.95,
    'mu': 1e-4,
    'epochs': 20,
}

# The whole run executes with autograd disabled.
with torch.no_grad():
    train_losses, validation_losses, validation_accuracies, epoch_time = main(False, config = zo_config, zo_optim=True)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "ZO-AdaMM and EfficientNet B0",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# When the notebook is run top to bottom, the variables plotted here hold the
# results of the preceding ZO-AdaMM run, so the title must name ZO-AdaMM
# (it previously said "Our-AdaMM", mislabelling the figure).
multiple_trainings_subplots("ZO-AdaMM and EfficientNet B0",train_losses, validation_losses, validation_accuracies, epoch_time)


In [None]:
# Build a dedicated config for the ZO-AdaMM run instead of mutating the shared
# `config`. Writing these keys into `config` would make the ZO-specific settings
# (lr, beta1, beta2, mu and the 20-epoch budget) carry over into every later
# AdaMM / Our-AdaMM run, which would then no longer match the B0 runs above.
zo_config = {
    **config,
    'optimizer': 'ZO-AdaMM',
    'lr': 1e-4,
    'beta1': 0.8,
    'beta2': 0.95,
    'mu': 1e-4,
    'epochs': 20,
}

# The whole run executes with autograd disabled.
with torch.no_grad():
    train_losses, validation_losses, validation_accuracies, epoch_time = main(False, config = zo_config, zo_optim=True)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "ZO-AdaMM and EfficientNet B0",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B1

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b1' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b1')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B1",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B1",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B2

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b2' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b2')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B2",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B2",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B3

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b3' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b3')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B3",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B3",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B4

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b4' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b4')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B4",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B4",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B5

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b5' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b5')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B5",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B5",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B6

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b6' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b6')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B6",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B6",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

## EfficientNet B7

In [None]:
# Point the shared config at the 'AdaMM' optimizer and the 'b7' network, then train.
# `config` is updated in place, so every other key keeps whatever earlier cells left in it.
config.update(optimizer='AdaMM', net='b7')
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "AdaMM and EfficientNet B7",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

In [None]:
# Switch the shared config to the "Our-AdaMM" optimizer (network unchanged) and train.
config.update(optimizer="Our-AdaMM")
(
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
) = main(False, config=config)

In [None]:
# Plot the results of the most recent training call.
multiple_trainings_subplots(
    "Our-AdaMM and EfficientNet B7",
    train_losses,
    validation_losses,
    validation_accuracies,
    epoch_time,
)

# Experiments

In [2]:
# Base configuration for the experiments section (replaces the earlier `config`).
config = dict(
    seed=42,
    batch_size=128,
    net="scalable",
    scale=1.0/9,
    opt_params=[1e-3, 0.9, 0.999, 1e-8],
    optimizer='ZO-AdaMM',
    epochs=50,
    dataset="mnist",
    zo_optim=True,
    mu=1e-3,
    use_scheduler=True,
    verbose=True,
)

In [7]:
# Candidate scales are 50 evenly spaced values from 1/9 to 120 - 1/9;
# only the second candidate (index 1) is kept, as a one-element list.
scales = torch.linspace(start=1.0 / 9, end=120 - 1 / 9, steps=50).tolist()[1:2]

In [8]:
# Run the experiments for the selected scale with weight recording enabled.
# The positional arguments ('results' and 1) are interpreted by run.experiments, which is not shown here.
experiments(
    config,
    'results',
    scales,
    1,
    record_weights=True,
    weights_path=f'results/weights/weights_sequence_{scales[0]}',
)

Scale set to : 2.555555582046509
Running configuration:
    seed : 42
    batch_size : 128
    net : scalable
    scale : 2.555555582046509
    opt_params : [0.001, 0.9, 0.999, 1e-08]
    optimizer : ZO-AdaMM
    epochs : 50
    dataset : mnist
    zo_optim : True
    mu : 0.001
    use_scheduler : True
    verbose : True
Seed set to : 42
Device used:  cpu 

d= 28817, sqrt(d)= 169.75570682601514
Epoch: 1/50 |train loss: 2.2234 |test loss: 2.0299 |acc: 0.3999 |time: 8.9743
Epoch: 6/50 |train loss: 0.9578 |test loss: 0.9065 |acc: 0.7071 |time: 9.3306
Epoch: 11/50 |train loss: 0.8520 |test loss: 0.8302 |acc: 0.7376 |time: 10.5599
Epoch: 16/50 |train loss: 0.7969 |test loss: 0.7636 |acc: 0.7575 |time: 12.0046
Epoch: 21/50 |train loss: 0.7426 |test loss: 0.7270 |acc: 0.7711 |time: 10.4136
Mu reduced from 0.001 to 0.0005 on param_group 0
Learning rate reduced from 0.001 to 0.0005 on param_group 0
Epoch: 26/50 |train loss: 0.6884 |test loss: 0.6604 |acc: 0.7921 |time: 11.9468
Epoch: 31/50 |tr