# General imports & initializations

In [1]:
import os
import random
import numpy as np
from typing import * #general imports

import torch
import pytorch_lightning as pl #pytorch imports
from pytorch_lightning.callbacks import ModelCheckpoint


from DataModules import Datamodule
import utils, config
from mlp import MLPModule #package imports

import wandb #weights and biases

Reproducibility: seed every random number generator and request deterministic cuDNN behaviour

In [2]:
# Single seed shared by every random number generator used in this notebook.
seed = 17

# Seed Lightning, PyTorch, NumPy, Python's `random` and the current CUDA
# device, in that order, all with the same value.
for seed_fn in (
    pl.seed_everything,
    torch.manual_seed,
    np.random.seed,
    random.seed,
    torch.cuda.manual_seed,
):
    seed_fn(seed)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Make the project root the working directory, so relative paths used later
# (e.g. "checkpoints/") are resolved from there.
os.chdir(config.ROOT_PATH)

Global seed set to 17


# Dataset & hyperparameters choice

In [3]:
# Dataset to train on; also the key used to look up utils.dataset_options.
dataset = "cifar-10"
# Name given to the Weights & Biases run.
run_name = "checkpoint_all_param"

# Batch sizes: one for training, one shared by validation and test.
train_batch_size = 32
test_batch_size = 128

# Training length (in epochs) and initial learning rate.
epochs = 20
learning_rate = 1e-3

## Weights and biases setup

In [4]:
# Start a Weights & Biases run under the given project and entity.
wandb.init(
    project="Batch-norm-only",
    entity="ale99",
)
# Set the run's name to the one chosen in the hyperparameter cell.
wandb.run.name = run_name

[34m[1mwandb[0m: Currently logged in as: [33male99[0m (use `wandb login --relogin` to force relogin)


In [5]:
# Declare "epoch" as a metric so the ones below can use it as their x-axis.
wandb.define_metric("epoch")

# Losses keep their minimum in the run summary, accuracy keeps its maximum;
# all three are plotted against "epoch".
wandb.define_metric(
    "validation_loss",
    step_metric="epoch",
    summary="min",
)
wandb.define_metric(
    "validation_accuracy",
    step_metric="epoch",
    summary="max",
)
wandb.define_metric(
    "training_loss",
    step_metric="epoch",
    summary="min",
)

<wandb.sdk.wandb_metric.Metric at 0x7f43436e2dd0>

In [6]:
# Record the hyperparameters on the active run.
# `wandb.config` has to be updated in place: assigning a new dict to it only
# rebinds the module attribute and leaves the run's own config empty, so the
# values would never show up in the W&B UI.
# `allow_val_change=True` lets this cell be re-run after editing a value
# without wandb rejecting the changed key.
wandb.config.update(
    {
        "dataset": dataset,  # the dataset you want to train on
        "train_batch_size": train_batch_size,  # training batch size
        "test_batch_size": test_batch_size,  # test & validation batch size
        "epochs": epochs,  # number of epochs to train
        "learning_rate": learning_rate,  # initial learning rate
    },
    allow_val_change=True,
)

# Define datamodule and model

In [7]:
# Data module for the chosen dataset, with separate train and test batch sizes.
data = Datamodule(
    dataset,
    train_batch_size,
    test_batch_size,
    utils.dataset_options,
)

# Model parameters are the second entry of the dataset's options; the first
# five of them are forwarded positionally to the model constructor.
params = utils.dataset_options[dataset][1]
model_args = [params[position] for position in range(5)]
mlp = MLPModule(*model_args, batch_norm_only=False)

# Have W&B track the model (log="all", graph included) at every logging step.
wandb.watch(mlp, log="all", log_graph=True, log_freq=1)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


[<wandb.wandb_torch.TorchGraph at 0x7f43424ff610>]

Set up the trainer

In [8]:
# Save checkpoints under "checkpoints/", ranking them by the lowest "val_loss".
# NOTE(review): "val_loss" must be a metric logged by the model; that code is
# not visible in this notebook, so confirm the name matches.
checkpoint = ModelCheckpoint(
    dirpath="checkpoints/",
    monitor="val_loss",
    mode="min",
)

# Trainer on a single GPU for the configured number of epochs, with the
# checkpoint callback attached.
trainer = pl.Trainer(
    callbacks=[checkpoint],
    gpus=1,
    max_epochs=epochs,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


And train!

In [10]:
# Fit the model on the data module's training/validation loaders.
# The W&B run is intentionally left open here; it is closed by the
# wandb.finish() call after the test step.
trainer.fit(mlp, datamodule=data)

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type             | Params
--------------------------------------------------
0 | batch_n0     | BatchNorm1d      | 6.1 K 
1 | layer_1      | Linear           | 3.1 M 
2 | batch_n1     | BatchNorm1d      | 2.0 K 
3 | layer_2      | Linear           | 131 K 
4 | batch_n2     | BatchNorm1d      | 256   
5 | layer_3      | Linear           | 4.1 K 
6 | batch_n3     | BatchNorm1d      | 64    
7 | layer_output | Linear           | 330   
8 | softmax      | Softmax          | 0     
9 | loss_fn      | CrossEntropyLoss | 0     
--------------------------------------------------
3.3 M     Trainable params
0         Non-trainable params
3.3 M     Total params
13.164    Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 17


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


  rank_zero_deprecation(


[{}]

In [12]:
# Reload the weights from the best checkpoint recorded by the callback.
best_checkpoint_path = checkpoint.best_model_path
best_model = MLPModule.load_from_checkpoint(best_checkpoint_path)

# Evaluate the reloaded model on the data module's test loader.
test_loader = data.test_dataloader()
trainer.test(model=best_model, dataloaders=test_loader)

  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{}
--------------------------------------------------------------------------------


  rank_zero_deprecation(


[{}]

In [13]:
# Mark the Weights & Biases run as finished, now that training and testing are done.
wandb.finish()

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
training_loss,█▇▆▆▅▅▄▄▄▄▃▃▃▂▂▂▂▁▁▁
validation_accuracy,▁▇▇▇██████████████████
validation_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃

0,1
epoch,20.0
test_accuracy,0.5626
