# Model Creation


In [None]:
!pip install pytorch_lightning
!pip install torchmetrics


Collecting pytorch_lightning
  Downloading pytorch_lightning-2.2.0.post0-py3-none-any.whl (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.9/800.9 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch_lightning
Successfully installed lightning-utilities-0.10.1 pytorch_lightning-2.2.0.post0 torchmetrics-1.3.1


In [None]:
import torch
import pytorch_lightning as pl
import numpy as np
import os
import random
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchmetrics.classification import MulticlassCalibrationError
from torch.utils.data import DataLoader
from torch.optim import SGD
from torchvision.models import vgg16

Check GPU use

In [None]:
if torch.cuda.is_available():
    print("Found", torch.cuda.device_count(), "CUDA devices!")
    device = torch.cuda.current_device()
    print("\tAttached device is", torch.cuda.get_device_name(device))
else:
    print("We couldn't find any CUDA devices attached to this session!")

Found 1 CUDA devices!
	Attached device is Tesla T4


Set random seed

In [None]:
def set_seeds(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    pl.seed_everything(seed)
set_seeds(42)

INFO:lightning_fabric.utilities.seed:Seed set to 42


# Dataset

Declaring Transforms

In [None]:
transform = transforms.Compose(
    [transforms.ToTensor()])

batch_size = 256

# Random Trainset

https://github.com/pluskid/fitting-random-labels/blob/master/cifar10_data.py


In [None]:
class CIFAR10RandomLabels(datasets.CIFAR10):
  """CIFAR10 dataset, with support for randomly corrupt labels.

  Params
  ------
  corrupt_prob: float
    Default 0.0. The probability of a label being replaced with
    random label.
  num_classes: int
    Default 10. The number of classes in the dataset.
  """
  def __init__(self, corrupt_prob=0.0, num_classes=10, **kwargs):
    super(CIFAR10RandomLabels, self).__init__(**kwargs)
    self.n_classes = num_classes
    if corrupt_prob > 0:
      self.corrupt_labels(corrupt_prob)

  def corrupt_labels(self, corrupt_prob):
    labels = np.array(self.targets)
    np.random.seed(12345)
    mask = np.random.rand(len(labels)) <= corrupt_prob
    rnd_labels = np.random.choice(self.n_classes, mask.sum())
    labels[mask] = rnd_labels
    # we need to explicitly cast the labels from npy.int64 to
    # builtin int type, otherwise pytorch will fail...
    labels = [int(x) for x in labels]

    self.targets = labels

In [None]:
randomtrainset = CIFAR10RandomLabels(1.0,root='./data',download=True,transform=transform,train=True)
randomtrainloader = torch.utils.data.DataLoader(randomtrainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 42946031.61it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


Retrieve Test Set

In [None]:
testset = datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)


Files already downloaded and verified


Recieve Normal Train Set

In [None]:
trainset = datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified


# Training Script

In [None]:
def save_model(model,path):
    torch.save(model.state_dict(),path)

In [None]:
def train(model,loss_fn,optimizer,trainloader,testloader,epochs,path):
  size_train = int(np.ceil(len(trainloader.dataset)//trainloader.batch_size))
  size_test = int(np.ceil(len(testloader.dataset)//testloader.batch_size))
  model.to(device)
  step = 0
  for epoch in range(1, epochs + 1):
      model.train()
      ece_train = MulticlassCalibrationError(num_classes=10,n_bins=15,norm='l1').to(device)
      running_loss = 0
      running_test_loss = 0
      for batch, (X, y) in enumerate(trainloader):
          optimizer.zero_grad()
          X = X.to(device)
          y = y.to(device)
          # Compute prediction and loss
          pred = model(X)
          loss = loss_fn(pred, y)
          # Backpropagation
          loss.backward()
          optimizer.step()
          running_loss += loss.item()
          acc = np.mean(
              (torch.argmax(pred, dim=-1) == y).detach().cpu().numpy()
          )
          ece_train.update(pred,y)
      print(
          f"loss: {running_loss/len(trainloader):>7f}, train accuracy: {acc:.5f} "
          f"Train ECE: {ece_train.compute().detach().cpu().item():>5f} "
          f"[epoch {epoch} and batch {batch}/{size_train} (step {step})]"
      )
      if epoch % 10==0:
        evaluate_model(model,testloader,epoch=epoch)

  print('Finished Training')
  save_model(model,path)

# Evaluate model

In [None]:
 def evaluate_model(model,testloader,epoch=None):
    size_test = int(np.ceil(len(testloader.dataset)//testloader.batch_size))
    model.to(device)
    model.eval() # Stop any weight updates on the model (i.e. Batch weights)
    running_test_loss = 0
    ece_test = MulticlassCalibrationError(num_classes=10,n_bins=15,norm='l1').to(device)
    for test_batch, (X_val, y_val) in enumerate(testloader):
      x_val = X_val.to(device)
      y_val = y_val.to(device)
      with torch.no_grad():
        val_pred = model(x_val)
        val_loss = loss_fn(val_pred, y_val)
        running_test_loss += val_loss.item()
        val_acc = np.mean(
            (torch.argmax(val_pred, dim=-1) == y_val).detach().cpu().numpy()
        )
        ece_test.update(val_pred,y_val)
    print(
        "-"*10, "TEST ACC", "-"*10,
        f"val loss: {running_test_loss/len(testloader):>5f}, val accuracy: {val_acc:.4f} "
        f"Test ECE: {ece_test.compute().detach().cpu().item():>5f} "
        )
    if epoch!= None:
          print(f"[epoch {epoch} and batch {test_batch}/{size_test}]",
          "-"*10,"TEST ACC","-"*10)


# Build VGG

Using the VGG model provided by the loss landscape repo. This will allow the visualisations of the loss landscapes to be created easily.

https://github.com/tomgoldstein/loss-landscape/blob/master/cifar10/models/vgg.py

In [None]:
cfg = {
    'VGG9':  [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.input_size = 32
        self.features = self._make_layers(cfg[vgg_name])
        self.n_maps = cfg[vgg_name][-2]
        self.fc = self._make_fc_layers()
        self.classifier = nn.Linear(self.n_maps, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = self.classifier(out)
        return out

    def _make_fc_layers(self):
        layers = []
        layers += [nn.Linear(self.n_maps*self.input_size*self.input_size, self.n_maps),
                   nn.BatchNorm1d(self.n_maps),
                   nn.ReLU(inplace=True)]
        return nn.Sequential(*layers)

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
                self.input_size = self.input_size // 2
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

def VGG9():
    return VGG('VGG9')

def VGG16():
    return VGG('VGG16')

def VGG19():
    return VGG('VGG19')

Drop Out VGG

In [None]:
class VGGD(nn.Module):
    def __init__(self, vgg_name,dropout):
        super(VGGD, self).__init__()
        self.input_size = 32
        self.features = self._make_layers(cfg[vgg_name])
        self.n_maps = cfg[vgg_name][-2]
        self.fc = self._make_fc_layers()
        self.classifier = nn.Linear(self.n_maps, 10)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.dropout(out)
        out = self.fc(out)
        out = self.classifier(out)
        return out

    def _make_fc_layers(self):
        layers = []
        layers += [nn.Linear(self.n_maps*self.input_size*self.input_size, self.n_maps),
                   nn.BatchNorm1d(self.n_maps),
                   nn.ReLU(inplace=True)]
        return nn.Sequential(*layers)

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
                self.input_size = self.input_size // 2
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

def VGG9dropout(dropout):
    return VGGD('VGG9',dropout)

def VGG16dropout(dropout):
    return VGGD('VGG16',dropout)

def VGG19dropout(dropout):
    return VGGD('VGG19',dropout)

# Baseline Model

In [None]:
model = VGG19()
save_model(model, 'initailisation.pth')
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='baseline_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.449555, train accuracy: 0.55000 Train ECE: 0.018160 [epoch 1 and batch 195/195 (step 0)]
loss: 0.971693, train accuracy: 0.67500 Train ECE: 0.011676 [epoch 2 and batch 195/195 (step 0)]
loss: 0.728521, train accuracy: 0.76250 Train ECE: 0.014807 [epoch 3 and batch 195/195 (step 0)]
loss: 0.554465, train accuracy: 0.81250 Train ECE: 0.017131 [epoch 4 and batch 195/195 (step 0)]
loss: 0.406162, train accuracy: 0.81250 Train ECE: 0.018275 [epoch 5 and batch 195/195 (step 0)]
loss: 0.288240, train accuracy: 0.92500 Train ECE: 0.020430 [epoch 6 and batch 195/195 (step 0)]
loss: 0.192937, train accuracy: 0.93750 Train ECE: 0.020109 [epoch 7 and batch 195/195 (step 0)]
loss: 0.151293, train accuracy: 0.96250 Train ECE: 0.013978 [epoch 8 and batch 195/195 (step 0)]
loss: 0.110148, train accuracy: 0.90000 Train ECE: 0.013167 [epoch 9 and batch 195/195 (step 0)]
loss: 0.093275, train accuracy: 0.98750 Train ECE: 0.010097 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

# Baseline Adversarial Training

In [None]:
model = VGG19()
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 45
path ='initialisation_adversarial.pth'
train(model,loss_fn,optimizer,randomtrainloader,testloader,epochs,path)

loss: 2.318720, train accuracy: 0.15000 Train ECE: 0.028194 [epoch 1 and batch 195/195 (step 0)]
loss: 2.295920, train accuracy: 0.08750 Train ECE: 0.005455 [epoch 2 and batch 195/195 (step 0)]
loss: 2.288567, train accuracy: 0.13750 Train ECE: 0.003244 [epoch 3 and batch 195/195 (step 0)]
loss: 2.279170, train accuracy: 0.17500 Train ECE: 0.004599 [epoch 4 and batch 195/195 (step 0)]
loss: 2.266697, train accuracy: 0.11250 Train ECE: 0.004402 [epoch 5 and batch 195/195 (step 0)]
loss: 2.246568, train accuracy: 0.12500 Train ECE: 0.006377 [epoch 6 and batch 195/195 (step 0)]
loss: 2.210131, train accuracy: 0.17500 Train ECE: 0.007813 [epoch 7 and batch 195/195 (step 0)]
loss: 2.154719, train accuracy: 0.22500 Train ECE: 0.007302 [epoch 8 and batch 195/195 (step 0)]
loss: 2.080684, train accuracy: 0.33750 Train ECE: 0.007594 [epoch 9 and batch 195/195 (step 0)]
loss: 1.976050, train accuracy: 0.28750 Train ECE: 0.011712 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

In [None]:
evaluate_model(model,testloader)

---------- TEST ACC ---------- val loss: 5.756694, val accuracy: 0.0625 Test ECE: 0.677601 


# Bad Minima

In [None]:
model = VGG19()
model.load_state_dict(torch.load('/content/initialisation_adversarial.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='adversarial_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.791075, train accuracy: 0.61250 Train ECE: 0.044015 [epoch 1 and batch 195/195 (step 0)]
loss: 1.052271, train accuracy: 0.61250 Train ECE: 0.006715 [epoch 2 and batch 195/195 (step 0)]
loss: 0.746536, train accuracy: 0.72500 Train ECE: 0.008548 [epoch 3 and batch 195/195 (step 0)]
loss: 0.461658, train accuracy: 0.86250 Train ECE: 0.017962 [epoch 4 and batch 195/195 (step 0)]
loss: 0.254587, train accuracy: 0.86250 Train ECE: 0.018769 [epoch 5 and batch 195/195 (step 0)]
loss: 0.150728, train accuracy: 0.95000 Train ECE: 0.014958 [epoch 6 and batch 195/195 (step 0)]
loss: 0.096269, train accuracy: 0.98750 Train ECE: 0.013163 [epoch 7 and batch 195/195 (step 0)]
loss: 0.061901, train accuracy: 0.96250 Train ECE: 0.009401 [epoch 8 and batch 195/195 (step 0)]
loss: 0.053642, train accuracy: 0.98750 Train ECE: 0.008529 [epoch 9 and batch 195/195 (step 0)]
loss: 0.029351, train accuracy: 0.97500 Train ECE: 0.007610 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

# Drop out effect

From initialisation

In [None]:
model = VGG19dropout(0.05)
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='dropout_0_05_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.476536, train accuracy: 0.50000 Train ECE: 0.018201 [epoch 1 and batch 195/195 (step 0)]
loss: 0.995980, train accuracy: 0.65000 Train ECE: 0.014867 [epoch 2 and batch 195/195 (step 0)]
loss: 0.759000, train accuracy: 0.87500 Train ECE: 0.014580 [epoch 3 and batch 195/195 (step 0)]
loss: 0.579543, train accuracy: 0.78750 Train ECE: 0.017494 [epoch 4 and batch 195/195 (step 0)]
loss: 0.427509, train accuracy: 0.83750 Train ECE: 0.020704 [epoch 5 and batch 195/195 (step 0)]
loss: 0.310339, train accuracy: 0.77500 Train ECE: 0.018914 [epoch 6 and batch 195/195 (step 0)]
loss: 0.236956, train accuracy: 0.96250 Train ECE: 0.014874 [epoch 7 and batch 195/195 (step 0)]
loss: 0.152931, train accuracy: 0.88750 Train ECE: 0.017330 [epoch 8 and batch 195/195 (step 0)]
loss: 0.129540, train accuracy: 0.91250 Train ECE: 0.011882 [epoch 9 and batch 195/195 (step 0)]
loss: 0.091629, train accuracy: 0.93750 Train ECE: 0.010967 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

In [None]:
model = VGG19dropout(0.1)
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='dropout_0_1_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.490263, train accuracy: 0.55000 Train ECE: 0.020255 [epoch 1 and batch 195/195 (step 0)]
loss: 1.008065, train accuracy: 0.55000 Train ECE: 0.012194 [epoch 2 and batch 195/195 (step 0)]
loss: 0.767529, train accuracy: 0.71250 Train ECE: 0.015794 [epoch 3 and batch 195/195 (step 0)]
loss: 0.589406, train accuracy: 0.73750 Train ECE: 0.017051 [epoch 4 and batch 195/195 (step 0)]
loss: 0.442355, train accuracy: 0.85000 Train ECE: 0.018951 [epoch 5 and batch 195/195 (step 0)]
loss: 0.313614, train accuracy: 0.85000 Train ECE: 0.021300 [epoch 6 and batch 195/195 (step 0)]
loss: 0.234078, train accuracy: 0.91250 Train ECE: 0.016770 [epoch 7 and batch 195/195 (step 0)]
loss: 0.169612, train accuracy: 0.93750 Train ECE: 0.015291 [epoch 8 and batch 195/195 (step 0)]
loss: 0.122980, train accuracy: 0.92500 Train ECE: 0.013248 [epoch 9 and batch 195/195 (step 0)]
loss: 0.089786, train accuracy: 0.97500 Train ECE: 0.011945 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

In [None]:
model = VGG19dropout(0.15)
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='dropout_0_15_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.505321, train accuracy: 0.56250 Train ECE: 0.020620 [epoch 1 and batch 195/195 (step 0)]
loss: 1.012848, train accuracy: 0.63750 Train ECE: 0.015799 [epoch 2 and batch 195/195 (step 0)]
loss: 0.769535, train accuracy: 0.77500 Train ECE: 0.014365 [epoch 3 and batch 195/195 (step 0)]
loss: 0.585006, train accuracy: 0.71250 Train ECE: 0.019903 [epoch 4 and batch 195/195 (step 0)]
loss: 0.440061, train accuracy: 0.83750 Train ECE: 0.019838 [epoch 5 and batch 195/195 (step 0)]
loss: 0.322713, train accuracy: 0.88750 Train ECE: 0.020702 [epoch 6 and batch 195/195 (step 0)]
loss: 0.224446, train accuracy: 0.91250 Train ECE: 0.019315 [epoch 7 and batch 195/195 (step 0)]
loss: 0.163512, train accuracy: 0.91250 Train ECE: 0.016313 [epoch 8 and batch 195/195 (step 0)]
loss: 0.124925, train accuracy: 0.92500 Train ECE: 0.013424 [epoch 9 and batch 195/195 (step 0)]
loss: 0.097259, train accuracy: 0.95000 Train ECE: 0.010729 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

In [None]:
model = VGG19dropout(0.20)
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='dropout_0_20_model.pth'
train(model,loss_fn,optimizer,trainloader,testloader,epochs,path)

loss: 1.531903, train accuracy: 0.61250 Train ECE: 0.025097 [epoch 1 and batch 195/195 (step 0)]
loss: 1.013648, train accuracy: 0.66250 Train ECE: 0.019699 [epoch 2 and batch 195/195 (step 0)]
loss: 0.772173, train accuracy: 0.68750 Train ECE: 0.016032 [epoch 3 and batch 195/195 (step 0)]
loss: 0.595901, train accuracy: 0.80000 Train ECE: 0.018909 [epoch 4 and batch 195/195 (step 0)]
loss: 0.433069, train accuracy: 0.85000 Train ECE: 0.023289 [epoch 5 and batch 195/195 (step 0)]
loss: 0.315982, train accuracy: 0.85000 Train ECE: 0.021163 [epoch 6 and batch 195/195 (step 0)]
loss: 0.232113, train accuracy: 0.87500 Train ECE: 0.019262 [epoch 7 and batch 195/195 (step 0)]
loss: 0.172513, train accuracy: 0.91250 Train ECE: 0.015873 [epoch 8 and batch 195/195 (step 0)]
loss: 0.132421, train accuracy: 0.96250 Train ECE: 0.013292 [epoch 9 and batch 195/195 (step 0)]
loss: 0.088329, train accuracy: 0.88750 Train ECE: 0.013353 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

# Augmentataion

In [None]:
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

transform_aug = v2.Compose(
    [transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor()])

trainset_aug = datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_aug)
trainloader_aug = torch.utils.data.DataLoader(trainset_aug, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified


From base initialisation

In [None]:
model = VGG19()
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
path ='aug_model.pth'
train(model,loss_fn,optimizer,trainloader_aug,testloader,epochs,path)

loss: 1.795096, train accuracy: 0.41250 Train ECE: 0.009752 [epoch 1 and batch 195/195 (step 0)]
loss: 1.371126, train accuracy: 0.46250 Train ECE: 0.006790 [epoch 2 and batch 195/195 (step 0)]
loss: 1.159115, train accuracy: 0.56250 Train ECE: 0.009422 [epoch 3 and batch 195/195 (step 0)]
loss: 1.021967, train accuracy: 0.63750 Train ECE: 0.008261 [epoch 4 and batch 195/195 (step 0)]
loss: 0.916077, train accuracy: 0.73750 Train ECE: 0.005898 [epoch 5 and batch 195/195 (step 0)]
loss: 0.835270, train accuracy: 0.75000 Train ECE: 0.006968 [epoch 6 and batch 195/195 (step 0)]
loss: 0.774533, train accuracy: 0.67500 Train ECE: 0.006386 [epoch 7 and batch 195/195 (step 0)]
loss: 0.705744, train accuracy: 0.68750 Train ECE: 0.006032 [epoch 8 and batch 195/195 (step 0)]
loss: 0.666868, train accuracy: 0.78750 Train ECE: 0.006836 [epoch 9 and batch 195/195 (step 0)]
loss: 0.616841, train accuracy: 0.76250 Train ECE: 0.008699 [epoch 10 and batch 195/195 (step 0)]
---------- TEST ACC ---------

# Early stopping

In [None]:
def train_early_stopping(model,loss_fn,optimizer,trainloader,testloader,epochs,path,patience):
  size_train = int(np.ceil(len(trainloader.dataset)//trainloader.batch_size))
  size_test = int(np.ceil(len(testloader.dataset)//testloader.batch_size))
  model.to(device)
  step = 0
  best_acc = 0
  for epoch in range(1, epochs + 1):
      model.train()
      ece_train = MulticlassCalibrationError(num_classes=10,n_bins=15,norm='l1').to(device)
      running_loss = 0
      running_test_loss = 0
      for batch, (X, y) in enumerate(trainloader):
          optimizer.zero_grad()
          X = X.to(device)
          y = y.to(device)
          # Compute prediction and loss
          pred = model(X)
          loss = loss_fn(pred, y)
          # Backpropagation
          loss.backward()
          optimizer.step()
          running_loss += loss.item()
          acc = np.mean(
              (torch.argmax(pred, dim=-1) == y).detach().cpu().numpy()
          )
          ece_train.update(pred,y)
      print(
          f"loss: {running_loss/len(trainloader):>7f}, train accuracy: {acc:.5f} "
          f"Train ECE: {ece_train.compute().detach().cpu().item():>5f} "
          f"[epoch {epoch} and batch {batch}/{size_train} (step {step})]"
      )

      model.eval() # Stop any weight updates on the model (i.e. Batch weights)
      ece_test = MulticlassCalibrationError(num_classes=10,n_bins=15,norm='l1').to(device)
      for test_batch, (X_val, y_val) in enumerate(testloader):
        x_val = X_val.to(device)
        y_val = y_val.to(device)
        with torch.no_grad():
          val_pred = model(x_val)
          val_loss = loss_fn(val_pred, y_val)
          running_test_loss += val_loss.item()
          val_acc = np.mean(
              (torch.argmax(val_pred, dim=-1) == y_val).detach().cpu().numpy()
          )
          ece_test.update(val_pred,y_val)
      print(
          "-"*10, "TEST ACC", "-"*10,
          f"val loss: {running_test_loss/len(testloader):>5f}, val accuracy: {val_acc:.4f} "
          f"Test ECE: {ece_test.compute().detach().cpu().item():>5f} "
          f"[epoch {epoch} and batch {batch}/{size_test}]",
          "-"*10,"TEST ACC","-"*10,
          )
      if val_acc > best_acc:
          best_acc = val_acc
          best_model = model
          consecutive_no_improvement = 0
      else:
          consecutive_no_improvement += 1
          if consecutive_no_improvement >= patience:
              print(f'Early stopping after {patience} consecutive epochs without improvement.')
              break

  print('Finished Training')
  save_model(best_model,path)

In [None]:
model = VGG19()
model.load_state_dict(torch.load('/content/initailisation.pth'))
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
epochs = 20
patience = 10
path ='ES_model.pth'
train_early_stopping(model,loss_fn,optimizer,trainloader,testloader,epochs,path,patience)

loss: 1.462413, train accuracy: 0.52500 Train ECE: 0.015306 [epoch 1 and batch 195/195 (step 0)]
---------- TEST ACC ---------- val loss: 1.166197, val accuracy: 0.6250 Test ECE: 0.012493 [epoch 1 and batch 195/39] ---------- TEST ACC ----------
loss: 0.971399, train accuracy: 0.68750 Train ECE: 0.014888 [epoch 2 and batch 195/195 (step 0)]
---------- TEST ACC ---------- val loss: 0.971820, val accuracy: 0.5625 Test ECE: 0.017924 [epoch 2 and batch 195/39] ---------- TEST ACC ----------
loss: 0.728210, train accuracy: 0.73750 Train ECE: 0.014156 [epoch 3 and batch 195/195 (step 0)]
---------- TEST ACC ---------- val loss: 0.867364, val accuracy: 0.6250 Test ECE: 0.033588 [epoch 3 and batch 195/39] ---------- TEST ACC ----------
loss: 0.540428, train accuracy: 0.86250 Train ECE: 0.019065 [epoch 4 and batch 195/195 (step 0)]
---------- TEST ACC ---------- val loss: 0.845138, val accuracy: 0.6250 Test ECE: 0.064413 [epoch 4 and batch 195/39] ---------- TEST ACC ----------
loss: 0.390257, 

# Visualise Landscape of VGG 19 Landscape

In [None]:
! git clone https://github.com/IFMW01/loss-landscape_R252.git

Cloning into 'loss-landscape_R252'...
remote: Enumerating objects: 268, done.[K
remote: Counting objects: 100% (115/115), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 268 (delta 86), reused 85 (delta 83), pack-reused 153[K
Receiving objects: 100% (268/268), 1.89 MiB | 20.85 MiB/s, done.
Resolving deltas: 100% (149/149), done.


In [None]:
cd loss-landscape_R252/

/content/loss-landscape_R252


Baseline Model

In [None]:
! python plot_surface.py --cuda --model vgg19 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/models/baseline_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --test --raw_data
# ! python plot_surface.py --cuda --model vgg9 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/random_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --dataset cifar10r --plot

Rank 0 use GPU 0 of 1 GPUs on 0eaef4ec5c02
-------------------------------------------------------------------
setup_direction
-------------------------------------------------------------------
/content/loss-landscape_R252/models/baseline_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5 is already setted up
cosine similarity between x-axis and y-axis: 0.000060
Files already downloaded and verified
Files already downloaded and verified
Computing 961 values for rank 0
Evaluating rank 0  0/961  (0.0%)  coord=[-1. -1.] 	test_loss= 5487.638 	test_acc=10.09 	time=3.04 	sync=0.00
Evaluating rank 0  1/961  (0.1%)  coord=[-0.93333333 -1.        ] 	test_loss= 3653.971 	test_acc=10.91 	time=2.23 	sync=0.00
Evaluating rank 0  2/961  (0.2%)  coord=[-0.86666667 -1.        ] 	test_loss= 2498.986 	test_acc=10.97 	time=2.06 	sync=0.00
Evaluating rank 0  3/961  (0.3%)  coord=[-0.8 -1. ] 	test_loss= 1760.727 	test_acc=10.37 	time=2.04 	sync=0.00
Evaluating rank 0  4/961  (0.4

In [None]:
!python h52vtp.py --surf_file /content/loss-landscape_R252/models/baseline_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5 --surf_name test_loss --zmax  10 --log

  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 2) is 0:
  if (vertexcount % 2) is 1:
  if (vertexcount % 2) is not 1:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (polycount % 2) is 0:
  if (polycount % 2) is 1:
  if (polycount % 2) is 1:
  if (polycount % 6) is 0:
  if (polycount % 6) is 5:
  if (polycount % 6) is not 5:
Here's your output file:/content/loss-landscape_R252/models/baseline_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5_test_loss_zmax=10.0_log.vtp
number_points = 961 points
matrix_size = 31 x 31
poly_size = 30 x 30
number_polys = 900
Done with file:/content/loss-landscape_R252/models/baseline_model.pth_weights_

Aversarial Model

In [None]:
! python plot_surface.py --cuda --model vgg19 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/models/adversarial_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --test --raw_data

Rank 0 use GPU 0 of 1 GPUs on 0eaef4ec5c02
-------------------------------------------------------------------
setup_direction
-------------------------------------------------------------------
Setting up the plotting directions...
direction file created: /content/loss-landscape_R252/models/adversarial_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5
cosine similarity between x-axis and y-axis: 0.000116
Files already downloaded and verified
Files already downloaded and verified
Computing 961 values for rank 0
Evaluating rank 0  0/961  (0.0%)  coord=[-1. -1.] 	test_loss= 4298.299 	test_acc=10.57 	time=2.61 	sync=0.00
Evaluating rank 0  1/961  (0.1%)  coord=[-0.93333333 -1.        ] 	test_loss= 2947.255 	test_acc=10.02 	time=2.61 	sync=0.00
Evaluating rank 0  2/961  (0.2%)  coord=[-0.86666667 -1.        ] 	test_loss= 2043.756 	test_acc=9.86 	time=2.29 	sync=0.00
Evaluating rank 0  3/961  (0.3%)  coord=[-0.8 -1. ] 	test_loss= 1417.864 	test_acc=9.84 	time=2.07

In [None]:
!python h52vtp.py --surf_file /content/loss-landscape_R252/models/adversarial_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5  --surf_name test_loss --zmax  10 --log

  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 2) is 0:
  if (vertexcount % 2) is 1:
  if (vertexcount % 2) is not 1:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (polycount % 2) is 0:
  if (polycount % 2) is 1:
  if (polycount % 2) is 1:
  if (polycount % 6) is 0:
  if (polycount % 6) is 5:
  if (polycount % 6) is not 5:
Here's your output file:/content/loss-landscape_R252/models/adversarial_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5_test_loss_zmax=10.0_log.vtp
number_points = 961 points
matrix_size = 31 x 31
poly_size = 30 x 30
number_polys = 900
Done with file:/content/loss-landscape_R252/models/adversarial_model.pth_we

# Explict Regularisation

In [None]:
! python plot_surface.py --cuda --model vgg19 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/models/dropout_0_1_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --test --raw_data

Rank 0 use GPU 0 of 1 GPUs on 0eaef4ec5c02
-------------------------------------------------------------------
setup_direction
-------------------------------------------------------------------
Setting up the plotting directions...
direction file created: /content/loss-landscape_R252/models/dropout_0_1_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5
cosine similarity between x-axis and y-axis: 0.000063
Files already downloaded and verified
Files already downloaded and verified
Computing 961 values for rank 0
Evaluating rank 0  0/961  (0.0%)  coord=[-1. -1.] 	test_loss= 3275.346 	test_acc=9.43 	time=2.63 	sync=0.00
Evaluating rank 0  1/961  (0.1%)  coord=[-0.93333333 -1.        ] 	test_loss= 2069.904 	test_acc=10.69 	time=2.10 	sync=0.00
Evaluating rank 0  2/961  (0.2%)  coord=[-0.86666667 -1.        ] 	test_loss= 1418.004 	test_acc=10.97 	time=2.12 	sync=0.00
Evaluating rank 0  3/961  (0.3%)  coord=[-0.8 -1. ] 	test_loss= 1009.619 	test_acc=10.81 	time=2.4

In [None]:
!python h52vtp.py --surf_file /content/loss-landscape_R252/models/dropout_0_1_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5  --surf_name test_loss --zmax  10 --log

  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 2) is 0:
  if (vertexcount % 2) is 1:
  if (vertexcount % 2) is not 1:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (polycount % 2) is 0:
  if (polycount % 2) is 1:
  if (polycount % 2) is 1:
  if (polycount % 6) is 0:
  if (polycount % 6) is 5:
  if (polycount % 6) is not 5:
Here's your output file:/content/loss-landscape_R252/models/dropout_0_1_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5_test_loss_zmax=10.0_log.vtp
number_points = 961 points
matrix_size = 31 x 31
poly_size = 30 x 30
number_polys = 900
Done with file:/content/loss-landscape_R252/models/dropout_0_1_model.pth_we

augmentation

In [None]:
! python plot_surface.py --cuda --model vgg19 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/models/aug_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --test --raw_data

Rank 0 use GPU 0 of 1 GPUs on 86dbdd65c329
-------------------------------------------------------------------
setup_direction
-------------------------------------------------------------------
Setting up the plotting directions...
direction file created: /content/loss-landscape_R252/models/aug_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5
cosine similarity between x-axis and y-axis: 0.000051
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10/data/cifar-10-python.tar.gz
100% 170498071/170498071 [00:05<00:00, 30599751.04it/s]
Extracting cifar10/data/cifar-10-python.tar.gz to cifar10/data
Files already downloaded and verified
Computing 961 values for rank 0
Evaluating rank 0  0/961  (0.0%)  coord=[-1. -1.] 	test_loss= 2242.233 	test_acc=10.01 	time=3.44 	sync=0.00
Evaluating rank 0  1/961  (0.1%)  coord=[-0.93333333 -1.        ] 	test_loss= 1462.937 	test_acc=10.01 	time=2.00 	sync=0.00
Evaluating rank 0  2/961  (0.2%)  coord=[-

In [None]:
!python h52vtp.py --surf_file /content/loss-landscape_R252/models/aug_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5 --surf_name test_loss --zmax  10 --log

  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 2) is 0:
  if (vertexcount % 2) is 1:
  if (vertexcount % 2) is not 1:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (polycount % 2) is 0:
  if (polycount % 2) is 1:
  if (polycount % 2) is 1:
  if (polycount % 6) is 0:
  if (polycount % 6) is 5:
  if (polycount % 6) is not 5:
Here's your output file:/content/loss-landscape_R252/models/aug_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5_test_loss_zmax=10.0_log.vtp
number_points = 961 points
matrix_size = 31 x 31
poly_size = 30 x 30
number_polys = 900
Done with file:/content/loss-landscape_R252/models/aug_model.pth_weights_xignore=bi

Early Stopping

In [None]:
! python plot_surface.py --cuda --model vgg19 --x=-1:1:31 --y=-1:1:31 --model_file /content/loss-landscape_R252/models/ES_model.pth --xnorm filter --xignore biasbn --ynorm filter --yignore biasbn --plot --test --raw_data

Rank 0 use GPU 0 of 1 GPUs on 86dbdd65c329
-------------------------------------------------------------------
setup_direction
-------------------------------------------------------------------
Setting up the plotting directions...
direction file created: /content/loss-landscape_R252/models/ES_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5
cosine similarity between x-axis and y-axis: 0.000061
Files already downloaded and verified
Files already downloaded and verified
Computing 961 values for rank 0
Evaluating rank 0  0/961  (0.0%)  coord=[-1. -1.] 	test_loss= 4910.500 	test_acc=9.61 	time=2.70 	sync=0.00
Evaluating rank 0  1/961  (0.1%)  coord=[-0.93333333 -1.        ] 	test_loss= 3182.296 	test_acc=9.43 	time=2.79 	sync=0.00
Evaluating rank 0  2/961  (0.2%)  coord=[-0.86666667 -1.        ] 	test_loss= 2263.675 	test_acc=10.62 	time=2.00 	sync=0.00
Evaluating rank 0  3/961  (0.3%)  coord=[-0.8 -1. ] 	test_loss= 1677.153 	test_acc=11.25 	time=1.99 	sync=0.

In [None]:
!python h52vtp.py --surf_file /content/loss-landscape_R252/models/ES_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5 --surf_name test_loss --zmax  10 --log

  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 2) is 0:
  if (vertexcount % 2) is 1:
  if (vertexcount % 2) is not 1:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (vertexcount % 6) is 0:
  if (vertexcount % 6) is 5:
  if (vertexcount % 6) is not 5:
  if (polycount % 2) is 0:
  if (polycount % 2) is 1:
  if (polycount % 2) is 1:
  if (polycount % 6) is 0:
  if (polycount % 6) is 5:
  if (polycount % 6) is not 5:
Here's your output file:/content/loss-landscape_R252/models/ES_model.pth_weights_xignore=biasbn_xnorm=filter_yignore=biasbn_ynorm=filter.h5_[-1.0,1.0,31]x[-1.0,1.0,31]_rawdata.h5_test_loss_zmax=10.0_log.vtp
number_points = 961 points
matrix_size = 31 x 31
poly_size = 30 x 30
number_polys = 900
Done with file:/content/loss-landscape_R252/models/ES_model.pth_weights_xignore=bias