## Imports

In [2]:
!python3 -m spacy download en
!python3 -m spacy download fr
!pip install torchinfo
!pip install einops
!pip install wandb

import math
import time
import os
import copy
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch import Tensor
from torch.nn import functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.utils.checkpoint as cp
from torch.utils.data import DataLoader
from torch.jit.annotations import List
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor, Normalize
from torchvision.transforms import Resize
from torchvision.transforms.functional_pil import resize
from torchvision.transforms import Grayscale
from torchvision.models.resnet import ResNet, BasicBlock


import numpy as np
from collections import OrderedDict
from tqdm import tqdm
from torchinfo import summary
import wandb
from PIL import Image, ImageOps, ImageEnhance

2023-05-02 03:28:46.786994: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-02 03:28:50.595198: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-02 03:28:50.595786: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-

**Drive**

In [None]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
torch.manual_seed(5)
torch.cuda.manual_seed_all(5)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# **Downloading and Preprocessing Data**

In [3]:
mnist_trainset = datasets.MNIST(root='./data/MNIST', train=True, download=True)
mnist_trainset.transform = transforms.Compose([Grayscale(num_output_channels=3), ToTensor(), Normalize([0.1205],[0.3015]),])
mnist_testset = datasets.MNIST(root='./data/MNIST', train=False, download=True)
mnist_testset.transform = transforms.Compose([Grayscale(num_output_channels=3), ToTensor(), Normalize([0.1307],[0.3015]),])
mnist_trainset = DataLoader(mnist_trainset, batch_size=128, shuffle=True)
mnist_testset = DataLoader(mnist_testset, batch_size=128)

CIFAR10_trainset = datasets.CIFAR10(root='./data/CIFAR', train=True, download=True)
CIFAR10_trainset.transform = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),ToTensor(), Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),])
CIFAR10_testset = datasets.CIFAR10(root='./data/CIFAR', train=False, download=True)
CIFAR10_testset.transform = transforms.Compose([ToTensor(), Normalize([0.4914, 0.4822, 0.4465],[0.2023, 0.1994, 0.2010]),])
CIFAR10_trainset = DataLoader(CIFAR10_trainset, batch_size=128, shuffle=True)
CIFAR10_testset = DataLoader(CIFAR10_testset, batch_size=128)

CIFAR100_trainset = datasets.CIFAR100(root='./data/CIFAR', train=True, download=True)
CIFAR100_trainset.transform = transforms.Compose([transforms.RandomHorizontalFlip(),transforms.RandomCrop(32, padding=4),ToTensor(),Normalize([0.5017, 0.4866, 0.4409],[0.2009, 0.1984, 0.2023]),])
CIFAR100_testset = datasets.CIFAR100(root='./data/CIFAR', train=False, download=True)
CIFAR100_testset.transform = transforms.Compose([ToTensor(), Normalize([0.5017, 0.4866, 0.4409],[0.2009, 0.1984, 0.2023]),])
CIFAR100_trainset = DataLoader(CIFAR100_trainset, batch_size=128, shuffle=True)
CIFAR100_testset = DataLoader(CIFAR100_testset, batch_size=128)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 257931442.25it/s]

Extracting ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 88098686.42it/s]


Extracting ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 77912368.58it/s]

Extracting ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 18974630.25it/s]


Extracting ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/CIFAR/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:12<00:00, 13126558.22it/s]


Extracting ./data/CIFAR/cifar-10-python.tar.gz to ./data/CIFAR
Files already downloaded and verified
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/CIFAR/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:12<00:00, 13030374.41it/s]


Extracting ./data/CIFAR/cifar-100-python.tar.gz to ./data/CIFAR
Files already downloaded and verified


# **Base Network - ResNet-18**

In [None]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()

        if stride != 1 or in_planes != self.expansion*planes:
            conv2d = nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False)
            bnorm2d = nn.BatchNorm2d(self.expansion*planes)
            self.shortcut = nn.Sequential(conv2d, bnorm2d)

    def forward(self, x):
        output = F.relu(self.bn1(self.conv1(x)))
        return F.relu(self.bn2(self.conv2(output)) + self.shortcut(x))


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
        self.shortcut = nn.Sequential()

        if stride != 1 or in_planes != self.expansion*planes:
            conv2d = nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False)
            bnorm2d = nn.BatchNorm2d(self.expansion*planes)
            self.shortcut = nn.Sequential(conv2d,bnorm2d)

    def forward(self, x):
        output = F.relu(self.bn1(self.conv1(x)))
        output = F.relu(self.bn2(self.conv2(output)))
        return F.relu(self.bn3(self.conv3(output)) + self.shortcut(x))


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.num_classes = num_classes
        self.linear = nn.Linear(512*block.expansion, self.num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for s in strides:
            layers.append(block(self.in_planes, planes, s))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        output = F.relu(self.bn1(self.conv1(x)))
        output = self.layer1(output)
        output = self.layer2(output)
        output = self.layer3(output)
        output = self.layer4(output)
        output = F.avg_pool2d(output, 4)
        output = output.view(output.size(0), -1)
        return self.linear(output)


def ResNet18(num_classes):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)
     

# **Model Training**

In [None]:
config = {
    'model_name' : 'resnet',
    'dataset_name' : 'CIFAR10',
    'batch_size' : 128,
    'lr': 1e-1,
    'weight_decay': 1e-4,
    'image_size' : 32,
    'epochs' : 20,
    'num_classes' : 10,
    'feature_extract' : False,
    'log_every': 50,
    'mixup_alpha' : 1,
    'augmentations': True,
    'device': torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    'model_type': 'Baseline'
}

device = config['device']


def train_model(model, trainloader, testloader, criterion, optimizer, scheduler, model_type, datasetname, num_epochs=25):
  print('model_type - ', model_type)

  start_time = time.time()
  val_acc_history = []

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  device = config['device']

  for epoch in range(num_epochs):

    print('Epoch {}/{}'.format(epoch+1, num_epochs), '\n----------')

    for phase in ['train', 'valid']:
      if phase == 'train':
        model.train()

        running_loss = 0.0
        running_corrects = 0

        for batch_idx,(inputs, labels) in tqdm(enumerate(trainloader)):
          optimizer.zero_grad()

          if model_type == 'MixUp':
            inputs = inputs.to(device)
            labels = labels.to(device) 
            inputs, labels, second_labels, lam = mixup(inputs, labels, config['mixup_alpha'])
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, labels, second_labels, lam)
            _, preds = torch.max(outputs, 1)
            
          elif model_type == 'CutMix':
            inputs = inputs.to(device)
            labels = labels.to(device) 
            inputs, labels, second_labels, lam = cutmix(inputs, labels, config['mixup_alpha'])
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, labels, second_labels, lam)
            _, preds = torch.max(outputs, 1)

          else:
            inputs = inputs.to(device)
            labels = labels.to(device) 
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
          
          loss.backward()
          optimizer.step()
          

          if batch_idx % config['log_every'] == 0: wandb.log({f'Train - loss': loss.item()})

          running_loss += loss.item() * inputs.size(0)
          running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(trainloader.dataset)
        epoch_acc = running_corrects.double() / len(trainloader.dataset)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        wandb.log({'Train - acc': epoch_acc})
      
      
      else:
        model.eval()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in testloader:
          inputs = inputs.to(device)
          labels = labels.to(device)
          optimizer.zero_grad()
          with torch.set_grad_enabled(False):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
          epoch_loss = running_loss / len(testloader.dataset)
          epoch_acc = running_corrects.double() / len(testloader.dataset)
        
        wandb.log({'Valid - loss': epoch_loss})
        wandb.log({'Valid - acc': epoch_acc})
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        if epoch_acc > best_acc:
          best_acc = epoch_acc
          best_model_wts = copy.deepcopy(model.state_dict())


        print('------------------------------')
        val_acc_history.append(epoch_acc)

    if scheduler: scheduler.step()



  time_diff = time.time() - start_time
  print('Training complete in {:.0f}m {:.0f}s'.format(time_diff // 60, time_diff % 60))
  print('Best validation Acc: {:4f}'.format(best_acc))
  model.load_state_dict(best_model_wts)
  return model, val_acc_history

def initialize_model(model_name, dataset_name, feature_extract=False, use_pretrained=False):
    input_size = 0

    if dataset_name == "mnist":
      input_size = 28
      num_classes = 10
    elif dataset_name =="cifar10":
      input_size = 32
      num_classes = 10
    else:
      input_size = 32
      num_classes = 100
    model_ft = ResNet18(num_classes)
    return model_ft, input_size

# **Training ResNet-18 on MNIST Dataset**

In [None]:
config['model_name'] = 'resnet'
config['dataset_name'] = 'mnist'
model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'])
optimizer_ft = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = 0.0005, nesterov = True)
scheduler_ft = CosineAnnealingLR(optimizer_ft, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
      config=config,
      project='Project',
      group= 'baseline_mnist',
      save_code=True,
  ):
  model_ft, hist = train_model(
      model_ft, mnist_trainset,
      mnist_testset, criterion,
      optimizer_ft, scheduler_ft,
      num_epochs=config['epochs'],
      model_type= "Baseline",
      datasetname = config['dataset_name']
      )


[34m[1mwandb[0m: Currently logged in as: [33messe-dawut[0m ([33mddmaster[0m). Use [1m`wandb login --relogin`[0m to force relogin


DAmethod -  Baseline
Epoch 1/20
----------


469it [01:21,  5.78it/s]


train Loss: 0.4012 Acc: 0.8984
valid Loss: 0.0837 Acc: 0.9734
Saving model...
------------------------------

Epoch 2/20
----------


469it [01:14,  6.31it/s]


train Loss: 0.0534 Acc: 0.9839
valid Loss: 0.0920 Acc: 0.9709
------------------------------

Epoch 3/20
----------


469it [01:14,  6.31it/s]


train Loss: 0.0381 Acc: 0.9886
valid Loss: 0.0443 Acc: 0.9872
Saving model...
------------------------------

Epoch 4/20
----------


469it [01:14,  6.28it/s]


train Loss: 0.0323 Acc: 0.9903
valid Loss: 0.0356 Acc: 0.9890
Saving model...
------------------------------

Epoch 5/20
----------


469it [01:15,  6.21it/s]


train Loss: 0.0272 Acc: 0.9919
valid Loss: 0.0528 Acc: 0.9839
------------------------------

Epoch 6/20
----------


469it [01:14,  6.31it/s]


train Loss: 0.0290 Acc: 0.9910
valid Loss: 0.0390 Acc: 0.9874
------------------------------

Epoch 7/20
----------


469it [01:14,  6.28it/s]


train Loss: 0.0247 Acc: 0.9925
valid Loss: 0.0466 Acc: 0.9863
------------------------------

Epoch 8/20
----------


469it [01:14,  6.32it/s]


train Loss: 0.0252 Acc: 0.9926
valid Loss: 0.0508 Acc: 0.9838
------------------------------

Epoch 9/20
----------


469it [01:14,  6.30it/s]


train Loss: 0.0232 Acc: 0.9935
valid Loss: 0.0398 Acc: 0.9884
------------------------------

Epoch 10/20
----------


469it [01:13,  6.35it/s]


train Loss: 0.0250 Acc: 0.9925
valid Loss: 0.0521 Acc: 0.9841
------------------------------

Epoch 11/20
----------


469it [01:14,  6.32it/s]


train Loss: 0.0226 Acc: 0.9933
valid Loss: 0.0379 Acc: 0.9878
------------------------------

Epoch 12/20
----------


469it [01:14,  6.30it/s]


train Loss: 0.0223 Acc: 0.9934
valid Loss: 0.0702 Acc: 0.9791
------------------------------

Epoch 13/20
----------


469it [01:15,  6.20it/s]


train Loss: 0.0223 Acc: 0.9935
valid Loss: 0.0306 Acc: 0.9907
Saving model...
------------------------------

Epoch 14/20
----------


469it [01:18,  5.99it/s]


train Loss: 0.0207 Acc: 0.9940
valid Loss: 0.0336 Acc: 0.9903
------------------------------

Epoch 15/20
----------


469it [01:13,  6.35it/s]


train Loss: 0.0195 Acc: 0.9949
valid Loss: 0.0323 Acc: 0.9898
------------------------------

Epoch 16/20
----------


469it [01:15,  6.25it/s]


train Loss: 0.0197 Acc: 0.9946
valid Loss: 0.0406 Acc: 0.9888
------------------------------

Epoch 17/20
----------


469it [01:14,  6.31it/s]


train Loss: 0.0205 Acc: 0.9941
valid Loss: 0.0260 Acc: 0.9922
Saving model...
------------------------------

Epoch 18/20
----------


469it [01:13,  6.35it/s]


train Loss: 0.0184 Acc: 0.9947
valid Loss: 0.0318 Acc: 0.9902
------------------------------

Epoch 19/20
----------


469it [01:16,  6.14it/s]


train Loss: 0.0171 Acc: 0.9952
valid Loss: 0.0644 Acc: 0.9803
------------------------------

Epoch 20/20
----------


469it [01:13,  6.36it/s]


train Loss: 0.0186 Acc: 0.9946
valid Loss: 0.0301 Acc: 0.9913
------------------------------

Best model saved.
Training complete in 27m 10s
Best val Acc: 0.992200


0,1
Train - acc,▁▇██████████████████
Train - loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
Valid - acc,▂▁▆▇▅▆▆▅▇▅▇▄█▇▇▇█▇▄█
Valid - loss,▇█▃▂▄▂▃▄▂▄▂▆▁▂▂▃▁▂▅▁

0,1
Train - acc,0.99463
Train - loss,0.0023
Valid - acc,0.9913
Valid - loss,0.03008


# **Training ResNet-18 on CIFAR-10 Dataset**

In [None]:
config['model_name'] = 'resnet'
config['dataset_name'] = 'cifar10'
model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'])
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = 0.0005, nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6)
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
        config=config,
        project='Project',
        group= 'baseline_cifar10',
        save_code=True,
    ):
  model_ft, hist = train_model(
      model_ft,
      CIFAR10_trainset,
      CIFAR10_testset,
      criterion,
      optimizer,
      scheduler,
      num_epochs=20,
      model_type = 'Baseline',
      datasetname = config['dataset_name']
      )


DAmethod -  Baseline
Epoch 1/20
----------


391it [01:09,  5.61it/s]


train Loss: 2.1439 Acc: 0.2594
valid Loss: 1.6627 Acc: 0.3824
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.5354 Acc: 0.4316
valid Loss: 1.4112 Acc: 0.4866
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:09,  5.59it/s]


train Loss: 1.2853 Acc: 0.5331
valid Loss: 1.2625 Acc: 0.5514
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.0613 Acc: 0.6226
valid Loss: 0.9874 Acc: 0.6472
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:09,  5.65it/s]


train Loss: 0.9113 Acc: 0.6769
valid Loss: 1.0163 Acc: 0.6495
Saving model...
------------------------------

Epoch 6/20
----------


391it [01:10,  5.55it/s]


train Loss: 0.8119 Acc: 0.7121
valid Loss: 1.0030 Acc: 0.6650
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:09,  5.65it/s]


train Loss: 0.7245 Acc: 0.7452
valid Loss: 0.7642 Acc: 0.7365
Saving model...
------------------------------

Epoch 8/20
----------


391it [01:09,  5.59it/s]


train Loss: 0.6450 Acc: 0.7750
valid Loss: 0.7473 Acc: 0.7415
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:08,  5.68it/s]


train Loss: 0.5869 Acc: 0.7962
valid Loss: 0.8212 Acc: 0.7336
------------------------------

Epoch 10/20
----------


391it [01:09,  5.66it/s]


train Loss: 0.5457 Acc: 0.8126
valid Loss: 0.6317 Acc: 0.7840
Saving model...
------------------------------

Epoch 11/20
----------


391it [01:09,  5.62it/s]


train Loss: 0.5130 Acc: 0.8225
valid Loss: 0.6102 Acc: 0.7999
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.4900 Acc: 0.8322
valid Loss: 0.7122 Acc: 0.7703
------------------------------

Epoch 13/20
----------


391it [01:09,  5.62it/s]


train Loss: 0.4654 Acc: 0.8390
valid Loss: 0.5568 Acc: 0.8122
Saving model...
------------------------------

Epoch 14/20
----------


391it [01:09,  5.63it/s]


train Loss: 0.4530 Acc: 0.8452
valid Loss: 0.7468 Acc: 0.7581
------------------------------

Epoch 15/20
----------


391it [01:09,  5.64it/s]


train Loss: 0.4312 Acc: 0.8525
valid Loss: 0.6759 Acc: 0.7747
------------------------------

Epoch 16/20
----------


391it [01:09,  5.62it/s]


train Loss: 0.4164 Acc: 0.8571
valid Loss: 0.6018 Acc: 0.8025
------------------------------

Epoch 17/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.3958 Acc: 0.8650
valid Loss: 0.5062 Acc: 0.8313
Saving model...
------------------------------

Epoch 18/20
----------


391it [01:09,  5.63it/s]


train Loss: 0.3883 Acc: 0.8660
valid Loss: 0.5286 Acc: 0.8259
------------------------------

Epoch 19/20
----------


391it [01:10,  5.53it/s]


train Loss: 0.3728 Acc: 0.8718
valid Loss: 0.4966 Acc: 0.8354
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:10,  5.56it/s]


train Loss: 0.3635 Acc: 0.8760
valid Loss: 0.5040 Acc: 0.8341
------------------------------

Best model saved.
Training complete in 24m 59s
Best val Acc: 0.835400


0,1
Train - acc,▁▃▄▅▆▆▇▇▇▇▇█████████
Train - loss,█▇▆▆▅▄▅▄▃▃▃▃▃▃▂▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▂▁▂
Valid - acc,▁▃▄▅▅▅▆▇▆▇▇▇█▇▇▇████
Valid - loss,█▆▆▄▄▄▃▃▃▂▂▂▁▃▂▂▁▁▁▁

0,1
Train - acc,0.87602
Train - loss,0.37879
Valid - acc,0.8341
Valid - loss,0.504


# **Training ResNet-18 on CIFAR-100 Dataset**

In [None]:
config['dataset_name'] = 'cifar100'
config['model_name'] = 'resnet'

model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'])
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = 0.0005, nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6)
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
        config=config,
        project='Project',
        group= 'baseline_cifar100',
        save_code=True,
    ):
  model_ft, hist = train_model(
      model_ft,
      CIFAR100_trainset,
      CIFAR100_testset,
      criterion,
      optimizer,
      scheduler,
      num_epochs=config['epochs'],
      model_type = 'Baseline',
      datasetname = config['dataset_name'])

DAmethod -  Baseline
Epoch 1/20
----------


391it [01:10,  5.51it/s]


train Loss: 3.9111 Acc: 0.1001
valid Loss: 3.5916 Acc: 0.1392
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:09,  5.66it/s]


train Loss: 3.1986 Acc: 0.2076
valid Loss: 3.0796 Acc: 0.2376
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:09,  5.59it/s]


train Loss: 2.6656 Acc: 0.3117
valid Loss: 2.6789 Acc: 0.3136
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:09,  5.65it/s]


train Loss: 2.2421 Acc: 0.3966
valid Loss: 2.4106 Acc: 0.3815
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.9524 Acc: 0.4632
valid Loss: 2.1802 Acc: 0.4244
Saving model...
------------------------------

Epoch 6/20
----------


391it [01:10,  5.58it/s]


train Loss: 1.7663 Acc: 0.5091
valid Loss: 1.9521 Acc: 0.4752
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.6355 Acc: 0.5389
valid Loss: 1.7921 Acc: 0.5050
Saving model...
------------------------------

Epoch 8/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.5450 Acc: 0.5632
valid Loss: 1.6741 Acc: 0.5442
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.4606 Acc: 0.5844
valid Loss: 1.7797 Acc: 0.5248
------------------------------

Epoch 10/20
----------


391it [01:09,  5.66it/s]


train Loss: 1.3971 Acc: 0.6015
valid Loss: 1.9435 Acc: 0.4914
------------------------------

Epoch 11/20
----------


391it [01:09,  5.60it/s]


train Loss: 1.3448 Acc: 0.6144
valid Loss: 1.6185 Acc: 0.5587
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.2951 Acc: 0.6246
valid Loss: 1.6387 Acc: 0.5499
------------------------------

Epoch 13/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.2492 Acc: 0.6379
valid Loss: 1.8440 Acc: 0.5157
------------------------------

Epoch 14/20
----------


391it [01:09,  5.60it/s]


train Loss: 1.2074 Acc: 0.6506
valid Loss: 1.6338 Acc: 0.5591
Saving model...
------------------------------

Epoch 15/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.1745 Acc: 0.6566
valid Loss: 1.7177 Acc: 0.5603
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.1390 Acc: 0.6680
valid Loss: 1.6695 Acc: 0.5577
------------------------------

Epoch 17/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.1054 Acc: 0.6753
valid Loss: 1.4462 Acc: 0.6064
Saving model...
------------------------------

Epoch 18/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.0685 Acc: 0.6862
valid Loss: 1.5638 Acc: 0.5755
------------------------------

Epoch 19/20
----------


391it [01:09,  5.59it/s]


train Loss: 1.0433 Acc: 0.6929
valid Loss: 1.6284 Acc: 0.5704
------------------------------

Epoch 20/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.0082 Acc: 0.7034
valid Loss: 1.5192 Acc: 0.5840
------------------------------

Best model saved.
Training complete in 24m 57s
Best val Acc: 0.606400


0,1
Train - acc,▁▂▃▄▅▆▆▆▇▇▇▇▇▇▇█████
Train - loss,█▇▆▅▅▄▄▃▄▃▃▄▃▃▂▃▂▃▂▃▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▂▁▂
Valid - acc,▁▂▄▅▅▆▆▇▇▆▇▇▇▇▇▇██▇█
Valid - loss,█▆▅▄▃▃▂▂▂▃▂▂▂▂▂▂▁▁▂▁

0,1
Train - acc,0.70342
Train - loss,1.06535
Valid - acc,0.584
Valid - loss,1.51922


# **Mixup**

In [None]:
def mixup(inputs, labels, alpha):
  batch_size = inputs.size(0)
  shuffled_batch = torch.randperm(batch_size).to(config['device'])
  second_inputs = inputs[shuffled_batch]
  second_labels = labels[shuffled_batch]
  lam = np.random.beta(alpha, alpha)
  mixup_inputs = inputs * lam + second_inputs * (1 - lam)
  return mixup_inputs, labels, second_labels, lam

def mixup_criterion(criterion, output, original_labels, shuffled_labels, lam):
    return lam * criterion(output, original_labels) + (1 - lam) * criterion(output, shuffled_labels)


# **Training ResNet-18 + Mix Up on CIFAR-10 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR10'
config['model_name'] =  'resnet'
config['model_type'] = 'MixUp'
model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'] )

optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
    config=config,
    project='Project',
    save_code=True,
    group='resnet18_cifar10_mixup'):
  
  model_ft, hist = train_model(
      model_ft,
      CIFAR10_trainset,
      CIFAR10_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )


DAmethod -  MixUp
Epoch 1/20
----------


391it [01:09,  5.61it/s]


train Loss: 2.2598 Acc: 0.1881
valid Loss: 1.7518 Acc: 0.3710
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:10,  5.56it/s]


train Loss: 1.9485 Acc: 0.2480
valid Loss: 1.5301 Acc: 0.4511
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.8221 Acc: 0.2779
valid Loss: 1.4737 Acc: 0.4733
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:09,  5.61it/s]


train Loss: 1.7438 Acc: 0.3092
valid Loss: 1.3487 Acc: 0.5290
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:09,  5.64it/s]


train Loss: 1.6587 Acc: 0.3397
valid Loss: 1.3733 Acc: 0.5077
------------------------------

Epoch 6/20
----------


391it [01:09,  5.62it/s]


train Loss: 1.5803 Acc: 0.3548
valid Loss: 1.1621 Acc: 0.6065
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:10,  5.56it/s]


train Loss: 1.5405 Acc: 0.3636
valid Loss: 1.0832 Acc: 0.6372
Saving model...
------------------------------

Epoch 8/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.4544 Acc: 0.4077
valid Loss: 0.9519 Acc: 0.6965
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.4046 Acc: 0.4161
valid Loss: 0.9125 Acc: 0.7090
Saving model...
------------------------------

Epoch 10/20
----------


391it [01:10,  5.56it/s]


train Loss: 1.3621 Acc: 0.4352
valid Loss: 0.8787 Acc: 0.7282
Saving model...
------------------------------

Epoch 11/20
----------


391it [01:09,  5.60it/s]


train Loss: 1.3179 Acc: 0.4106
valid Loss: 0.7939 Acc: 0.7483
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:10,  5.58it/s]


train Loss: 1.3058 Acc: 0.4155
valid Loss: 0.7696 Acc: 0.7765
Saving model...
------------------------------

Epoch 13/20
----------


391it [01:09,  5.62it/s]


train Loss: 1.2991 Acc: 0.4431
valid Loss: 0.6674 Acc: 0.8019
Saving model...
------------------------------

Epoch 14/20
----------


391it [01:09,  5.63it/s]


train Loss: 1.2697 Acc: 0.4657
valid Loss: 0.8055 Acc: 0.7458
------------------------------

Epoch 15/20
----------


391it [01:09,  5.60it/s]


train Loss: 1.2540 Acc: 0.4434
valid Loss: 0.6484 Acc: 0.8228
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:09,  5.66it/s]


train Loss: 1.2353 Acc: 0.4505
valid Loss: 0.6364 Acc: 0.8280
Saving model...
------------------------------

Epoch 17/20
----------


391it [01:09,  5.65it/s]


train Loss: 1.1815 Acc: 0.4939
valid Loss: 0.6453 Acc: 0.8208
------------------------------

Epoch 18/20
----------


391it [01:10,  5.58it/s]


train Loss: 1.2018 Acc: 0.4569
valid Loss: 0.6348 Acc: 0.8223
------------------------------

Epoch 19/20
----------


391it [01:09,  5.62it/s]


train Loss: 1.1954 Acc: 0.4496
valid Loss: 0.6032 Acc: 0.8435
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:09,  5.66it/s]


train Loss: 1.1710 Acc: 0.4958
valid Loss: 0.6086 Acc: 0.8405
------------------------------

Best model saved.
Training complete in 25m 3s
Best val Acc: 0.843500


0,1
Train - acc,▁▂▃▄▄▅▅▆▆▇▆▆▇▇▇▇█▇▇█
Train - loss,██▇▇▅▇▆▇▄▅▅▇▆▆▄▄▅▃▃▂▅▅▆▃▃▆▁▅▂▆▅▅▄▅▅▅▁▅▄▅
Valid - acc,▁▂▃▃▃▄▅▆▆▆▇▇▇▇██████
Valid - loss,█▇▆▆▆▄▄▃▃▃▂▂▁▂▁▁▁▁▁▁

0,1
Train - acc,0.49576
Train - loss,1.5986
Valid - acc,0.8405
Valid - loss,0.60859


# **Training ResNet-18 + Mix Up on CIFAR-100 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR100'
config['model_name'] =  'resnet'
config['model_type'] = 'MixUp'
model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'] )
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
    config=config,
    project='Project',
    save_code=True,
    group='resnet18_cifar100_mixup'):
  
    model_ft, hist = train_model(
      model_ft,
      CIFAR100_trainset,
      CIFAR100_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )


     

DAmethod -  MixUp
Epoch 1/20
----------


391it [01:09,  5.64it/s]


train Loss: 4.2514 Acc: 0.0509
valid Loss: 3.7574 Acc: 0.1166
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:09,  5.62it/s]


train Loss: 3.8943 Acc: 0.0797
valid Loss: 3.2231 Acc: 0.2081
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:10,  5.53it/s]


train Loss: 3.6441 Acc: 0.1169
valid Loss: 2.9389 Acc: 0.2712
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:09,  5.62it/s]


train Loss: 3.3886 Acc: 0.1503
valid Loss: 2.5053 Acc: 0.3628
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:09,  5.64it/s]


train Loss: 3.2065 Acc: 0.1675
valid Loss: 2.3129 Acc: 0.4034
Saving model...
------------------------------

Epoch 6/20
----------


391it [01:09,  5.61it/s]


train Loss: 3.1322 Acc: 0.2026
valid Loss: 2.2106 Acc: 0.4287
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:09,  5.65it/s]


train Loss: 3.0460 Acc: 0.2053
valid Loss: 2.1892 Acc: 0.4274
------------------------------

Epoch 8/20
----------


391it [01:09,  5.66it/s]


train Loss: 2.8451 Acc: 0.2275
valid Loss: 1.9515 Acc: 0.4962
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:10,  5.58it/s]


train Loss: 2.7686 Acc: 0.2197
valid Loss: 1.8396 Acc: 0.5258
Saving model...
------------------------------

Epoch 10/20
----------


391it [01:09,  5.64it/s]


train Loss: 2.6821 Acc: 0.2719
valid Loss: 1.8010 Acc: 0.5273
Saving model...
------------------------------

Epoch 11/20
----------


391it [01:10,  5.55it/s]


train Loss: 2.6875 Acc: 0.2694
valid Loss: 1.7351 Acc: 0.5464
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:09,  5.66it/s]


train Loss: 2.6419 Acc: 0.2720
valid Loss: 1.7527 Acc: 0.5533
Saving model...
------------------------------

Epoch 13/20
----------


391it [01:09,  5.62it/s]


train Loss: 2.5740 Acc: 0.2977
valid Loss: 1.6171 Acc: 0.5814
Saving model...
------------------------------

Epoch 14/20
----------


391it [01:10,  5.57it/s]


train Loss: 2.5117 Acc: 0.2874
valid Loss: 1.6192 Acc: 0.5855
Saving model...
------------------------------

Epoch 15/20
----------


391it [01:09,  5.62it/s]


train Loss: 2.5172 Acc: 0.2907
valid Loss: 1.5287 Acc: 0.6055
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:09,  5.59it/s]


train Loss: 2.4336 Acc: 0.3232
valid Loss: 1.4554 Acc: 0.6153
Saving model...
------------------------------

Epoch 17/20
----------


391it [01:10,  5.55it/s]


train Loss: 2.4354 Acc: 0.3369
valid Loss: 1.5405 Acc: 0.5973
------------------------------

Epoch 18/20
----------


391it [01:09,  5.62it/s]


train Loss: 2.3297 Acc: 0.3253
valid Loss: 1.4946 Acc: 0.6211
Saving model...
------------------------------

Epoch 19/20
----------


391it [01:09,  5.61it/s]


train Loss: 2.3605 Acc: 0.3318
valid Loss: 1.4690 Acc: 0.6296
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:10,  5.55it/s]


train Loss: 2.3192 Acc: 0.3451
valid Loss: 1.3766 Acc: 0.6472
Saving model...
------------------------------

Best model saved.
Training complete in 25m 1s
Best val Acc: 0.647200


0,1
Train - acc,▁▂▃▃▄▅▅▅▅▆▆▆▇▇▇▇████
Train - loss,█▇█▇▇▅▅▅▄▃▄▆▆▅▆▅▃▃▄▄▅▆▁▆▅▄▃▂▃▃▅▃▅▂▅▁▅▅▂▄
Valid - acc,▁▂▃▄▅▅▅▆▆▆▇▇▇▇▇█▇███
Valid - loss,█▆▆▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
Train - acc,0.3451
Train - loss,2.73077
Valid - acc,0.6472
Valid - loss,1.37655


# **Cut Mix**

In [None]:
def cutmix(inputs, labels, alpha):
  batch_size = inputs.size(0)
  shuffled_batch = torch.randperm(batch_size).to(config['device'])
  second_inputs = inputs[shuffled_batch]
  second_labels = labels[shuffled_batch]
  lam = np.random.beta(alpha, alpha)
  
  mixup_inputs = torch.clone(inputs)
  W = mixup_inputs.size(2)
  H = mixup_inputs.size(3)

  for i in range(batch_size):
    height = int(W*np.sqrt(1-lam)) 
    width = int(H*np.sqrt(1-lam))
    randx = np.random.randint(0,W-width)
    randy = np.random.randint(0,H-height)

    mixup_inputs[i,:,randy:randy + height,randx:randx + width] = second_inputs[i,:,randy:randy + height,randx:randx + width]

  return mixup_inputs, labels, second_labels, lam

def mixup_criterion(criterion, output, original_labels, shuffled_labels, lam):
    return lam * criterion(output, original_labels) + (1 - lam) * criterion(output, shuffled_labels)

# **Training ResNet-18 + CutMix on CIFAR-10 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR10'
config['model_name'] =  'resnet'
config['model_type'] = 'CutMix'
model_ft, input_size = initialize_model('resnet', 'CIFAR10')
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(
    config=config,
    project='Project',
    save_code=True,
    group='resnet18_cifar10_cutmix'):
  model_ft, hist = train_model(
      model_ft,
      CIFAR10_trainset,
      CIFAR10_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )

DAmethod -  CutMix
Epoch 1/20
----------


391it [01:12,  5.41it/s]


train Loss: 2.1714 Acc: 0.1787
valid Loss: 1.6815 Acc: 0.3771
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:12,  5.36it/s]


train Loss: 1.8696 Acc: 0.2451
valid Loss: 1.3753 Acc: 0.5042
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:11,  5.44it/s]


train Loss: 1.7334 Acc: 0.2757
valid Loss: 1.3262 Acc: 0.5273
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:11,  5.45it/s]


train Loss: 1.6351 Acc: 0.3061
valid Loss: 1.1361 Acc: 0.6111
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:12,  5.40it/s]


train Loss: 1.5625 Acc: 0.3188
valid Loss: 1.2280 Acc: 0.5651
------------------------------

Epoch 6/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.5297 Acc: 0.3429
valid Loss: 0.9582 Acc: 0.6799
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.4986 Acc: 0.3367
valid Loss: 0.8671 Acc: 0.7136
Saving model...
------------------------------

Epoch 8/20
----------


391it [01:12,  5.41it/s]


train Loss: 1.4027 Acc: 0.3816
valid Loss: 0.7355 Acc: 0.7587
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:11,  5.48it/s]


train Loss: 1.3862 Acc: 0.3812
valid Loss: 0.8145 Acc: 0.7391
------------------------------

Epoch 10/20
----------


391it [01:11,  5.48it/s]


train Loss: 1.3203 Acc: 0.3735
valid Loss: 0.7636 Acc: 0.7545
------------------------------

Epoch 11/20
----------


391it [01:12,  5.43it/s]


train Loss: 1.3125 Acc: 0.3757
valid Loss: 0.6791 Acc: 0.7901
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.2689 Acc: 0.3859
valid Loss: 0.6042 Acc: 0.8280
Saving model...
------------------------------

Epoch 13/20
----------


391it [01:11,  5.48it/s]


train Loss: 1.2620 Acc: 0.3872
valid Loss: 0.6067 Acc: 0.8207
------------------------------

Epoch 14/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.2015 Acc: 0.3731
valid Loss: 0.6228 Acc: 0.8093
------------------------------

Epoch 15/20
----------


391it [01:11,  5.46it/s]


train Loss: 1.2262 Acc: 0.3980
valid Loss: 0.5482 Acc: 0.8459
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:11,  5.43it/s]


train Loss: 1.1800 Acc: 0.4081
valid Loss: 0.6146 Acc: 0.8304
------------------------------

Epoch 17/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.1649 Acc: 0.4130
valid Loss: 0.4880 Acc: 0.8531
Saving model...
------------------------------

Epoch 18/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.1662 Acc: 0.4190
valid Loss: 0.5470 Acc: 0.8470
------------------------------

Epoch 19/20
----------


391it [01:12,  5.41it/s]


train Loss: 1.1281 Acc: 0.4045
valid Loss: 0.4870 Acc: 0.8686
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:11,  5.47it/s]


train Loss: 1.1227 Acc: 0.3951
valid Loss: 0.5426 Acc: 0.8453
------------------------------

Best model saved.
Training complete in 25m 44s
Best val Acc: 0.868600


0,1
Train - acc,▁▃▄▅▅▆▆▇▇▇▇▇▇▇▇████▇
Train - loss,█▇▆▆▆▅▅▆▆▆▅▆▅▆▃▃▄▄▃▂▅▄▂▅▅▃▁▄▅▂▃▅▄▃▂▃▅▅▃▅
Valid - acc,▁▃▃▄▄▅▆▆▆▆▇▇▇▇█▇████
Valid - loss,█▆▆▅▅▄▃▂▃▃▂▂▂▂▁▂▁▁▁▁

0,1
Train - acc,0.39508
Train - loss,1.51644
Valid - acc,0.8453
Valid - loss,0.54261


# **Training ResNet-18 + CutMix on CIFAR-100 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR100'
config['model_name'] =  'resnet'
config['model_type'] = 'CutMix'
model_ft, input_size = initialize_model('resnet', 'CIFAR100')
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(config=config, project='Project', save_code=True, group='resnet18_cifar100_cutmix'):
  model_ft, hist = train_model(
      model_ft,
      CIFAR100_trainset,
      CIFAR100_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )

DAmethod -  CutMix
Epoch 1/20
----------


391it [01:11,  5.44it/s]


train Loss: 4.2562 Acc: 0.0379
valid Loss: 3.6775 Acc: 0.1310
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:12,  5.42it/s]


train Loss: 3.9412 Acc: 0.0669
valid Loss: 3.4524 Acc: 0.1708
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:11,  5.46it/s]


train Loss: 3.7339 Acc: 0.0914
valid Loss: 3.0207 Acc: 0.2496
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:11,  5.45it/s]


train Loss: 3.5262 Acc: 0.1065
valid Loss: 2.7094 Acc: 0.3149
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:11,  5.48it/s]


train Loss: 3.3303 Acc: 0.1302
valid Loss: 2.4198 Acc: 0.3748
Saving model...
------------------------------

Epoch 6/20
----------


391it [01:11,  5.49it/s]


train Loss: 3.2006 Acc: 0.1435
valid Loss: 2.3566 Acc: 0.3918
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:12,  5.40it/s]


train Loss: 3.0310 Acc: 0.1668
valid Loss: 2.1074 Acc: 0.4414
Saving model...
------------------------------

Epoch 8/20
----------


391it [01:11,  5.46it/s]


train Loss: 2.9365 Acc: 0.1999
valid Loss: 2.0581 Acc: 0.4620
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:11,  5.48it/s]


train Loss: 2.8679 Acc: 0.1801
valid Loss: 1.8904 Acc: 0.4991
Saving model...
------------------------------

Epoch 10/20
----------


391it [01:12,  5.42it/s]


train Loss: 2.7741 Acc: 0.1836
valid Loss: 1.7857 Acc: 0.5251
Saving model...
------------------------------

Epoch 11/20
----------


391it [01:11,  5.47it/s]


train Loss: 2.7628 Acc: 0.2093
valid Loss: 1.6696 Acc: 0.5536
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:11,  5.48it/s]


train Loss: 2.6994 Acc: 0.2180
valid Loss: 1.6665 Acc: 0.5592
Saving model...
------------------------------

Epoch 13/20
----------


391it [01:12,  5.40it/s]


train Loss: 2.6088 Acc: 0.2302
valid Loss: 1.6307 Acc: 0.5629
Saving model...
------------------------------

Epoch 14/20
----------


391it [01:11,  5.46it/s]


train Loss: 2.6018 Acc: 0.2380
valid Loss: 1.5171 Acc: 0.5971
Saving model...
------------------------------

Epoch 15/20
----------


391it [01:11,  5.48it/s]


train Loss: 2.5368 Acc: 0.2310
valid Loss: 1.5081 Acc: 0.6082
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:12,  5.43it/s]


train Loss: 2.4971 Acc: 0.2600
valid Loss: 1.4618 Acc: 0.6110
Saving model...
------------------------------

Epoch 17/20
----------


391it [01:11,  5.44it/s]


train Loss: 2.4236 Acc: 0.2613
valid Loss: 1.4705 Acc: 0.6253
Saving model...
------------------------------

Epoch 18/20
----------


391it [01:11,  5.49it/s]


train Loss: 2.4463 Acc: 0.2597
valid Loss: 1.4632 Acc: 0.6162
------------------------------

Epoch 19/20
----------


391it [01:12,  5.40it/s]


train Loss: 2.3556 Acc: 0.2482
valid Loss: 1.3236 Acc: 0.6384
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:11,  5.47it/s]


train Loss: 2.3588 Acc: 0.2718
valid Loss: 1.3645 Acc: 0.6429
Saving model...
------------------------------

Best model saved.
Training complete in 25m 43s
Best val Acc: 0.642900


0,1
Train - acc,▁▂▃▃▄▄▅▆▅▅▆▆▇▇▇███▇█
Train - loss,█▇▇▇▇▅▇▅▆▅▆▅▃▃▄▅▅▅▅▄▅▅▁▅▅▅▅▃▅▃▃▃▃▄▃▁▃▃▄▁
Valid - acc,▁▂▃▄▄▅▅▆▆▆▇▇▇▇██████
Valid - loss,█▇▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
Train - acc,0.27182
Train - loss,1.41847
Valid - acc,0.6429
Valid - loss,1.36455


# **AugMix**

In [None]:
def int_parameter(level,maxval):
  return int(level*maxval/10)

def float_parameter(level,maxval):
  return float(level)*maxval/10.

def sample_level(n):
  return np.random.uniform(low=0.1,high=n)

def autocontrast(pil_img, _ , image_size):
  return ImageOps.autocontrast(pil_img)

def equalize(pil_img, _, image_size):
  return ImageOps.equalize(pil_img)

def posterize(pil_img, level, image_size):
  return ImageOps.posterize(pil_img, 4 - int_parameter(sample_level(level), 4))

def rotate(pil_img, level, image_size):
  degrees = int_parameter(sample_level(level), 30)
  if np.random.uniform() > 0.5:
    degrees = -degrees
  return pil_img.rotate(degrees, resample=Image.BILINEAR)

def solarize(pil_img, level, image_size):
  return ImageOps.solarize(pil_img, 256 - int_parameter(sample_level(level), 256))

def shear_x(pil_img, level, image_size):
  level = float_parameter(sample_level(level), 0.3)
  if np.random.uniform() > 0.5:
    level = -level
  return pil_img.transform((image_size, image_size),Image.AFFINE, (1, level, 0, 0, 1, 0),resample=Image.BILINEAR)

def shear_y(pil_img, level, image_size):
  level = float_parameter(sample_level(level), 0.3)
  if np.random.uniform() > 0.5:
    level = -level
  return pil_img.transform((image_size, image_size),Image.AFFINE, (1, 0, 0, level, 1, 0),resample=Image.BILINEAR)

def translate_x(pil_img, level, image_size):
  level = int_parameter(sample_level(level), image_size / 3)
  if np.random.random() > 0.5:
    level = -level
  return pil_img.transform((image_size, image_size), Image.AFFINE, (1, 0, level, 0, 1, 0), resample=Image.BILINEAR)

def translate_y(pil_img, level, image_size):
  level = int_parameter(sample_level(level), image_size / 3)
  if np.random.random() > 0.5:
    level = -level
  return pil_img.transform((image_size, image_size), Image.AFFINE, (1, 0, 0, 0, 1, level), resample=Image.BILINEAR)

def color(pil_img, level, image_size):
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Color(pil_img).enhance(level)

def contrast(pil_img, level, image_size):
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Contrast(pil_img).enhance(level)

def brightness(pil_img, level, image_size):
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Brightness(pil_img).enhance(level)

def sharpness(pil_img, level, image_size):
    level = float_parameter(sample_level(level), 1.8) + 0.1
    return ImageEnhance.Sharpness(pil_img).enhance(level)

augmentations = [
    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
    translate_x, translate_y
]

augmentations_all = [
    autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
    translate_x, translate_y, color, contrast, brightness, sharpness
]

def augmix(image, preprocess, dataset_name, mixture_width:int=3, aug_severity = 3, all_ops = True):
  
  if dataset_name == "mnist":
    size = 28
  else:
    size = 32

  aug_list = augmentations
  if all_ops:
    aug_list = augmentations_all
  
  ws = np.float32(np.random.dirichlet([1] * mixture_width))
  m = np.float32(np.random.beta(1,1))

  mix = torch.zeros_like(preprocess(image))

  for i in range(mixture_width):
    image_aug = image.copy()
    depth = np.random.randint(1, 4)
    for _ in range(depth):
      op = np.random.choice(aug_list)
      image_aug = op(image_aug, aug_severity, size)
    mix += ws[i] * preprocess(image_aug)
  
  
  mixed = (1-m) * preprocess(image) + m * mix

  return mixed

class AugMixDataset(torch.utils.data.Dataset):
  def __init__(self,dataset,preprocess,dataset_name,all_ops=True,jsd_loss=False):
    self.dataset = dataset
    self.preprocess = preprocess
    self.jsd_loss = jsd_loss
    self.dataset_name = dataset_name
    self.all_ops = all_ops
  
  def __getitem__(self, i):
    x, y = self.dataset[i]
    if self.jsd_loss:
      im_tuple = (
          self.preprocess(x), 
          augmix(x, self.preprocess, self.dataset_name, self.all_ops),
          augmix(x, self.preprocess, self.dataset_name, self.all_ops),
          )
      return im_tuple,y
    else:
      return augmix(x, self.preprocess, self.dataset_name, self.all_ops), y
  
  def __len__(self):
    return len(self.dataset)


# **Training ResNet-18 + AugMix on CIFAR-10 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR10'
config['model_name'] =  'resnet'
config['model_type'] = 'AugMix'
model_ft, input_size = initialize_model('resnet', 'CIFAR10')
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6 )
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(config=config, project='Project', save_code=True, group='resnet18_cifar10_augmix'):
  model_ft, hist = train_model(
      model_ft,
      CIFAR10_trainset,
      CIFAR10_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )


DAmethod -  AugMix
Epoch 1/20
----------


391it [01:09,  5.61it/s]


train Loss: 1.9922 Acc: 0.3119
valid Loss: 1.5976 Acc: 0.4167
Saving model...
------------------------------

Epoch 2/20
----------


391it [01:08,  5.68it/s]


train Loss: 1.3712 Acc: 0.4996
valid Loss: 1.2276 Acc: 0.5600
Saving model...
------------------------------

Epoch 3/20
----------


391it [01:09,  5.64it/s]


train Loss: 1.0794 Acc: 0.6147
valid Loss: 0.9912 Acc: 0.6494
Saving model...
------------------------------

Epoch 4/20
----------


391it [01:09,  5.60it/s]


train Loss: 0.9077 Acc: 0.6800
valid Loss: 0.8823 Acc: 0.6908
Saving model...
------------------------------

Epoch 5/20
----------


391it [01:09,  5.65it/s]


train Loss: 0.7818 Acc: 0.7261
valid Loss: 0.7882 Acc: 0.7325
Saving model...
------------------------------

Epoch 6/20
----------


391it [01:08,  5.68it/s]


train Loss: 0.6756 Acc: 0.7640
valid Loss: 0.6528 Acc: 0.7708
Saving model...
------------------------------

Epoch 7/20
----------


391it [01:10,  5.58it/s]


train Loss: 0.6015 Acc: 0.7918
valid Loss: 0.7006 Acc: 0.7664
------------------------------

Epoch 8/20
----------


391it [01:09,  5.67it/s]


train Loss: 0.5428 Acc: 0.8130
valid Loss: 0.5639 Acc: 0.8086
Saving model...
------------------------------

Epoch 9/20
----------


391it [01:08,  5.68it/s]


train Loss: 0.4914 Acc: 0.8313
valid Loss: 0.5711 Acc: 0.8030
------------------------------

Epoch 10/20
----------


391it [01:09,  5.66it/s]


train Loss: 0.4506 Acc: 0.8434
valid Loss: 0.6070 Acc: 0.7958
------------------------------

Epoch 11/20
----------


391it [01:08,  5.69it/s]


train Loss: 0.4124 Acc: 0.8583
valid Loss: 0.4915 Acc: 0.8389
Saving model...
------------------------------

Epoch 12/20
----------


391it [01:08,  5.71it/s]


train Loss: 0.3835 Acc: 0.8656
valid Loss: 0.5465 Acc: 0.8163
------------------------------

Epoch 13/20
----------


391it [01:09,  5.62it/s]


train Loss: 0.3557 Acc: 0.8767
valid Loss: 0.5145 Acc: 0.8327
------------------------------

Epoch 14/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.3276 Acc: 0.8880
valid Loss: 0.4782 Acc: 0.8376
------------------------------

Epoch 15/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.3041 Acc: 0.8955
valid Loss: 0.4398 Acc: 0.8549
Saving model...
------------------------------

Epoch 16/20
----------


391it [01:09,  5.60it/s]


train Loss: 0.2861 Acc: 0.9013
valid Loss: 0.4143 Acc: 0.8631
Saving model...
------------------------------

Epoch 17/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.2716 Acc: 0.9054
valid Loss: 0.3903 Acc: 0.8724
Saving model...
------------------------------

Epoch 18/20
----------


391it [01:08,  5.67it/s]


train Loss: 0.2484 Acc: 0.9147
valid Loss: 0.4104 Acc: 0.8671
------------------------------

Epoch 19/20
----------


391it [01:09,  5.61it/s]


train Loss: 0.2394 Acc: 0.9170
valid Loss: 0.3792 Acc: 0.8767
Saving model...
------------------------------

Epoch 20/20
----------


391it [01:08,  5.68it/s]


train Loss: 0.2205 Acc: 0.9226
valid Loss: 0.3759 Acc: 0.8800
Saving model...
------------------------------

Best model saved.
Training complete in 24m 50s
Best val Acc: 0.880000


0,1
Train - acc,▁▃▄▅▆▆▇▇▇▇▇▇▇███████
Train - loss,█▆▆▆▅▄▄▃▃▃▃▃▂▃▂▂▂▂▂▂▁▂▁▂▁▂▁▁▁▁▁▂▁▂▁▁▁▁▁▁
Valid - acc,▁▃▅▅▆▆▆▇▇▇▇▇▇▇██████
Valid - loss,█▆▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
Train - acc,0.9226
Train - loss,0.25736
Valid - acc,0.88
Valid - loss,0.37591


# **Training ResNet-18 + AugMix on CIFAR-100 Dataset**

In [None]:
config['dataset_name'] = 'CIFAR100'
config['model_name'] =  'resnet'
config['model_type'] = 'AugMix'
model_ft, input_size = initialize_model(config['model_name'], config['dataset_name'] )
optimizer = optim.SGD(model_ft.parameters(), lr=config['lr'], momentum=0.9, weight_decay = config['weight_decay'], nesterov = True)
scheduler = CosineAnnealingLR(optimizer, T_max = 60, eta_min = 1e-6)
criterion = nn.CrossEntropyLoss()
model_ft = model_ft.to(device)

with wandb.init(config=config, project='Project', save_code=True, group='resnet18_cifar100_augmix'):
  model_ft, hist = train_model(
      model_ft,
      CIFAR100_trainset,
      CIFAR100_testset,
      criterion,
      optimizer,
      model_type=config['model_type'],
      datasetname=config['dataset_name'],
      num_epochs=config['epochs'],
      scheduler=scheduler
      )