In [1]:
# import all libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms


import os
import argparse

import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['font.size'] = 16

In [2]:
import data_process
data_dict = data_process.preprocess_cifar100(
    cuda=True,dtype=torch.float64, flatten=False
)

print("Shape of X_train:", data_dict["X_train"].shape)
print("Shape of y_train:", data_dict["y_train"].shape)
print("Shape of X_val: ", data_dict["X_val"].shape)
print("Shape of y_val: ", data_dict["y_val"].shape)
print("Shape of X_test: ", data_dict["X_test"].shape)
print("Shape of y_test: ", data_dict["y_test"].shape)

Shape of X_train: torch.Size([40000, 3, 32, 32])
Shape of y_train: torch.Size([40000])
Shape of X_val:  torch.Size([10000, 3, 32, 32])
Shape of y_val:  torch.Size([10000])
Shape of X_test:  torch.Size([10000, 3, 32, 32])
Shape of y_test:  torch.Size([10000])


In [3]:
# Compute the mean and std of the whole training set 
# Before splitting into train and val

origin_train = torch.cat((data_dict["X_train"], data_dict["X_val"]), dim=0)
mean, std = data_process.compute_mean_std(origin_train)
print("The mean of RGB of training set is: ", mean)
print("The std of RGB of training set is: ", std)

The mean of RGB of training set is:  tensor([0.5071, 0.4865, 0.4409], device='cuda:0', dtype=torch.float64)
The std of RGB of training set is:  tensor([0.2673, 0.2564, 0.2762], device='cuda:0', dtype=torch.float64)


In [4]:
# Show the proportion of each class in the new X_train
counts = torch.bincount(data_dict["y_train"])
proportions = counts / 40000 * 100.0
labels = data_dict["y_train"].unique()
proportion_result = torch.stack((labels, proportions), dim=0)
print(proportion_result)

tensor([[ 0.0000,  1.0000,  2.0000,  3.0000,  4.0000,  5.0000,  6.0000,  7.0000,
          8.0000,  9.0000, 10.0000, 11.0000, 12.0000, 13.0000, 14.0000, 15.0000,
         16.0000, 17.0000, 18.0000, 19.0000, 20.0000, 21.0000, 22.0000, 23.0000,
         24.0000, 25.0000, 26.0000, 27.0000, 28.0000, 29.0000, 30.0000, 31.0000,
         32.0000, 33.0000, 34.0000, 35.0000, 36.0000, 37.0000, 38.0000, 39.0000,
         40.0000, 41.0000, 42.0000, 43.0000, 44.0000, 45.0000, 46.0000, 47.0000,
         48.0000, 49.0000, 50.0000, 51.0000, 52.0000, 53.0000, 54.0000, 55.0000,
         56.0000, 57.0000, 58.0000, 59.0000, 60.0000, 61.0000, 62.0000, 63.0000,
         64.0000, 65.0000, 66.0000, 67.0000, 68.0000, 69.0000, 70.0000, 71.0000,
         72.0000, 73.0000, 74.0000, 75.0000, 76.0000, 77.0000, 78.0000, 79.0000,
         80.0000, 81.0000, 82.0000, 83.0000, 84.0000, 85.0000, 86.0000, 87.0000,
         88.0000, 89.0000, 90.0000, 91.0000, 92.0000, 93.0000, 94.0000, 95.0000,
         96.0000, 97.0000, 9

In [5]:
# Try the DataLoaders for MobileNet training (Refer to the ResNet notebook)
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='.', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR10(
    root='.', train=False, download=True, transform=transform_test)

# we can use a larger batch size during test, because we do not save 
# intermediate variables for gradient computation, which leaves more memory

generator = torch.Generator().manual_seed(0)
trainset_new, valset = torch.utils.data.random_split(trainset, (0.8, 0.2), generator=generator)

trainloader = torch.utils.data.DataLoader(trainset_new, batch_size=128, shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# MobileNet Model
class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out


class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


In [98]:
import numpy as np

def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).cuda()
    
    mixed_x = lam * x + (1 - lam) * x[index,:]
    y_a, y_b = y, y[index]
    
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [99]:
def train(epoch, net, criterion, trainloader, scheduler, alpha):
    device = 'cuda'
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, alpha)
        
        outputs = net(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        train_loss += loss.data.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += (lam * predicted.eq(targets_a.data).sum().item()
                    + (1 - lam) * predicted.eq(targets_b.data).sum().item())
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        
        if (batch_idx+1) % 50 == 0:
          print("iteration : %3d, loss : %0.4f, accuracy : %2.2f" % (batch_idx+1, train_loss/(batch_idx+1), 100.*correct/total))

    scheduler.step()
    return train_loss/(batch_idx+1), 100.*correct/total

In [100]:
def val(epoch, net, criterion, valloader):
    device = 'cuda'
    net.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.inference_mode():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return val_loss/(batch_idx+1), 100.*correct/total

In [101]:
def test(epoch, net, criterion, testloader):
    device = 'cuda'
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.inference_mode():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return test_loss/(batch_idx+1), 100.*correct/total

In [102]:
def save_checkpoint(net, acc, epoch):
    # Save checkpoint.
    print('Saving..')
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    torch.save(state, ('./checkpoint/ckpt%3d.pth') % (epoch))

In [103]:
# Learning rate part
config = {'lr': 0.05, 
          'momentum': 0.9, 
          'weight_decay': 1e-4,
          'alpha': 0.2}

net = MobileNet().to('cuda')
criterion = nn.CrossEntropyLoss().to('cuda')
optimizer = optim.SGD(net.parameters(), lr=config['lr'],
                     momentum=config['momentum'], 
                     weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=300, eta_min=0)

train_loss = []
train_acc = []
val_loss = []
val_acc = []
best_val = -1.0
best_cnn_model = None
# learning_rate = []
for epoch in range(300):
    train_loss_, train_acc_ = train(epoch, net, criterion, trainloader, scheduler=scheduler, alpha=config['alpha'])
    val_loss_, val_acc_ = val(epoch, net, criterion, valloader)
    # new_lr = scheduler.get_last_lr()
    
    train_loss.append(train_loss_)
    val_loss.append(val_loss_)
    train_acc.append(train_acc_/100)
    val_acc.append(val_acc_/100)
    # learning_rate.append(new_lr)

    if val_acc_ > best_val:
        best_val = val_acc_
        best_cnn_model = net
        save_checkpoint(best_cnn_model, best_val, epoch)
    
    print(("Epoch : %3d, training loss : %0.4f, training accuracy : %2.2f, val loss " + \
      ": %0.4f, val accuracy : %2.2f") % (epoch, train_loss_, train_acc_, val_loss_, val_acc_))
    # print("The updated learning rate is: ", new_lr)



Epoch: 0
iteration :  50, loss : 2.3264, accuracy : 15.37
iteration : 100, loss : 2.1711, accuracy : 20.48
iteration : 150, loss : 2.0759, accuracy : 23.96
iteration : 200, loss : 2.0219, accuracy : 26.21
iteration : 250, loss : 1.9807, accuracy : 27.77
iteration : 300, loss : 1.9467, accuracy : 29.23
Saving..
Epoch :   0, training loss : 1.9380, training accuracy : 29.53, val loss : 1.5947, val accuracy : 41.29

Epoch: 1
iteration :  50, loss : 1.7357, accuracy : 38.41
iteration : 100, loss : 1.7034, accuracy : 39.41
iteration : 150, loss : 1.6834, accuracy : 40.16
iteration : 200, loss : 1.6724, accuracy : 40.64
iteration : 250, loss : 1.6681, accuracy : 40.89
iteration : 300, loss : 1.6553, accuracy : 41.52
Saving..
Epoch :   1, training loss : 1.6476, training accuracy : 41.76, val loss : 1.4430, val accuracy : 48.06

Epoch: 2
iteration :  50, loss : 1.6147, accuracy : 43.67
iteration : 100, loss : 1.6303, accuracy : 43.36
iteration : 150, loss : 1.6091, accuracy : 44.24
iteration

Epoch :  19, training loss : 0.8601, training accuracy : 73.77, val loss : 0.6380, val accuracy : 79.72

Epoch: 20
iteration :  50, loss : 0.7311, accuracy : 77.86
iteration : 100, loss : 0.7039, accuracy : 78.74
iteration : 150, loss : 0.7200, accuracy : 78.55
iteration : 200, loss : 0.7333, accuracy : 78.08
iteration : 250, loss : 0.7649, accuracy : 76.99
iteration : 300, loss : 0.7683, accuracy : 76.70
Epoch :  20, training loss : 0.7670, training accuracy : 76.74, val loss : 0.5658, val accuracy : 81.48

Epoch: 21
iteration :  50, loss : 0.8000, accuracy : 75.88
iteration : 100, loss : 0.7416, accuracy : 77.61
iteration : 150, loss : 0.7528, accuracy : 77.28
iteration : 200, loss : 0.7760, accuracy : 76.64
iteration : 250, loss : 0.8039, accuracy : 75.60
iteration : 300, loss : 0.8108, accuracy : 75.45
Epoch :  21, training loss : 0.8142, training accuracy : 75.31, val loss : 0.5961, val accuracy : 81.14

Epoch: 22
iteration :  50, loss : 0.7373, accuracy : 78.78
iteration : 100, l

iteration : 300, loss : 0.6840, accuracy : 80.29
Saving..
Epoch :  39, training loss : 0.6728, training accuracy : 80.59, val loss : 0.4643, val accuracy : 84.78

Epoch: 40
iteration :  50, loss : 0.5186, accuracy : 85.32
iteration : 100, loss : 0.6395, accuracy : 81.31
iteration : 150, loss : 0.6626, accuracy : 80.64
iteration : 200, loss : 0.6884, accuracy : 80.01
iteration : 250, loss : 0.7080, accuracy : 79.54
iteration : 300, loss : 0.6996, accuracy : 79.79
Epoch :  40, training loss : 0.6880, training accuracy : 80.11, val loss : 0.5164, val accuracy : 83.66

Epoch: 41
iteration :  50, loss : 0.7763, accuracy : 77.13
iteration : 100, loss : 0.7662, accuracy : 77.39
iteration : 150, loss : 0.7142, accuracy : 78.64
iteration : 200, loss : 0.7360, accuracy : 78.23
iteration : 250, loss : 0.7072, accuracy : 79.14
iteration : 300, loss : 0.7164, accuracy : 78.83
Saving..
Epoch :  41, training loss : 0.7179, training accuracy : 78.86, val loss : 0.4729, val accuracy : 85.37

Epoch: 42


iteration : 300, loss : 0.5920, accuracy : 83.10
Epoch :  59, training loss : 0.6010, training accuracy : 82.83, val loss : 0.4822, val accuracy : 85.73

Epoch: 60
iteration :  50, loss : 0.5024, accuracy : 85.69
iteration : 100, loss : 0.5811, accuracy : 82.91
iteration : 150, loss : 0.6152, accuracy : 82.10
iteration : 200, loss : 0.6419, accuracy : 81.36
iteration : 250, loss : 0.6340, accuracy : 81.64
iteration : 300, loss : 0.6155, accuracy : 82.26
Epoch :  60, training loss : 0.6177, training accuracy : 82.15, val loss : 0.4406, val accuracy : 86.24

Epoch: 61
iteration :  50, loss : 0.6062, accuracy : 82.79
iteration : 100, loss : 0.6174, accuracy : 82.23
iteration : 150, loss : 0.6040, accuracy : 82.87
iteration : 200, loss : 0.6209, accuracy : 82.54
iteration : 250, loss : 0.6290, accuracy : 82.27
iteration : 300, loss : 0.6134, accuracy : 82.73
Epoch :  61, training loss : 0.6064, training accuracy : 82.93, val loss : 0.4378, val accuracy : 86.13

Epoch: 62
iteration :  50, l

iteration : 300, loss : 0.5693, accuracy : 84.35
Epoch :  79, training loss : 0.5703, training accuracy : 84.31, val loss : 0.4465, val accuracy : 86.32

Epoch: 80
iteration :  50, loss : 0.6201, accuracy : 82.39
iteration : 100, loss : 0.6504, accuracy : 81.20
iteration : 150, loss : 0.6240, accuracy : 82.12
iteration : 200, loss : 0.6309, accuracy : 81.79
iteration : 250, loss : 0.6234, accuracy : 82.04
iteration : 300, loss : 0.6270, accuracy : 82.01
Epoch :  80, training loss : 0.6299, training accuracy : 82.00, val loss : 0.4414, val accuracy : 86.77

Epoch: 81
iteration :  50, loss : 0.6504, accuracy : 81.09
iteration : 100, loss : 0.6763, accuracy : 80.38
iteration : 150, loss : 0.6700, accuracy : 80.47
iteration : 200, loss : 0.6532, accuracy : 81.18
iteration : 250, loss : 0.6383, accuracy : 81.73
iteration : 300, loss : 0.6091, accuracy : 82.59
Epoch :  81, training loss : 0.6078, training accuracy : 82.61, val loss : 0.4395, val accuracy : 86.41

Epoch: 82
iteration :  50, l

iteration : 300, loss : 0.5598, accuracy : 84.36
Epoch :  99, training loss : 0.5620, training accuracy : 84.37, val loss : 0.4557, val accuracy : 86.26

Epoch: 100
iteration :  50, loss : 0.5442, accuracy : 84.64
iteration : 100, loss : 0.5412, accuracy : 84.72
iteration : 150, loss : 0.5250, accuracy : 85.30
iteration : 200, loss : 0.5100, accuracy : 85.80
iteration : 250, loss : 0.5279, accuracy : 85.24
iteration : 300, loss : 0.5298, accuracy : 85.22
Epoch : 100, training loss : 0.5326, training accuracy : 85.15, val loss : 0.4096, val accuracy : 87.63

Epoch: 101
iteration :  50, loss : 0.5920, accuracy : 83.66
iteration : 100, loss : 0.5508, accuracy : 84.49
iteration : 150, loss : 0.5852, accuracy : 83.44
iteration : 200, loss : 0.5617, accuracy : 84.03
iteration : 250, loss : 0.5686, accuracy : 83.71
iteration : 300, loss : 0.5757, accuracy : 83.45
Epoch : 101, training loss : 0.5663, training accuracy : 83.73, val loss : 0.4191, val accuracy : 87.03

Epoch: 102
iteration :  50

iteration : 300, loss : 0.5401, accuracy : 84.75
Epoch : 119, training loss : 0.5415, training accuracy : 84.79, val loss : 0.4232, val accuracy : 88.02

Epoch: 120
iteration :  50, loss : 0.5430, accuracy : 85.21
iteration : 100, loss : 0.6023, accuracy : 83.67
iteration : 150, loss : 0.5546, accuracy : 84.64
iteration : 200, loss : 0.5227, accuracy : 85.40
iteration : 250, loss : 0.5443, accuracy : 84.90
iteration : 300, loss : 0.5540, accuracy : 84.78
Epoch : 120, training loss : 0.5499, training accuracy : 84.89, val loss : 0.4127, val accuracy : 87.82

Epoch: 121
iteration :  50, loss : 0.5957, accuracy : 83.55
iteration : 100, loss : 0.5696, accuracy : 84.46
iteration : 150, loss : 0.5438, accuracy : 85.03
iteration : 200, loss : 0.5312, accuracy : 85.40
iteration : 250, loss : 0.5406, accuracy : 84.98
iteration : 300, loss : 0.5396, accuracy : 85.18
Epoch : 121, training loss : 0.5375, training accuracy : 85.24, val loss : 0.4050, val accuracy : 88.05

Epoch: 122
iteration :  50

iteration : 300, loss : 0.5246, accuracy : 85.73
Epoch : 139, training loss : 0.5272, training accuracy : 85.76, val loss : 0.4059, val accuracy : 88.27

Epoch: 140
iteration :  50, loss : 0.4969, accuracy : 86.77
iteration : 100, loss : 0.4938, accuracy : 86.88
iteration : 150, loss : 0.5068, accuracy : 86.26
iteration : 200, loss : 0.5133, accuracy : 86.01
iteration : 250, loss : 0.5199, accuracy : 85.67
iteration : 300, loss : 0.5085, accuracy : 86.12
Saving..
Epoch : 140, training loss : 0.5170, training accuracy : 85.87, val loss : 0.4195, val accuracy : 88.54

Epoch: 141
iteration :  50, loss : 0.5456, accuracy : 85.16
iteration : 100, loss : 0.5269, accuracy : 85.66
iteration : 150, loss : 0.5457, accuracy : 85.13
iteration : 200, loss : 0.5298, accuracy : 85.46
iteration : 250, loss : 0.5402, accuracy : 85.17
iteration : 300, loss : 0.5276, accuracy : 85.64
Epoch : 141, training loss : 0.5386, training accuracy : 85.27, val loss : 0.4240, val accuracy : 88.38

Epoch: 142
iterat

iteration : 250, loss : 0.5222, accuracy : 85.74
iteration : 300, loss : 0.5233, accuracy : 85.81
Epoch : 159, training loss : 0.5201, training accuracy : 85.89, val loss : 0.3868, val accuracy : 88.84

Epoch: 160
iteration :  50, loss : 0.4978, accuracy : 86.67
iteration : 100, loss : 0.5581, accuracy : 84.66
iteration : 150, loss : 0.4991, accuracy : 86.54
iteration : 200, loss : 0.4912, accuracy : 86.53
iteration : 250, loss : 0.4657, accuracy : 87.24
iteration : 300, loss : 0.4725, accuracy : 87.04
Epoch : 160, training loss : 0.4671, training accuracy : 87.26, val loss : 0.3831, val accuracy : 88.88

Epoch: 161
iteration :  50, loss : 0.4902, accuracy : 86.94
iteration : 100, loss : 0.4755, accuracy : 87.32
iteration : 150, loss : 0.4952, accuracy : 86.91
iteration : 200, loss : 0.5358, accuracy : 85.61
iteration : 250, loss : 0.5055, accuracy : 86.38
iteration : 300, loss : 0.5021, accuracy : 86.43
Epoch : 161, training loss : 0.5029, training accuracy : 86.42, val loss : 0.4102,

iteration : 250, loss : 0.4887, accuracy : 86.94
iteration : 300, loss : 0.4923, accuracy : 86.84
Epoch : 179, training loss : 0.4789, training accuracy : 87.24, val loss : 0.3893, val accuracy : 89.04

Epoch: 180
iteration :  50, loss : 0.3616, accuracy : 90.23
iteration : 100, loss : 0.3797, accuracy : 90.08
iteration : 150, loss : 0.3684, accuracy : 90.31
iteration : 200, loss : 0.4223, accuracy : 88.73
iteration : 250, loss : 0.4426, accuracy : 87.98
iteration : 300, loss : 0.4485, accuracy : 87.89
Epoch : 180, training loss : 0.4444, training accuracy : 88.05, val loss : 0.3862, val accuracy : 89.17

Epoch: 181
iteration :  50, loss : 0.4906, accuracy : 86.95
iteration : 100, loss : 0.4920, accuracy : 86.76
iteration : 150, loss : 0.5153, accuracy : 86.06
iteration : 200, loss : 0.4925, accuracy : 87.03
iteration : 250, loss : 0.5002, accuracy : 86.64
iteration : 300, loss : 0.4938, accuracy : 86.91
Epoch : 181, training loss : 0.4937, training accuracy : 86.90, val loss : 0.3974,

iteration : 250, loss : 0.4349, accuracy : 88.61
iteration : 300, loss : 0.4573, accuracy : 87.95
Epoch : 199, training loss : 0.4591, training accuracy : 87.90, val loss : 0.3865, val accuracy : 89.46

Epoch: 200
iteration :  50, loss : 0.5013, accuracy : 86.68
iteration : 100, loss : 0.5067, accuracy : 86.55
iteration : 150, loss : 0.4846, accuracy : 86.87
iteration : 200, loss : 0.4620, accuracy : 87.47
iteration : 250, loss : 0.4584, accuracy : 87.60
iteration : 300, loss : 0.4528, accuracy : 87.76
Epoch : 200, training loss : 0.4461, training accuracy : 87.93, val loss : 0.3658, val accuracy : 90.13

Epoch: 201
iteration :  50, loss : 0.4764, accuracy : 88.06
iteration : 100, loss : 0.4764, accuracy : 87.37
iteration : 150, loss : 0.4358, accuracy : 88.78
iteration : 200, loss : 0.4583, accuracy : 88.22
iteration : 250, loss : 0.4471, accuracy : 88.61
iteration : 300, loss : 0.4569, accuracy : 88.30
Epoch : 201, training loss : 0.4533, training accuracy : 88.40, val loss : 0.3868,

iteration : 250, loss : 0.4308, accuracy : 88.52
iteration : 300, loss : 0.3967, accuracy : 89.50
Epoch : 219, training loss : 0.4014, training accuracy : 89.39, val loss : 0.3637, val accuracy : 90.32

Epoch: 220
iteration :  50, loss : 0.3474, accuracy : 91.19
iteration : 100, loss : 0.3791, accuracy : 90.92
iteration : 150, loss : 0.3977, accuracy : 90.00
iteration : 200, loss : 0.3828, accuracy : 90.44
iteration : 250, loss : 0.3753, accuracy : 90.66
iteration : 300, loss : 0.3594, accuracy : 91.04
Saving..
Epoch : 220, training loss : 0.3689, training accuracy : 90.67, val loss : 0.3519, val accuracy : 90.64

Epoch: 221
iteration :  50, loss : 0.3344, accuracy : 91.28
iteration : 100, loss : 0.3788, accuracy : 89.62
iteration : 150, loss : 0.3764, accuracy : 90.12
iteration : 200, loss : 0.3899, accuracy : 90.00
iteration : 250, loss : 0.3898, accuracy : 89.97
iteration : 300, loss : 0.3911, accuracy : 90.03
Epoch : 221, training loss : 0.3950, training accuracy : 89.92, val loss 

iteration : 200, loss : 0.4524, accuracy : 88.17
iteration : 250, loss : 0.4403, accuracy : 88.60
iteration : 300, loss : 0.4441, accuracy : 88.65
Epoch : 239, training loss : 0.4387, training accuracy : 88.83, val loss : 0.3549, val accuracy : 90.77

Epoch: 240
iteration :  50, loss : 0.4054, accuracy : 89.50
iteration : 100, loss : 0.4577, accuracy : 87.67
iteration : 150, loss : 0.4098, accuracy : 89.34
iteration : 200, loss : 0.4079, accuracy : 89.39
iteration : 250, loss : 0.4028, accuracy : 89.66
iteration : 300, loss : 0.4038, accuracy : 89.65
Epoch : 240, training loss : 0.3988, training accuracy : 89.80, val loss : 0.3436, val accuracy : 90.94

Epoch: 241
iteration :  50, loss : 0.4259, accuracy : 88.15
iteration : 100, loss : 0.4193, accuracy : 88.33
iteration : 150, loss : 0.4094, accuracy : 89.08
iteration : 200, loss : 0.4254, accuracy : 88.60
iteration : 250, loss : 0.4175, accuracy : 88.78
iteration : 300, loss : 0.4097, accuracy : 88.93
Saving..
Epoch : 241, training lo

iteration : 200, loss : 0.4252, accuracy : 88.49
iteration : 250, loss : 0.4290, accuracy : 88.44
iteration : 300, loss : 0.4185, accuracy : 88.87
Epoch : 259, training loss : 0.4119, training accuracy : 89.11, val loss : 0.3406, val accuracy : 91.31

Epoch: 260
iteration :  50, loss : 0.3788, accuracy : 90.61
iteration : 100, loss : 0.3690, accuracy : 90.82
iteration : 150, loss : 0.3258, accuracy : 91.84
iteration : 200, loss : 0.3515, accuracy : 90.89
iteration : 250, loss : 0.3671, accuracy : 90.43
iteration : 300, loss : 0.3540, accuracy : 90.93
Epoch : 260, training loss : 0.3608, training accuracy : 90.82, val loss : 0.3607, val accuracy : 90.94

Epoch: 261
iteration :  50, loss : 0.3849, accuracy : 90.38
iteration : 100, loss : 0.4005, accuracy : 89.36
iteration : 150, loss : 0.4256, accuracy : 88.71
iteration : 200, loss : 0.4467, accuracy : 88.03
iteration : 250, loss : 0.4326, accuracy : 88.67
iteration : 300, loss : 0.4329, accuracy : 88.56
Saving..
Epoch : 261, training lo

iteration : 200, loss : 0.3588, accuracy : 91.20
iteration : 250, loss : 0.3719, accuracy : 90.85
iteration : 300, loss : 0.3874, accuracy : 90.29
Epoch : 279, training loss : 0.3916, training accuracy : 90.17, val loss : 0.3438, val accuracy : 91.32

Epoch: 280
iteration :  50, loss : 0.3355, accuracy : 92.10
iteration : 100, loss : 0.3094, accuracy : 92.93
iteration : 150, loss : 0.3284, accuracy : 92.18
iteration : 200, loss : 0.3372, accuracy : 91.90
iteration : 250, loss : 0.3397, accuracy : 91.69
iteration : 300, loss : 0.3335, accuracy : 91.98
Epoch : 280, training loss : 0.3481, training accuracy : 91.61, val loss : 0.3571, val accuracy : 91.19

Epoch: 281
iteration :  50, loss : 0.4316, accuracy : 89.21
iteration : 100, loss : 0.4136, accuracy : 89.49
iteration : 150, loss : 0.4112, accuracy : 89.61
iteration : 200, loss : 0.4003, accuracy : 90.05
iteration : 250, loss : 0.4143, accuracy : 89.64
iteration : 300, loss : 0.3989, accuracy : 90.01
Epoch : 281, training loss : 0.39

iteration : 200, loss : 0.3829, accuracy : 90.06
iteration : 250, loss : 0.3770, accuracy : 90.40
iteration : 300, loss : 0.3859, accuracy : 90.13
Epoch : 299, training loss : 0.3866, training accuracy : 90.12, val loss : 0.3380, val accuracy : 91.71


In [106]:
val_test_acc = val(epoch, best_cnn_model, criterion, valloader)
print(val_test_acc)

(0.3394060706413245, 91.64)


In [107]:
test_acc = test(epoch, best_cnn_model, criterion, testloader)
print("The test accuracy of the best model is: ", test_acc)

The test accuracy of the best model is:  (0.333588894084096, 91.57)


In [64]:
from utils import plot_loss_acc

ta = [t.cpu().numpy() for t in train_acc]

plot_loss_acc(train_loss, val_loss, ta, val_acc, "loss_acc_diagram_mixup")

#plt.plot(range(len(train_loss)), train_loss, 'b')
#plt.plot(range(len(test_loss)), test_loss, 'r')
#plt.xlabel("Number of epochs")
#plt.ylabel("Loss")
#plt.title("Logistic Regression: Loss vs Number of epochs")
#plt.legend(['train', 'test'])
#plt.show()

In [25]:
import numpy as np
def plot_learning_rate(learning_rate, figname):
    x = np.arange(len(learning_rate))
    max_lr = 1.0
    min_lr = 0.0
    plt.xlabel("epoch")
    plt.ylabel("learning rate")
    plt.plot(x, learning_rate, label="learning rate curve")
    plt.title(figname)
    plt.savefig(os.path.join('./', figname))

plt.clf()
plot_learning_rate(learning_rate, "lr_diagram_cosine_schedule_300_epochs.png")

In [105]:
print("Train loss is: ", train_loss, ", with a min value of: ", min(train_loss))
print("Val loss is: ", val_loss, ", with a min value of: ", min(val_loss))
print("Train accuracy is: ", ta, ", with a max value of: ", max(ta))
print("Val accuracy is: ", val_acc, ", with a max value of: ", max(val_acc))

Train loss is:  [1.9380317934024067, 1.6475613140069638, 1.560358148032484, 1.4024820049730733, 1.3128777940433247, 1.253766577655134, 1.150853529144019, 1.1231219340056275, 1.0638390836624292, 1.0043195134725054, 1.0155985690534306, 0.9646279515740209, 0.9246122246733108, 0.92717438364943, 0.8787821355147865, 0.8894828356112154, 0.8739283360040988, 0.8445886195467683, 0.8498075768208733, 0.8601226619066903, 0.7670159915003913, 0.8142044987446203, 0.7682687590202204, 0.741738436225885, 0.7513336721605386, 0.7295574149765527, 0.7067025245759434, 0.8154496096384031, 0.759247512244188, 0.7368058099533422, 0.7338492307133568, 0.7140759309164633, 0.7228552980449634, 0.705282554078026, 0.6711295556050901, 0.710843091955581, 0.7448324011728025, 0.6448681313104142, 0.6691442753274601, 0.672795738560704, 0.6879534619018293, 0.717868373845332, 0.7073381217999961, 0.6875257927198379, 0.6888261439796454, 0.6688861362279033, 0.655400006035075, 0.6621934574918625, 0.6403262472381226, 0.6360070123649