# Problem 2: VGG accuracy comparison

In [2]:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from model.vgg import VGG_A
# from model.vgg import VGG_A_Light
from model.vgg_batchnorm import VGG_A_BatchNorm
from torchvision import datasets, transforms

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Notebook setup: constants, output paths, device selection, CIFAR-10 loaders
# and a one-image sanity check of the training pipeline.

# ## Constants (parameters) initialization
# NOTE(review): `num_workers` and `batch_size` are not referenced by the
# DataLoaders below, which hard-code num_workers=2 and batch_size=64 --
# confirm which values are intended.
device_id = [0, 1, 2, 3]
num_workers = 4
batch_size = 128

# Output directories live under `<parent of the working directory>/reports`.
home_path = module_path = os.path.dirname(os.getcwd())
figures_path, models_path = (
    os.path.join(home_path, 'reports', leaf) for leaf in ('figures', 'models')
)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Convert images to tensors and normalize every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# # load dataset from cifar10
trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=64, shuffle=True, num_workers=2
)

# NOTE(review): `validset` and `testset` are both built with train=False, i.e.
# the same split is used for validation and for testing.
validset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
val_loader = torch.utils.data.DataLoader(
    validset, batch_size=64, shuffle=False, num_workers=2
)

testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2
)

# Sanity check: inspect the first sample of the first training batch and save
# it as an image (channels moved last, normalization undone for display).
X, y = next(iter(train_loader))
print(X[0])
print(y[0])
print(X[0].shape)
img = np.transpose(X[0], [1, 2, 0])
plt.imshow(img * 0.5 + 0.5)
plt.savefig('sample.png')
print(X[0].max())
print(X[0].min())



# Counts correct predictions in a batch (a raw count, not a ratio).
def get_accuracy(pred,y):
    """Return how many entries of ``pred`` equal the matching entries of ``y``.

    ``y`` is viewed with the shape of ``pred`` before the element-wise
    comparison, and the number of matches is returned as a plain Python
    number. Callers divide by the sample count to obtain an accuracy.
    """
    targets = y.view_as(pred)
    matches = pred.eq(targets)
    return matches.sum().item()

# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    """Seed NumPy, PyTorch and Python's ``random`` module.

    Args:
        seed_value: Seed applied to every generator.
        device: Target device, either a string (``'cpu'``, ``'cuda'``,
            ``'cuda:0'``) or a ``torch.device``. For any non-CPU device the
            CUDA generators are seeded as well and cuDNN is put into
            deterministic mode with benchmarking disabled.
    """
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    random.seed(seed_value)

    # Compare device *types*: a torch.device never compares equal to a str,
    # so testing `device != 'cpu'` directly is always true for
    # torch.device('cpu') and would wrongly take the CUDA branch.
    if isinstance(device, torch.device):
        device_type = device.type
    else:
        device_type = torch.device(device).type

    if device_type != 'cpu':
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# train function
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100, best_model_path=None):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    The model is moved to the module-level ``device``. After each epoch the
    validation accuracy is printed together with the best value seen so far.

    Args:
        model: Network to train. It must expose ``model.classifier[4].weight``;
            the gradient of that parameter is recorded at every training step.
        optimizer: Optimizer already bound to the parameters of ``model``.
        criterion: Loss callable, invoked as ``criterion(prediction, y)``.
        train_loader: Sized iterable of ``(x, y)`` training batches.
        val_loader: Iterable of ``(x, y)`` validation batches that exposes a
            sized ``.dataset`` (used as the accuracy denominator).
        scheduler: Optional learning-rate scheduler, stepped once per epoch
            after that epoch's optimizer updates.
        epochs_n: Number of epochs to run.
        best_model_path: Optional file path. When given, ``model.state_dict()``
            is saved there each time the validation accuracy improves.

    Returns:
        Tuple ``(losses_list, grads, val_accuracy_curve)``:
        ``losses_list[e][s]`` is the loss (float) of step ``s`` in epoch ``e``;
        ``grads[e][s]`` is a CPU copy of ``model.classifier[4].weight.grad``
        for that step (``None`` if the parameter received no gradient);
        ``val_accuracy_curve[e]`` is the fraction of correctly classified
        validation samples after epoch ``e``.
    """
    model.to(device)
    learning_curve = [0] * epochs_n
    val_accuracy_curve = [0] * epochs_n
    max_val_accuracy = 0
    max_val_accuracy_epoch = 0

    # Keep the two denominators separate: mean loss is per batch, accuracy is
    # per sample. max(..., 1) avoids a ZeroDivisionError on an empty loader.
    train_batches_n = max(len(train_loader), 1)
    val_samples_n = max(len(val_loader.dataset), 1)

    losses_list = []
    grads = []
    fig = None
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        model.train()

        loss_list = []  # loss value of each step in this epoch
        grad = []  # gradient snapshot of each step in this epoch

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            loss.backward()
            optimizer.step()

            step_loss = loss.item()
            loss_list.append(step_loss)
            learning_curve[epoch] += step_loss

            # The gradient only exists after backward(). Store a detached CPU
            # copy, because the live .grad tensor is reused/reset by later
            # steps and a bare reference would not preserve this step's value.
            step_grad = model.classifier[4].weight.grad
            if step_grad is not None:
                step_grad = step_grad.detach().to('cpu', copy=True)
            grad.append(step_grad)

        losses_list.append(loss_list)
        grads.append(grad)
        learning_curve[epoch] /= train_batches_n

        # Schedulers must be stepped after the optimizer updates of the epoch.
        if scheduler is not None:
            scheduler.step()

        display.clear_output(wait=True)
        # Close the previous epoch's figure so figures do not pile up. The
        # last figure is deliberately left open (and current) for callers that
        # keep drawing through pyplot after train() returns.
        if fig is not None:
            plt.close(fig)
        fig, axes = plt.subplots(1, 2, figsize=(15, 3))
        axes[0].plot(learning_curve[:epoch + 1])

        model.eval()
        # Evaluation needs no autograd graph.
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x).argmax(dim=1)
                val_accuracy_curve[epoch] += get_accuracy(pred, y)

        val_accuracy_curve[epoch] = val_accuracy_curve[epoch] / val_samples_n
        if max_val_accuracy < val_accuracy_curve[epoch]:
            max_val_accuracy = val_accuracy_curve[epoch]
            max_val_accuracy_epoch = epoch
            if best_model_path is not None:
                torch.save(model.state_dict(), best_model_path)

        print("epoch:{}, valid accuracy:{}, max valid accuracy:{}, max valid accuracy epoch:{}".format(epoch, val_accuracy_curve[epoch], max_val_accuracy,max_val_accuracy_epoch))

    return losses_list, grads, val_accuracy_curve


# Run configuration: number of training epochs and the output directories for
# saved losses / gradients (both empty strings here).
epo = 20
loss_save_path = grad_save_path = ''

# Seed every random number generator before any model is created.
set_random_seeds(2020, device)




cuda
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
tensor([[[ 0.1373,  0.1843,  0.1529,  ..., -0.5529, -0.6000, -0.6627],
         [ 0.3255,  0.3725,  0.3255,  ..., -0.2078, -0.2000, -0.2863],
         [ 0.3882,  0.4353,  0.4275,  ...,  0.3333,  0.3804,  0.2392],
         ...,
         [ 0.3412,  0.3804,  0.4275,  ...,  0.6157,  0.6000,  0.4039],
         [ 0.3176,  0.3490,  0.3961,  ...,  0.6000,  0.5765,  0.3804],
         [ 0.4275,  0.4510,  0.4824,  ...,  0.5373,  0.5294,  0.3490]],

        [[ 0.1216,  0.1608,  0.1608,  ..., -0.4275, -0.4667, -0.5451],
         [ 0.2706,  0.3098,  0.2941,  ..., -0.1686, -0.1686, -0.2627],
         [ 0.3255,  0.3647,  0.3725,  ...,  0.3412,  0.3569,  0.2078],
         ...,
         [ 0.2784,  0.3176,  0.3569,  ...,  0.4588,  0.4431,  0.2627],
         [ 0.2235,  0.2471,  0.2941,  ...,  0.4431,  0.4118,  0.2392],
         [ 0.2784,  0.2941,  0.3255,  ...,  0.3882,  0.3804,  0.2078]]

In [7]:
# Train the plain VGG-A once and save its validation-accuracy curve.
print('----First model for picture----'+'\n')
model = VGG_A()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()
VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)

# Draw on a dedicated figure instead of pyplot's implicit "current" one:
# train() opens its own figures, so drawing through plt.* would put this curve
# on whatever axes train() happened to leave active.
x = range(0, epo)
y = val_accuracy_curve
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(x, y, 'o-')
acc_ax.set_title('valid accuracy vs epoches')
acc_ax.set_ylabel('valid accuracy')
acc_ax.set_xlabel('epoches')
acc_fig.savefig("VGG_A_accuracy.jpg")
plt.close(acc_fig)  # release the figure once it is on disk



100%|██████████| 20/20 [04:50<00:00, 14.53s/epoch]

epoch:19, valid accuracy:0.7552, max valid accuracy:0.764, max valid accuracy epoch:14





In [6]:
# Train VGG-A with batch normalization once and save its validation-accuracy
# curve.
print('----next model for picture----'+'\n')
model = VGG_A_BatchNorm()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()
VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)

# Draw on a dedicated figure instead of pyplot's implicit "current" one:
# train() opens its own figures, so drawing through plt.* would put this curve
# on whatever axes train() happened to leave active.
x = range(0, epo)
y = val_accuracy_curve
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(x, y, 'o-')
acc_ax.set_title('valid accuracy vs epoches')
acc_ax.set_ylabel('valid accuracy')
acc_ax.set_xlabel('epoches')
acc_fig.savefig("VGG_A_BatchNorm_accuracy.jpg")
plt.close(acc_fig)  # release the figure once it is on disk

100%|██████████| 20/20 [06:06<00:00, 18.34s/epoch]

epoch:19, valid accuracy:0.8245, max valid accuracy:0.8251, max valid accuracy epoch:13





## Loss landscape

In [5]:
def _loss_envelope(loss_runs):
    """Return the per-step minimum and maximum loss across several runs.

    Args:
        loss_runs: One entry per training run. Each entry is the
            ``losses_list`` returned by ``train``: a list of epochs, each a
            list of per-step loss values.

    Returns:
        Tuple ``(min_curve, max_curve)`` of flat lists, with the steps of all
        epochs concatenated in order. Works for any number of runs.
    """
    lower, upper = [], []
    for epoch_across_runs in zip(*loss_runs):  # the same epoch of every run
        for step_across_runs in zip(*epoch_across_runs):  # the same step of every run
            lower.append(min(step_across_runs))
            upper.append(max(step_across_runs))
    return lower, upper


print('----First model----'+'\n')

# Train the plain VGG-A once per learning rate and keep every run's losses.
lr_list = [1e-3, 2e-3, 1e-4, 5e-4]
loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    #np.savetxt(os.path.join(loss_save_path, 'loss.txt'), VGG_A_loss, fmt='%s', delimiter=' ')
    #np.savetxt(os.path.join(grad_save_path, 'grads.txt'), VGG_A_grads, fmt='%s', delimiter=' ')
    loss_list.append(VGG_A_loss)
    grad_list.append(VGG_A_grads)

min_curve, max_curve = _loss_envelope(loss_list)


print('----Next model----'+'\n')

# Same sweep for the batch-normalized variant.
lr_list = [1e-3, 2e-3, 1e-4, 5e-4]
loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A_BatchNorm()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    loss_list.append(VGG_A_loss)
    grad_list.append(VGG_A_grads)

# Built with the same helper as the plain model, so both envelopes are indexed
# consistently (the hand-written copy referenced an undefined name `ele`).
min_curve_BN, max_curve_BN = _loss_envelope(loss_list)

def write_file(ls,fname):
    """Write the items of ``ls`` to ``fname`` as tab-terminated text.

    Each item is written as ``str(item)`` followed by a tab, and a newline is
    inserted after every 100th item. Any existing file is overwritten.

    Args:
        ls: Iterable of values to write.
        fname: Path of the UTF-8 text file to create.
    """
    # The context manager closes the file even if a write raises.
    with open(fname, "w", encoding='UTF-8') as f:
        for i, ele in enumerate(ls, start=1):
            f.write(str(ele)+'\t')
            if i % 100 == 0:
                f.write('\n')
    
# Persist the four envelope curves as text files in the working directory.
for curve_values, curve_file in (
    (min_curve_BN, 'min_curve_BN.txt'),
    (max_curve_BN, 'max_curve_BN.txt'),
    (min_curve, 'min_curve.txt'),
    (max_curve, 'max_curve.txt'),
):
    write_file(curve_values, curve_file)
# Use this function to plot the final loss landscape,
# fill the area between the two curves can use plt.fill_between()
# def plot_loss_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve):
#     x = list(range(len(min_curve)))
#     x = np.array(x) 
#     min_curve_BN = np.array(min_curve_BN) 
#     max_curve_BN = np.array(max_curve_BN) 
#     min_curve = np.array(min_curve) 
#     max_curve = np.array(max_curve) 
    
#     plt.plot(x, min_curve, 'g')
#     plt.plot(x, max_curve, 'g')
#     p1 = plt.fill_between(x, min_curve, max_curve, facecolor="green", alpha=0.3)
    
#     plt.plot(x, min_curve_BN, 'r')
#     plt.plot(x, max_curve_BN, 'r')
#     p2 = plt.fill_between(x, min_curve_BN, max_curve_BN, facecolor="red", alpha=0.3)
    
#     l1 = plt.legend([p1, p2], ["VGG_A", "VGG_A_BatchNorm"], loc='upper right')
#     plt.title('Loss_landscape vs Steps')
#     plt.ylabel('Loss_landscape')
#     plt.xlabel('Steps')
#     plt.savefig("Loss_landscape_update.jpg")
#     plt.gca().add_artist(l1)
    

# plot_loss_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve)

100%|██████████| 20/20 [05:56<00:00, 17.84s/epoch]

epoch:19, valid accuracy:0.8318, max valid accuracy:0.8374, max valid accuracy epoch:12





In [None]:
def plot_grad_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve):
    """Plot two min/max envelopes side by side and save them to disk.

    The left panel shows the band between ``min_curve`` and ``max_curve``
    ("Standard VGG"); the right panel shows the band between ``min_curve_BN``
    and ``max_curve_BN`` ("Standard VGG + BatchNorm"). Both panels use a
    y-range of 0 to 6. The figure is written to ``grad_landscape.jpg`` in the
    working directory and then closed.

    Args:
        min_curve_BN: Per-step lower envelope for the BatchNorm model.
        max_curve_BN: Per-step upper envelope for the BatchNorm model.
        min_curve: Per-step lower envelope for the plain model.
        max_curve: Per-step upper envelope for the plain model.
    """
    def _draw_band(ax, lower, upper, line_color, fill_color, title):
        # Shade the region between one (lower, upper) pair on `ax`.
        lower = np.asarray(lower)
        upper = np.asarray(upper)
        # Each panel gets an x-axis sized from its own data, so the two
        # models may have a different number of steps.
        steps = np.arange(len(lower))
        ax.plot(steps, lower, color=line_color)
        ax.plot(steps, upper, color=line_color)
        ax.fill_between(steps, lower, upper, facecolor=fill_color, alpha=0.1)
        ax.set_title(title)
        ax.set_ylabel('grad_landscape')
        ax.set_xlabel('Steps')
        ax.set_ylim((0, 6))

    # Use a dedicated figure rather than pyplot's implicit current figure, so
    # axes left over from earlier cells cannot end up in the saved image.
    fig, (ax_plain, ax_bn) = plt.subplots(1, 2, subplot_kw={'frameon': False})
    _draw_band(ax_plain, min_curve, max_curve, '#DB7093', "green", 'Standard VGG')
    _draw_band(ax_bn, min_curve_BN, max_curve_BN, '#98FB98', "red", 'Standard VGG + BatchNorm')

    fig.tight_layout()  # keep the right panel's y-label off the left panel
    fig.savefig("grad_landscape.jpg")
    plt.close(fig)

# NOTE(review): the curves passed here are the min/max *loss* envelopes built
# from `loss_list` in the loss-landscape cell, while the axis labels and the
# output file name say "grad_landscape" -- confirm which quantity is intended.
plot_grad_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve)