# Problem 2-vgg accuracy compare

In [9]:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import torch
import os
import random
from tqdm import tqdm as tqdm
from IPython import display

from model.vgg import VGG_A
from model.vgg_batchnorm import VGG_A_BatchNorm
from torchvision import datasets, transforms

In [10]:
# train and evaluation function

# ## Constants (parameters) initialization
device_id = [0,1,2,3]
num_workers = 4
batch_size = 64

# add our package dir to path 
module_path = os.path.dirname(os.getcwd())
home_path = module_path
figures_path = os.path.join(home_path, 'reports', 'figures')
models_path = os.path.join(home_path, 'reports', 'models')


import os
os.environ["CUDA_VISIBLE_DEVICES"]="1,2"
# Make sure you are using the right device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


# # load dataset from cifar10
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

validset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
val_loader = torch.utils.data.DataLoader(validset, batch_size=64, shuffle=False, num_workers=2)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)


# Set a random seed to ensure reproducible results
def set_random_seeds(seed_value=0, device='cpu'):
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    random.seed(seed_value)
    if device != 'cpu': 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


# This function is used to calculate the accuracy of model classification
def get_accuracy(pred,y):
    return pred.eq(y.view_as(pred)).sum().item()

# train function
def train(model, optimizer, criterion, train_loader, val_loader, scheduler=None, epochs_n=100):
    model.to(device)
    learning_curve = [0] * epochs_n
    train_accuracy_curve = [0] * epochs_n
    val_accuracy_curve = [0] * epochs_n
    max_val_accuracy = 0
    max_val_accuracy_epoch = 0

    batches_n = len(train_loader)
    loss_list = []
    grads = []
    for epoch in tqdm(range(epochs_n), unit='epoch'):
        if scheduler is not None:
            scheduler.step()
        model.train()

        loss_list = []  # use this to record the loss value of each step
        grad = []  # use this to record the loss gradient of each step
        learning_curve[epoch] = 0  # maintain this to plot the training curve

        for data in train_loader:
            x, y = data
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            loss_list.append(loss.item())
            temp = model.classifier[4].weight.grad.clone()
            grad.append(temp)
            pred = prediction.argmax(dim = 1)

            loss.backward()
            optimizer.step()

        loss_list.append(loss_list)
        grads.append(grad)
        display.clear_output(wait=True)
        f, axes = plt.subplots(1, 2, figsize=(15, 3))

        learning_curve[epoch] /= batches_n
        axes[0].plot(learning_curve)

        model.eval()
        batches_n = len(val_loader.dataset)
        for data in val_loader:
            x, y = data
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            prediction = model(x)
            loss = criterion(prediction, y)
            pred = prediction.argmax(dim = 1)
            val_accuracy_curve[epoch] += get_accuracy(pred,y)
            
        val_accuracy_curve[epoch]  = val_accuracy_curve[epoch] /batches_n
        if max_val_accuracy < val_accuracy_curve[epoch]:
            max_val_accuracy = val_accuracy_curve[epoch]
            max_val_accuracy_epoch = epoch
        
        print("epoch:{}, valid accuracy:{}, max valid accuracy:{}, max valid accuracy epoch:{}".format(epoch, val_accuracy_curve[epoch], max_val_accuracy,max_val_accuracy_epoch))
    

    return loss_list, grads, val_accuracy_curve

# change file save path
loss_save_path = ''
grad_save_path = ''

# set random seed here 
set_random_seeds(seed_value=1234, device=device)


cuda
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [11]:

def grad_cal(grad):
    r = []
    l = len(grad)
    for i in range(l-1):
        print(grad[i])
        g1 = grad[i].cpu().numpy()
        g2 = grad[i+1].cpu().numpy()
        g_norm = np.linalg.norm(g2-g1)
        r.append(g_norm)
    return r
    
def VGG_Grad_Pred(VGG_A_grads):
    r = []
    l = len(VGG_A_grads)
    for i in range(l):
        temp = grad_cal(VGG_A_grads[i])
        r.append(temp)
    return r


## loss landscape

In [12]:
print('----First model----'+'\n')
torch.cuda.empty_cache()

epo = 5
lr_list = [2e-3, 1e-3, 5e-4, 1e-4]
loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    # np.savetxt(os.path.join(loss_save_path, 'loss.txt'), VGG_A_loss, fmt='%s', delimiter=' ')
    # np.savetxt(os.path.join(grad_save_path, 'grads.txt'), VGG_A_grads.cpu().numpy(), fmt='%s', delimiter=' ')
    loss_list.append(VGG_A_loss)
    grads_l2_dist = VGG_Grad_Pred(VGG_A_grads)
    grad_list.append(grads_l2_dist)

min_grad_curve = []
max_grad_curve = []
max_curve=[]
min_curve=[]

for epoch in range(epo):
    eplen = len(loss_list[0][epoch])
    for ele in range(eplen):
        max_loss = max(loss_list[0][epoch][ele],loss_list[1][epoch][ele],loss_list[2][epoch][ele],loss_list[3][epoch][ele])
        max_curve.append(max_loss)
        min_loss = min(loss_list[0][epoch][ele],loss_list[1][epoch][ele],loss_list[2][epoch][ele],loss_list[3][epoch][ele])
        min_curve.append(min_loss)
        max_grad = max(grad_list[0][epoch][ele],grad_list[1][epoch][ele],grad_list[2][epoch][ele],grad_list[3][epoch][ele])
        max_grad_curve.append(max_grad)
        min_grad = min(grad_list[0][epoch][ele],grad_list[1][epoch][ele],grad_list[2][epoch][ele],grad_list[3][epoch][ele])
        min_grad_curve.append(min_grad)
    
def write_file(ls,fname):
    f = open(fname, "w",encoding='UTF-8')
    i = 0
    for ele in ls:
        i = i+1
        f.write(str(ele)+'\t')
        if i % 100 == 0:
            f.write('\n')
    f.close()
    

100%|██████████| 5/5 [01:00<00:00, 12.12s/epoch]

epoch:4, valid accuracy:0.6584, max valid accuracy:0.6584, max valid accuracy epoch:4
None





AttributeError: 'NoneType' object has no attribute 'cpu'

In [None]:
print('----Next model----'+'\n')


loss_list = []
grad_list = []
for lr in lr_list:
    model = VGG_A_BatchNorm()
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()
    VGG_A_loss, VGG_A_grads, val_accuracy_curve = train(model, optimizer, criterion, train_loader, val_loader, epochs_n=epo)
    loss_list.append(VGG_A_loss)
    grads_l2_dist = VGG_Grad_Pred(VGG_A_grads)
    grad_list.append(grads_l2_dist)


min_curve_BN = []
max_curve_BN = []
min_grad_curve_BN = []
max_grad_curve_BN = []

for epoch in range(epo):
    eplen = len(loss_list[0][epoch])
    for ele in range(eplen):
        max_loss = max(loss_list[0][epoch][ele],loss_list[1][epoch][ele],loss_list[2][epoch][ele],loss_list[3][epoch][ele])
        max_curve_BN.append(max_loss)
        min_loss = min(loss_list[0][epoch][ele],loss_list[1][epoch][ele],loss_list[2][epoch][ele],loss_list[3][epoch][ele])
        min_curve_BN.append(min_loss)
        max_grad = max(grad_list[0][epoch][ele],grad_list[1][epoch][ele],grad_list[2][epoch][ele],grad_list[3][epoch][ele])
        max_grad_curve_BN.append(max_grad)
        min_grad = min(grad_list[0][epoch][ele],grad_list[1][epoch][ele],grad_list[2][epoch][ele],grad_list[3][epoch][ele])
        min_grad_curve_BN.append(min_grad)

In [None]:
write_file(min_curve_BN,'min_grad_curve_BN.txt')
write_file(max_curve_BN,'max_grad_curve_BN.txt')
write_file(min_curve,'min_grad_curve.txt')
write_file(max_curve,'max_grad_curve.txt')
write_file(min_curve_BN,'min_loss_curve_BN.txt')
write_file(max_curve_BN,'max_loss_curve_BN.txt')
write_file(min_curve,'min_loss_curve.txt')
write_file(max_curve,'max_loss_curve.txt')

In [None]:
# plot function
def plot_loss_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve):
    plt.clf()
    x = list(range(len(min_curve)))
    x = np.array(x) 
    min_curve_BN = np.array(min_curve_BN) 
    max_curve_BN = np.array(max_curve_BN) 
    min_curve = np.array(min_curve) 
    max_curve = np.array(max_curve) 
    
    plt.subplot(1,1,1)
    plt.ylim(0,8)
    plt.plot(x, min_curve,'b')
    plt.plot(x, max_curve, 'b')
    p1 = plt.fill_between(x, min_curve, max_curve, facecolor="blue", alpha=1)
    
    plt.plot(x, min_curve_BN, 'r')
    plt.plot(x, max_curve_BN, 'r')
    p2 = plt.fill_between(x, min_curve_BN, max_curve_BN, facecolor="red", alpha=1)
    
    l1 = plt.legend([p1, p2], ["VGG_A", "VGG_A_BatchNorm"], loc='upper right')
    plt.title('Loss_landscape vs Steps')
    plt.ylabel('Loss_landscape')
    plt.xlabel('Steps')
    plt.savefig("Loss_landscape_update.jpg")
    plt.gca().add_artist(l1)
    
def ReadFile(address):
    f = open(address, encoding='UTF-8')
    line = f.readline()
    ls = []
    while line:
        line_ = line.replace('\n','')
        line_ = line_.split('\t')
        line_ = line_[:-1]
        line_ = list(map(float,line_))
        ls = ls + line_
        line = f.readline()
    f.close()
    return ls

PATH='/home/newdisk/zxy/pj2/codes_for_pj/section2/draw/Loss/'
min_curve_BN = ReadFile('min_curve_BN.txt') 
max_curve_BN = ReadFile('max_curve_BN.txt')
min_curve = ReadFile('min_curve.txt')
max_curve = ReadFile('max_curve.txt')

plot_loss_landscape(min_curve_BN, max_curve_BN, min_curve, max_curve)