# Part 4.3

## Train VGG16 with pretrained weights

In [None]:
#VGG16 with pretrain
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import pandas as pd


# Training hyper-parameters.
EPOCH = 30
BATCH_SIZE = 2048

# Per-epoch history; one value is appended to each list per epoch.
VGG16_train_losses = []
VGG16_train_acces = []
VGG16_eval_losses = []
VGG16_eval_acces = []

# CIFAR-10 train/test splits, downloaded into ./data when missing.
train_data = datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_data = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation metrics do not depend on sample order, so the test split is
# iterated in a fixed order.
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

# device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# VGG16 model with pretrained weights.
# NOTE(review): the classifier head is left at torchvision's default width
# rather than resized to the 10 CIFAR-10 classes; the distillation cell later
# in this file pairs this model with a student that also keeps its default
# head, so confirm both sides before changing it here.
model = torchvision.models.vgg16(pretrained=True)
model.to(device)

# loss function
criterion = nn.CrossEntropyLoss()
# The optimizer is created only after the model sits on its final device,
# so it is guaranteed to track the parameters that are actually trained.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Best evaluation score seen so far; used to decide when to checkpoint.
Total_acc = 0


# start training
# Each epoch runs one optimisation pass over the training split followed by
# one gradient-free evaluation pass over the test split. Loss and accuracy
# are weighted by batch size so that the smaller final batch does not skew
# the epoch averages.
for epoch in range(EPOCH):
    # ---- training pass ----
    model.train()
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        n_batch = labels.size(0)

        # forward propagation and loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # clear stale grads, back-propagate, update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so scale back to a per-sample sum
        train_loss_sum += loss.item() * n_batch
        train_correct += (outputs.argmax(dim=1) == labels).sum().item()
        train_seen += n_batch

    train_loss = train_loss_sum / train_seen
    train_acc = train_correct / train_seen
    VGG16_train_losses.append(train_loss)
    VGG16_train_acces.append(train_acc)

    # ---- evaluation pass ----
    model.eval()  # change to eval mode
    eval_loss_sum = 0.0
    eval_correct = 0
    eval_seen = 0
    # No gradients are needed here; skipping the autograd graph keeps memory
    # usage down at this batch size.
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            n_batch = labels.size(0)

            out = model(images)
            eval_loss_sum += criterion(out, labels).item() * n_batch
            eval_correct += (out.argmax(dim=1) == labels).sum().item()
            eval_seen += n_batch

    eval_loss = eval_loss_sum / eval_seen
    eval_acc = eval_correct / eval_seen
    VGG16_eval_losses.append(eval_loss)
    VGG16_eval_acces.append(eval_acc)

    # Checkpoint whenever the evaluation accuracy improves. The whole model
    # object is saved (not just a state_dict) because the distillation cell
    # loads this file with a plain torch.load.
    if eval_acc > Total_acc:
        torch.save(model, 'cifar10_vgg16_pretrain.pt')
        print("saved best model")
        Total_acc = eval_acc

    print('epoch: {} \nTrain Loss: {:.4f} \nEval Loss: {:.4f} \nTrain Acc: {:.4f} \nEval Acc: {:.4f}'
          .format(epoch,
                  train_loss,
                  eval_loss,
                  train_acc,
                  eval_acc))

# Persist the training history: one CSV row per metric, one column per epoch.
row_labels = ['train_losses', 'train_acces', 'eval_losses', 'eval_acces']
array = [
    VGG16_train_losses,
    VGG16_train_acces,
    VGG16_eval_losses,
    VGG16_eval_acces,
]
df = pd.DataFrame(data=array, index=row_labels)
df.to_csv('./vgg16_pretrain.csv')

## Evaluation with test set

This cell can be reused to evaluate any saved model: change the checkpoint path passed to `torch.load` and run it again.

In [None]:
# Evaluation on the test set
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pandas as pd


# CIFAR-10 test split, downloaded into ./data when missing.
test_data = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

BATCH_SIZE = 2048
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only the test split is needed for evaluation, so no training loader is
# built; this keeps the cell runnable in a fresh kernel. Accuracy does not
# depend on sample order, so the loader is not shuffled.
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

# change this line to evaluate different models
# NOTE: torch.load unpickles a full model object, so only load checkpoints
# from a trusted source. PyTorch >= 2.6 defaults to weights_only=True and
# needs weights_only=False for files saved this way.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model = torch.load('drive/My Drive/9417/resnet18-vgg16/distill_resnet18_resnet50_T=3b=0.5.pt',
                   map_location=device)
model.to(device)  # inputs are moved to `device`, so the model must be there too
model.eval()  # change to eval mode

correct = 0
total = 0

# Inference only: disable autograd so no graph is kept per batch.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        out = model(images)

        predicted = out.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# print the testing result
print('Accuracy:{:.4f}%'.format(100 * correct / total))

## Train ResNet18 using knowledge distillation 

In [2]:
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import pandas as pd

# Distillation criteria: KL divergence against the teacher's softened
# distribution (soft loss) and cross-entropy against the labels (hard loss).
soft_loss = torch.nn.KLDivLoss(reduction="batchmean")
hard_loss = nn.CrossEntropyLoss()

EPOCH = 1
BATCH_SIZE = 2048

# Hyper-parameter grid: softmax temperature T and soft-loss weight beta.
temperature_list = [3, 5, 7, 10]
beta_list = [0.5, 0.7]

# device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The teacher, the datasets and the loaders are the same for every
# (T, beta) pair, so they are built once instead of once per configuration.

# load the teacher model
# you can access this model from https://drive.google.com/file/d/17zYxp_FfcVrkRd3UJb-uakfc4ddME0gA/view?usp=sharing
# NOTE: torch.load unpickles a full model object, so only load checkpoints
# from a trusted source. PyTorch >= 2.6 defaults to weights_only=True and
# needs weights_only=False for files saved this way.
teacher_model = torch.load('cifar10_vgg16_pretrain.pt', map_location=device)
teacher_model.to(device)  # inputs are moved to `device`, so the teacher must be there too
teacher_model.eval()  # the teacher only produces soft targets; it is never trained

train_data = datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=False,
)
test_data = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=False,
)

# construct dataloaders
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

for temp in temperature_list:
    for beta in beta_list:
        # Per-epoch history for this (T, beta) configuration.
        distill_resnet18_train_losses = []
        distill_resnet18_train_acces = []
        distill_resnet18_eval_losses = []
        distill_resnet18_eval_acces = []

        # Fresh, randomly initialised student for every configuration.
        # NOTE(review): the KL term needs student and teacher logits of the
        # same width; presumably both keep torchvision's default head --
        # confirm against the teacher checkpoint.
        student_model = torchvision.models.resnet18(pretrained=False)
        student_model.to(device)  # move the model to the selected device

        # optimizer
        optimizer = optim.Adam(student_model.parameters(), lr=2e-3)

        print("Now Trying: T = {}, beta = {}".format(temp,beta))

        Total_acc = 0.0  # best evaluation accuracy for this configuration
        for epoch in range(EPOCH):
            # ---- training pass ----
            student_model.train()
            train_loss_sum = 0.0
            train_correct = 0
            train_seen = 0

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                n_batch = labels.size(0)

                # The teacher's logits are fixed targets; no gradient needed.
                with torch.no_grad():
                    teacher_outputs = teacher_model(inputs)

                student_outputs = student_model(inputs)

                # hard loss: student predictions vs. ground-truth labels
                student_loss = hard_loss(student_outputs, labels)

                # soft loss: KL divergence between temperature-softened
                # student (log-probabilities) and teacher (probabilities),
                # multiplied by T^2.
                distillation_loss = soft_loss(
                    F.log_softmax(student_outputs / temp, dim=1),
                    F.softmax(teacher_outputs / temp, dim=1),
                ) * temp * temp

                # combine soft loss and hard loss
                loss = (1 - beta) * student_loss + beta * distillation_loss

                optimizer.zero_grad()
                # back propagation
                loss.backward()
                optimizer.step()

                # Both losses are batch means; weight by batch size so the
                # smaller final batch does not skew the epoch average.
                train_loss_sum += loss.item() * n_batch
                train_correct += (student_outputs.argmax(dim=1) == labels).sum().item()
                train_seen += n_batch

            train_loss = train_loss_sum / train_seen
            train_acc = train_correct / train_seen
            distill_resnet18_train_losses.append(train_loss)
            distill_resnet18_train_acces.append(train_acc)

            # ---- evaluation pass (once per epoch) ----
            student_model.eval()
            eval_loss_sum = 0.0
            eval_correct = 0
            eval_seen = 0

            # Inference only: disable autograd so no graph is kept per batch.
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    n_batch = labels.size(0)

                    out = student_model(images)
                    eval_loss_sum += hard_loss(out, labels).item() * n_batch
                    eval_correct += (out.argmax(dim=1) == labels).sum().item()
                    eval_seen += n_batch

            eval_loss = eval_loss_sum / eval_seen
            eval_acc = eval_correct / eval_seen
            distill_resnet18_eval_losses.append(eval_loss)
            distill_resnet18_eval_acces.append(eval_acc)

            # save best model
            if eval_acc > Total_acc:
                torch.save(student_model, './distill_resnet18_fromVGG16_T={}b={}.pt'.format(temp,beta)) # you can change this line to set another save directory
                print("saved best model")
                Total_acc = eval_acc

            print('epoch: {} \nTrain Loss: {:.4f} \nEval Loss: {:.4f} \nTrain Acc: {:.4f} \nEval Acc: {:.4f}'
                  .format(epoch,
                          train_loss,
                          eval_loss,
                          train_acc,
                          eval_acc))

        # save record to csv file (one row per metric, one column per epoch)
        array = [distill_resnet18_train_losses,distill_resnet18_train_acces,distill_resnet18_eval_losses,distill_resnet18_eval_acces]
        df = pd.DataFrame(array,index = ['train_losses', 'train_acces','eval_losses','eval_acces'])
        df.to_csv('./distill_resnet18-VGG16_T={}beta={}.csv'.format(temp,beta)) # you can change this line to set another save directory

  from .autonotebook import tqdm as notebook_tqdm


Now Trying: T = 3, beta = 0.5
saved best model
epoch: 0 
Train Loss: 8.9422 
Eval Loss: 4.1816 
Train Acc: 0.3919 
Eval Acc: 0.1998




Now Trying: T = 3, beta = 0.7


KeyboardInterrupt: 