# Part 4.2

## Train ResNet18 from scratch 

In [1]:
#Resnet18 without pretrain
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import pandas as pd

EPOCH = 30              
BATCH_SIZE = 2048         


# CIFAR10 Datasets download
train_data = datasets.CIFAR10(root='./data',
                         train=True,                         
                         transform=transforms.ToTensor(),    
                         download=True
                        )

test_data = datasets.CIFAR10(root='./data',
                        train=False,                         
                        transform=transforms.ToTensor(),     
                        download=True
                        )

# use dataloader to load the dataset
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

# ResNet18 without pretrain weight
model = torchvision.models.resnet18(pretrained=False)

# loss function
criterion = nn.CrossEntropyLoss()
# setting adam optimizer
optimizer = optim.Adam(model.parameters(), lr=2e-4)

#device : GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device) 
Total_acc = 0


# start training
for epoch in range(EPOCH):
    train_loss = 0
    train_acc = 0
    model.train()
    
    for i, data in enumerate(train_loader, start=0):

        inputs, labels = data

        inputs, labels = inputs.to(device), labels.to(device)
        # forward propagation
        outputs = model(inputs)
        # caculate loss
        loss = criterion(outputs, labels)
        # clear gradient 
        optimizer.zero_grad()
        # back pro
        loss.backward()
        # update parameters
        optimizer.step()
        train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        
        num_correct = (predicted == labels).sum().item()
        acc = num_correct / inputs.shape[0]
        train_acc += acc

    model.eval() # change to eval mode
    eval_loss = 0
    eval_acc = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        # forward propagation
        out = model(images)
        loss = criterion(out, labels)
        eval_loss += loss.item()
        _, predicted = torch.max(out.data, 1)
        num_correct = (predicted == labels).sum().item()
        acc = num_correct / images.shape[0]
        eval_acc += acc
    
    # save best model
    if eval_acc > Total_acc:
        torch.save(model, 'cifar10_resnet18_scratch.pt')
        print("saved best model")
        Total_acc = eval_acc
        
    # print the result
    print('epoch: {} \nTrain Loss: {:.4f} \nEval Loss: {:.4f} \nTrain Acc: {:.4f} \nEval Acc: {:.4f}'
          .format(epoch, 
                  train_loss / len(train_loader), 
                  eval_loss / len(test_loader), 
                  train_acc / len(train_loader),
                  eval_acc / len(test_loader)))


  from .autonotebook import tqdm as notebook_tqdm


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 170498071/170498071 [18:47<00:00, 151152.24it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified




saved best model
epoch: 0 
Train Loss: 3.5744 
Eval Loss: 4.2687 
Train Acc: 0.3180 
Eval Acc: 0.1023
saved best model
epoch: 1 
Train Loss: 1.4986 
Eval Loss: 2.3429 
Train Acc: 0.5005 
Eval Acc: 0.3024
saved best model
epoch: 2 
Train Loss: 1.2096 
Eval Loss: 1.3176 
Train Acc: 0.5789 
Eval Acc: 0.5410
saved best model
epoch: 3 
Train Loss: 1.0223 
Eval Loss: 1.2442 
Train Acc: 0.6429 
Eval Acc: 0.5622
epoch: 4 
Train Loss: 0.8429 
Eval Loss: 1.3224 
Train Acc: 0.7104 
Eval Acc: 0.5529
saved best model
epoch: 5 
Train Loss: 0.6647 
Eval Loss: 1.3066 
Train Acc: 0.7773 
Eval Acc: 0.5751
epoch: 6 
Train Loss: 0.4735 
Eval Loss: 1.4742 
Train Acc: 0.8476 
Eval Acc: 0.5586
saved best model
epoch: 7 
Train Loss: 0.3080 
Eval Loss: 1.5337 
Train Acc: 0.9075 
Eval Acc: 0.5767
epoch: 8 
Train Loss: 0.1843 
Eval Loss: 1.7560 
Train Acc: 0.9492 
Eval Acc: 0.5584
epoch: 9 
Train Loss: 0.1036 
Eval Loss: 1.8243 
Train Acc: 0.9761 
Eval Acc: 0.5681
saved best model
epoch: 10 
Train Loss: 0.0530 


## Train ResNet50 with pretrained weight

In [None]:
#Resnet50 with pretrain
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import pandas as pd

# hyperparameters setting
EPOCH = 30              
BATCH_SIZE = 2048         

resnet50_train_losses = []
resnet50_train_acces = []
resnet50_eval_losses = []
resnet50_eval_acces = []

# CIFAR10 Datasets download
train_data = datasets.CIFAR10(root='./data',
                         train=True,                         
                         transform=transforms.ToTensor(),    
                         download=True
                        )

test_data = datasets.CIFAR10(root='./data',
                        train=False,                         
                        transform=transforms.ToTensor(),     
                        download=True
                        )

# use dataloader to load the dataset
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

# ResNet50 with pretrain weight
model = torchvision.models.resnet50(pretrained=True)

# loss function
criterion = nn.CrossEntropyLoss()
# setting adam optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)

#device : GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device) 
Total_acc = 0


# start training
for epoch in range(EPOCH):
    train_loss = 0
    train_acc = 0
    model.train()
    
    for i, data in enumerate(train_loader, start=0):

        inputs, labels = data

        inputs, labels = inputs.to(device), labels.to(device)
        # forward propagation
        outputs = model(inputs)
        # caculate loss
        loss = criterion(outputs, labels)
        # clear gradient 
        optimizer.zero_grad()
        # back pro
        loss.backward()
        # update parameters
        optimizer.step()
        train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        
        num_correct = (predicted == labels).sum().item()
        acc = num_correct / inputs.shape[0]
        train_acc += acc

    resnet50_train_losses.append(train_loss / len(train_loader))
    resnet50_train_acces.append(train_acc / len(train_loader))

    model.eval() # change to eval mode
    eval_loss = 0
    eval_acc = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        # forward propagation
        out = model(images)
        loss = criterion(out, labels)
        eval_loss += loss.item()
        _, predicted = torch.max(out.data, 1)
        num_correct = (predicted == labels).sum().item()
        acc = num_correct / images.shape[0]
        eval_acc += acc
    
    resnet50_eval_losses.append(eval_loss/len(test_loader))
    resnet50_eval_acces.append((eval_acc/len(test_loader)))
    
    # save best model
    if eval_acc > Total_acc:
        torch.save(model, 'cifar10_resnet50.pt')
        print("saved best model")
        Total_acc = eval_acc
        
    # print the result
    print('epoch: {} \nTrain Loss: {:.4f} \nEval Loss: {:.4f} \nTrain Acc: {:.4f} \nEval Acc: {:.4f}'
          .format(epoch, 
                  train_loss / len(train_loader), 
                  eval_loss / len(test_loader), 
                  train_acc / len(train_loader),
                  eval_acc / len(test_loader)))

    
# save data to csv file
array = [resnet50_train_losses,resnet50_train_acces,resnet50_eval_losses,resnet50_eval_acces]
df = pd.DataFrame(array,index = ['train_losses', 'train_acces','eval_losses','eval_acces'])
df.to_csv('./resnet50.csv')

## Evaluation with test set

This part of code can be use for multiple evaluations

In [None]:
# Evalution with test set
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pandas as pd


test_data = datasets.CIFAR10(root='./data',
                        train=False,                         
                        transform=transforms.ToTensor(),     
                        download=True
                        )

BATCH_SIZE=2048
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)
model = torch.load('drive/My Drive/9417/resnet18-vgg16/distill_resnet18_resnet50_T=3b=0.5.pt')  # change this line to evaluate diffrent models
model.eval() #change to eval mode

correct = 0
total = 0

for data in test_loader:
    images, labels = data
    images, labels = images.to(device), labels.to(device)
    
    out = model(images)
    
    _, predicted = torch.max(out.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

# print the testing result
print('Accuracy:{:.4f}%'.format(100 * correct / total))

## Train ResNet18 using knowledge distillation 

In [4]:
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import time
import pandas as pd

soft_loss = torch.nn.KLDivLoss(reduction="batchmean") 
hard_loss = nn.CrossEntropyLoss()

EPOCH = 85              
BATCH_SIZE = 2048         

temperature_list = [3, 5, 7, 10]
beta_list = [0.5,0.7]

for temp in temperature_list:
  
    for beta in beta_list:


        distill_resnet18_train_losses = []
        distill_resnet18_train_acces = []
        distill_resnet18_eval_losses = []
        distill_resnet18_eval_acces = []

        # load the teacher model
        teacher_model = torch.load('cifar10_resnet50_pretrain.pt') # you can access this model from https://drive.google.com/file/d/17zYxp_FfcVrkRd3UJb-uakfc4ddME0gA/view?usp=sharing
        teacher_model.eval() # use teacher model to produce soft targets 

        student_model = torchvision.models.resnet18(pretrained=False)
        
        train_data = datasets.CIFAR10(root='./data',
                                train=True,                        
                                transform=transforms.ToTensor(),    
                                download=False
                                )

        test_data = datasets.CIFAR10(root='./data',
                                train=False,                         
                                transform=transforms.ToTensor(),     
                                download=False
                                )

        # contruct dataloader
        train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

 
        # optimizer
        optimizer = optim.Adam(student_model.parameters(), lr=2e-3)

        #device : GPU or CPU
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        student_model.to(device) #将模型加载到相应设备中


        print("Now Trying: T = {}, beta = {}".format(temp,beta))

        Total_acc = 0.0  
        for epoch in range(EPOCH):
            train_loss = 0
            train_acc = 0
            student_model.train()

            for i, data in enumerate(train_loader, start=0):
                 
                inputs, labels = data
                    
                inputs, labels = inputs.to(device), labels.to(device)

                with torch.no_grad():
                    teacher_outputs = teacher_model(inputs)

                # using student model to produce hard targets
                student_outputs = student_model(inputs)
                
                # calculate hard loss
                student_loss = hard_loss(student_outputs, labels)
                
                # procduce soft loss
                distillation_loss = F.kl_div(F.log_softmax(student_outputs / temp, dim=1), F.softmax(teacher_outputs / temp, dim=1), reduction='batchmean') * temp * temp
                
                # combine soft loss and hard loss 
                loss = (1 - beta) * student_loss + beta * distillation_loss

                  
                optimizer.zero_grad()
                
                # back propagation
                loss.backward()
                
                optimizer.step()

                train_loss += loss.item()

                _, predicted = torch.max(student_outputs.data, 1)
                num_correct = (predicted == labels).sum().item()
                acc = num_correct / inputs.shape[0]
                train_acc += acc


        distill_resnet18_train_losses.append(train_loss / len(train_loader))
        distill_resnet18_train_acces.append(train_acc / len(train_loader))

        student_model.eval() # evaluation 
        eval_loss = 0
        eval_acc = 0

        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            
            out = student_model(images)
            loss = hard_loss(out, labels)
            eval_loss += loss.item()
            
            _, predicted = torch.max(out.data, 1)
            num_correct = (predicted == labels).sum().item()
            acc = num_correct / images.shape[0]
            eval_acc += acc


        distill_resnet18_eval_losses.append(eval_loss/len(test_loader))
        distill_resnet18_eval_acces.append((eval_acc/len(test_loader)))
        
        # save best model
        if eval_acc > Total_acc:
            torch.save(student_model, './distill_resnet18_fromResNet50_T={}b={}.pt'.format(temp,beta)) # you can change this line to set another save directory 
            print("saved best model")
            Total_acc = eval_acc

        print('epoch: {} \nTrain Loss: {:.4f} \nEval Loss: {:.4f} \nTrain Acc: {:.4f} \nEval Acc: {:.4f}'
                    .format(epoch, 
                    train_loss / len(train_loader), 
                    eval_loss / len(test_loader), 
                    train_acc / len(train_loader),
                    eval_acc / len(test_loader)))
        
        
        # save record to csv file
        array = [distill_resnet18_train_losses,distill_resnet18_train_acces,distill_resnet18_eval_losses,distill_resnet18_eval_acces]
        df = pd.DataFrame(array,index = ['train_losses', 'train_acces','eval_losses','eval_acces'])
        df.to_csv('./distill_resnet18-ResNet50_T={}beta={}.csv'.format(temp,beta)) # you can change this line to set another save directory 

Now Trying: T = 3, beta = 0.5
saved best model
epoch: 0 
Train Loss: 8.9219 
Eval Loss: 4.7406 
Train Acc: 0.3974 
Eval Acc: 0.1424




Now Trying: T = 3, beta = 0.7
saved best model
epoch: 0 
Train Loss: 11.4122 
Eval Loss: 4.5720 
Train Acc: 0.3924 
Eval Acc: 0.1705




Now Trying: T = 5, beta = 0.5


KeyboardInterrupt: 