**Part 2**

In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import os
datadir = "/content/assignment3_part1"
if not os.path.exists(datadir):
  !ln -s "/content/drive/MyDrive/CS444/assignment3_part1/" $datadir # TODO: Fill your A3 path
os.chdir(datadir)

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import random
from PIL import Image
import torchvision.transforms.functional as TF

def rotate_img(img, rot):
    if rot == 0: # 0 degrees rotation
        return img
    elif rot==1:
      return TF.rotate(img, 90)
    elif rot==2:
      return TF.rotate(img, 180)
    elif rot==3:
      return TF.rotate(img, 270)    
    else:
        raise ValueError('rotation should be 0, 90, 180, or 270 degrees')

class CIFAR10Rotation(torchvision.datasets.CIFAR10):

    def __init__(self, root, train, download, transform) -> None:
        super().__init__(root=root, train=train, download=download, transform=transform)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index: int):
        image, cls_label = super().__getitem__(index)

        # randomly select image rotation
        rotation_label = random.choice([0, 1, 2, 3])
        image_rotated = rotate_img(image, rotation_label)

        rotation_label = torch.tensor(rotation_label).long()
        return image, image_rotated, rotation_label, torch.tensor(cls_label).long()

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
from torchvision.datasets import CIFAR10
import numpy as np


# Define the batch size
batch_size = 128

# Define the transformations to apply to the data
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(), # Randomly flip the input image horizontally
    transforms.ToTensor(), # Convert the input image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize the input image
])
transform_test = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # Resize the input image to 224x224
    transforms.ToTensor(), # Convert the input image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize the input image
])

# Load the CIFAR10 dataset
trainset = CIFAR10Rotation(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = CIFAR10Rotation(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)



Files already downloaded and verified
Files already downloaded and verified


In [5]:
import time

def run_test(net, testloader, criterion, task):
    correct = 0
    total = 0
    avg_test_loss = 0.0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for images, images_rotated, labels, cls_labels in testloader:
            if task == 'rotation':
              images, labels = images_rotated.to(device), labels.to(device)
            elif task == 'classification':
              images, labels = images.to(device), cls_labels.to(device)
            # TODO: Calculate outputs by running images through the network
            # The class with the highest energy is what we choose as prediction
            outputs=net(images)
            _,predicted=torch.max(outputs.data,1)
            total+=labels.size(0)
            correct+=(predicted==labels).sum().item()
            # loss
            avg_test_loss += criterion(outputs, labels)  / len(testloader)
    print('TESTING:')
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')
    print(f'Average loss on the 10000 test images: {avg_test_loss:.3f}')

In [6]:
def adjust_learning_rate(optimizer, epoch, init_lr, decay_epochs=30):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = init_lr * (0.1 ** (epoch // decay_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [7]:
# Both the self-supervised rotation task and supervised CIFAR10 classification are
# trained with the CrossEntropyLoss, so we can use the training loop code.

def train(net, criterion, optimizer, num_epochs, decay_epochs, init_lr, task):

    for epoch in range(num_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        running_correct = 0.0
        running_total = 0.0
        start_time = time.time()

        net.train()

        for i, (imgs, imgs_rotated, rotation_label, cls_label) in enumerate(trainloader, 0):
            adjust_learning_rate(optimizer, epoch, init_lr, decay_epochs)

            # TODO: Set the data to the correct device; Different task will use different inputs and labels                      
            if task=="rotation":
              imgs=imgs_rotated.to(device)
              labels=rotation_label.to(device)
            elif task=='classification':
              imgs=imgs.to(device)  
              labels=cls_label.to(device)

            # TODO: Zero the parameter gradients
            optimizer.zero_grad()

            # TODO: forward + backward + optimize
            outputs=net(imgs)
            loss=criterion(outputs,labels)
            loss.backward()
            optimizer.step()

            # TODO: Get predicted results
            _,predicted = torch.max(outputs.data,1)

            # print statistics
            print_freq = 100
            running_loss += loss.item()

            # calc acc
            running_total += labels.size(0)
            running_correct += (predicted == labels).sum().item()

            if i % print_freq == (print_freq - 1):    # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / print_freq:.3f} acc: {100*running_correct / running_total:.2f} time: {time.time() - start_time:.2f}')
                running_loss, running_correct, running_total = 0.0, 0.0, 0.0
                start_time = time.time()

        # TODO: Run the run_test() function after each epoch; Set the model to the evaluation mode.
        net.eval()
        run_test(net,testloader,criterion,task)

    print('Finished Training')

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import torch.nn as nn
import torch.nn.functional as F

from torchvision.models import resnet50

net = resnet50(num_classes=4)
net = net.to(device)

import torch.optim as optim
criterion = None
optimizer = None

# TODO: Define criterion and optimizer
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(net.parameters(),lr=0.001)

In [10]:
train(net, criterion, optimizer, num_epochs=45, decay_epochs=15, init_lr=0.01, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50.pt')

[1,   100] loss: 2.501 acc: 25.77 time: 18.34
[1,   200] loss: 1.637 acc: 28.97 time: 9.23
[1,   300] loss: 1.408 acc: 36.47 time: 10.66
TESTING:
Accuracy of the network on the 10000 test images: 28.27 %
Average loss on the 10000 test images: 1.457
[2,   100] loss: 1.394 acc: 33.80 time: 10.67
[2,   200] loss: 1.309 acc: 39.56 time: 10.39
[2,   300] loss: 1.246 acc: 42.57 time: 9.57
TESTING:
Accuracy of the network on the 10000 test images: 44.61 %
Average loss on the 10000 test images: 1.240
[3,   100] loss: 1.184 acc: 45.80 time: 10.92
[3,   200] loss: 1.178 acc: 45.85 time: 11.11
[3,   300] loss: 1.144 acc: 48.20 time: 10.42
TESTING:
Accuracy of the network on the 10000 test images: 49.03 %
Average loss on the 10000 test images: 1.136
[4,   100] loss: 1.140 acc: 48.82 time: 9.47
[4,   200] loss: 1.129 acc: 49.35 time: 10.61
[4,   300] loss: 1.110 acc: 50.32 time: 10.72
TESTING:
Accuracy of the network on the 10000 test images: 51.57 %
Average loss on the 10000 test images: 1.102
[5,

In [12]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=15, init_lr=0.01, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_2.pt')

[1,   100] loss: 1.040 acc: 57.42 time: 11.04
[1,   200] loss: 1.314 acc: 47.27 time: 10.86
[1,   300] loss: 1.114 acc: 52.77 time: 9.40
TESTING:
Accuracy of the network on the 10000 test images: 56.73 %
Average loss on the 10000 test images: 1.005
[2,   100] loss: 1.013 acc: 58.34 time: 10.65
[2,   200] loss: 1.110 acc: 53.83 time: 10.11
[2,   300] loss: 0.998 acc: 58.86 time: 11.48
TESTING:
Accuracy of the network on the 10000 test images: 55.57 %
Average loss on the 10000 test images: 1.077
[3,   100] loss: 0.979 acc: 59.21 time: 11.33
[3,   200] loss: 0.946 acc: 60.98 time: 10.95
[3,   300] loss: 1.119 acc: 54.46 time: 9.68
TESTING:
Accuracy of the network on the 10000 test images: 50.17 %
Average loss on the 10000 test images: 1.248
[4,   100] loss: 1.161 acc: 50.71 time: 10.78
[4,   200] loss: 1.065 acc: 54.88 time: 10.97
[4,   300] loss: 0.998 acc: 57.95 time: 11.01
TESTING:
Accuracy of the network on the 10000 test images: 59.77 %
Average loss on the 10000 test images: 0.968
[5

In [20]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_2.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=10, decay_epochs=5, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_3.pt')

[1,   100] loss: 0.646 acc: 74.00 time: 10.41
[1,   200] loss: 0.664 acc: 73.36 time: 10.05
[1,   300] loss: 0.647 acc: 74.41 time: 11.54
TESTING:
Accuracy of the network on the 10000 test images: 73.23 %
Average loss on the 10000 test images: 0.669
[2,   100] loss: 0.641 acc: 74.48 time: 11.27
[2,   200] loss: 0.645 acc: 74.31 time: 10.76
[2,   300] loss: 0.646 acc: 74.27 time: 9.46
TESTING:
Accuracy of the network on the 10000 test images: 73.46 %
Average loss on the 10000 test images: 0.661
[3,   100] loss: 0.635 acc: 74.79 time: 10.75
[3,   200] loss: 0.643 acc: 74.46 time: 10.94
[3,   300] loss: 0.646 acc: 74.55 time: 10.74
TESTING:
Accuracy of the network on the 10000 test images: 73.88 %
Average loss on the 10000 test images: 0.652
[4,   100] loss: 0.651 acc: 74.35 time: 10.43
[4,   200] loss: 0.635 acc: 74.61 time: 9.69
[4,   300] loss: 0.626 acc: 75.23 time: 10.84
TESTING:
Accuracy of the network on the 10000 test images: 74.23 %
Average loss on the 10000 test images: 0.646
[5

In [21]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_3.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=15, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_4.pt')

[1,   100] loss: 0.630 acc: 75.20 time: 11.30
[1,   200] loss: 0.637 acc: 74.59 time: 9.05
[1,   300] loss: 0.623 acc: 75.45 time: 10.87
TESTING:
Accuracy of the network on the 10000 test images: 74.12 %
Average loss on the 10000 test images: 0.646
[2,   100] loss: 0.631 acc: 75.05 time: 11.33
[2,   200] loss: 0.637 acc: 74.74 time: 10.98
[2,   300] loss: 0.625 acc: 75.20 time: 9.70
TESTING:
Accuracy of the network on the 10000 test images: 75.17 %
Average loss on the 10000 test images: 0.635
[3,   100] loss: 0.616 acc: 75.78 time: 9.85
[3,   200] loss: 0.621 acc: 75.25 time: 10.99
[3,   300] loss: 0.631 acc: 75.11 time: 11.03
TESTING:
Accuracy of the network on the 10000 test images: 74.35 %
Average loss on the 10000 test images: 0.644
[4,   100] loss: 0.624 acc: 75.49 time: 11.27
[4,   200] loss: 0.633 acc: 75.03 time: 9.29
[4,   300] loss: 0.603 acc: 76.48 time: 10.66
TESTING:
Accuracy of the network on the 10000 test images: 74.63 %
Average loss on the 10000 test images: 0.635
[5, 

In [22]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_4.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=10, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_5.pt')

[1,   100] loss: 0.568 acc: 77.79 time: 10.78
[1,   200] loss: 0.583 acc: 76.84 time: 8.59
[1,   300] loss: 0.585 acc: 77.27 time: 10.42
TESTING:
Accuracy of the network on the 10000 test images: 76.03 %
Average loss on the 10000 test images: 0.602
[2,   100] loss: 0.576 acc: 77.27 time: 10.85
[2,   200] loss: 0.571 acc: 77.26 time: 10.43
[2,   300] loss: 0.577 acc: 77.09 time: 9.58
TESTING:
Accuracy of the network on the 10000 test images: 76.39 %
Average loss on the 10000 test images: 0.603
[3,   100] loss: 0.589 acc: 76.48 time: 10.89
[3,   200] loss: 0.570 acc: 77.74 time: 10.45
[3,   300] loss: 0.573 acc: 77.82 time: 9.35
TESTING:
Accuracy of the network on the 10000 test images: 76.43 %
Average loss on the 10000 test images: 0.603
[4,   100] loss: 0.573 acc: 77.34 time: 9.57
[4,   200] loss: 0.577 acc: 77.49 time: 11.05
[4,   300] loss: 0.573 acc: 77.56 time: 10.78
TESTING:
Accuracy of the network on the 10000 test images: 76.14 %
Average loss on the 10000 test images: 0.600
[5, 

In [23]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_5.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=10, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_6.pt')

[1,   100] loss: 0.551 acc: 78.20 time: 10.92
[1,   200] loss: 0.555 acc: 78.38 time: 10.57
[1,   300] loss: 0.545 acc: 78.87 time: 9.27
TESTING:
Accuracy of the network on the 10000 test images: 77.01 %
Average loss on the 10000 test images: 0.582
[2,   100] loss: 0.553 acc: 78.05 time: 10.19
[2,   200] loss: 0.555 acc: 78.28 time: 10.84
[2,   300] loss: 0.541 acc: 78.69 time: 10.87
TESTING:
Accuracy of the network on the 10000 test images: 77.44 %
Average loss on the 10000 test images: 0.583
[3,   100] loss: 0.550 acc: 78.20 time: 10.66
[3,   200] loss: 0.546 acc: 78.87 time: 9.55
[3,   300] loss: 0.547 acc: 78.29 time: 10.85
TESTING:
Accuracy of the network on the 10000 test images: 76.65 %
Average loss on the 10000 test images: 0.582
[4,   100] loss: 0.547 acc: 78.74 time: 11.14
[4,   200] loss: 0.545 acc: 78.74 time: 10.35
[4,   300] loss: 0.552 acc: 78.28 time: 9.82
TESTING:
Accuracy of the network on the 10000 test images: 77.41 %
Average loss on the 10000 test images: 0.569
[5,

In [9]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_6.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=10, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_7.pt')

[1,   100] loss: 0.527 acc: 79.06 time: 17.26
[1,   200] loss: 0.534 acc: 78.99 time: 10.77
[1,   300] loss: 0.528 acc: 79.24 time: 10.45
TESTING:
Accuracy of the network on the 10000 test images: 77.83 %
Average loss on the 10000 test images: 0.555
[2,   100] loss: 0.521 acc: 79.34 time: 9.80
[2,   200] loss: 0.539 acc: 78.88 time: 10.35
[2,   300] loss: 0.528 acc: 79.48 time: 10.82
TESTING:
Accuracy of the network on the 10000 test images: 77.99 %
Average loss on the 10000 test images: 0.556
[3,   100] loss: 0.533 acc: 79.45 time: 10.85
[3,   200] loss: 0.525 acc: 79.73 time: 8.96
[3,   300] loss: 0.524 acc: 79.77 time: 10.39
TESTING:
Accuracy of the network on the 10000 test images: 78.47 %
Average loss on the 10000 test images: 0.558
[4,   100] loss: 0.519 acc: 79.78 time: 10.75
[4,   200] loss: 0.523 acc: 79.61 time: 10.30
[4,   300] loss: 0.538 acc: 78.66 time: 9.03
TESTING:
Accuracy of the network on the 10000 test images: 77.93 %
Average loss on the 10000 test images: 0.555
[5,

In [10]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_7.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=30, decay_epochs=10, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_8.pt')

[1,   100] loss: 0.501 acc: 80.91 time: 11.35
[1,   200] loss: 0.521 acc: 80.09 time: 10.72
[1,   300] loss: 0.508 acc: 80.45 time: 9.08
TESTING:
Accuracy of the network on the 10000 test images: 78.24 %
Average loss on the 10000 test images: 0.548
[2,   100] loss: 0.505 acc: 80.34 time: 9.64
[2,   200] loss: 0.513 acc: 80.07 time: 10.58
[2,   300] loss: 0.503 acc: 80.70 time: 10.54
TESTING:
Accuracy of the network on the 10000 test images: 78.18 %
Average loss on the 10000 test images: 0.550
[3,   100] loss: 0.503 acc: 80.27 time: 9.90
[3,   200] loss: 0.507 acc: 80.02 time: 9.71
[3,   300] loss: 0.516 acc: 79.52 time: 10.45
[4,   100] loss: 0.504 acc: 80.70 time: 10.63
[4,   200] loss: 0.494 acc: 80.74 time: 9.07
[4,   300] loss: 0.508 acc: 80.35 time: 10.20
TESTING:
Accuracy of the network on the 10000 test images: 78.92 %
Average loss on the 10000 test images: 0.539
[5,   100] loss: 0.513 acc: 80.12 time: 10.43
[5,   200] loss: 0.497 acc: 80.73 time: 10.47
[5,   300] loss: 0.507 ac

In [11]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_resnet50_8.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=20, decay_epochs=10, init_lr=0.001, task='rotation')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_resnet50_9.pt')

[1,   100] loss: 0.492 acc: 80.66 time: 10.81
[1,   200] loss: 0.489 acc: 80.84 time: 10.62
[1,   300] loss: 0.485 acc: 81.16 time: 8.91
TESTING:
Accuracy of the network on the 10000 test images: 79.46 %
Average loss on the 10000 test images: 0.530
[2,   100] loss: 0.494 acc: 80.61 time: 10.85
[2,   200] loss: 0.490 acc: 80.52 time: 10.46
[2,   300] loss: 0.506 acc: 80.02 time: 10.85
TESTING:
Accuracy of the network on the 10000 test images: 79.30 %
Average loss on the 10000 test images: 0.530
[3,   100] loss: 0.479 acc: 81.45 time: 9.47
[3,   200] loss: 0.488 acc: 81.41 time: 10.49
[3,   300] loss: 0.494 acc: 80.91 time: 10.56
TESTING:
Accuracy of the network on the 10000 test images: 79.68 %
Average loss on the 10000 test images: 0.524
[4,   100] loss: 0.484 acc: 81.32 time: 10.89
[4,   200] loss: 0.492 acc: 80.96 time: 8.84
[4,   300] loss: 0.496 acc: 80.77 time: 10.38
TESTING:
Accuracy of the network on the 10000 test images: 79.45 %
Average loss on the 10000 test images: 0.524
[5,

## **Fine-tuning on the pre-trained model**
In this section, we will load the pre-trained ResNet18 model and fine-tune on the classification task. We will freeze all previous layers except for the 'layer4' block and 'fc' layer.

In [8]:
import torch.nn as nn
import torch.nn.functional as F

from torchvision.models import resnet18

# TODO: Load the pre-trained ResNet18 model
model_path = 'my_model_resnet50_9.pt'
net = torchvision.models.resnet50()
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 4)
net.load_state_dict(torch.load(model_path))
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 10)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(net.parameters(),lr=0.001)
train(net, criterion, optimizer, num_epochs=20, decay_epochs=10, init_lr=0.01, task='classification')

[1,   100] loss: 1.615 acc: 38.05 time: 17.61
[1,   200] loss: 1.256 acc: 53.82 time: 10.25
[1,   300] loss: 1.148 acc: 58.66 time: 10.54
TESTING:
Accuracy of the network on the 10000 test images: 62.00 %
Average loss on the 10000 test images: 1.082
[2,   100] loss: 1.020 acc: 63.91 time: 11.14
[2,   200] loss: 0.957 acc: 66.18 time: 9.09
[2,   300] loss: 0.942 acc: 67.20 time: 10.80
TESTING:
Accuracy of the network on the 10000 test images: 67.36 %
Average loss on the 10000 test images: 0.936
[3,   100] loss: 0.873 acc: 69.26 time: 10.69
[3,   200] loss: 0.853 acc: 70.28 time: 10.57
[3,   300] loss: 0.840 acc: 71.12 time: 8.89
TESTING:
Accuracy of the network on the 10000 test images: 69.89 %
Average loss on the 10000 test images: 0.851
[4,   100] loss: 0.785 acc: 73.00 time: 10.65
[4,   200] loss: 0.818 acc: 71.73 time: 10.71
[4,   300] loss: 0.821 acc: 71.36 time: 10.18
TESTING:
Accuracy of the network on the 10000 test images: 71.20 %
Average loss on the 10000 test images: 0.831
[5

In [10]:
torch.save(net.state_dict(), 'my_model_rotnet_classification_1.pt')

In [11]:
#Load the model trained above on 45 epoch for further training 
model_path = 'my_model_rotnet_classification_1.pt'
net.load_state_dict(torch.load(model_path))

optimizer=optim.Adam(net.parameters(),lr=0.001)

train(net, criterion, optimizer, num_epochs=10, decay_epochs=5, init_lr=0.01, task='classification')

# TODO: Save the model
torch.save(net.state_dict(), 'my_model_rotnet_classification_2.pt')

[1,   100] loss: 0.508 acc: 82.41 time: 10.01
[1,   200] loss: 0.546 acc: 81.38 time: 10.07
[1,   300] loss: 0.569 acc: 80.66 time: 10.61
TESTING:
Accuracy of the network on the 10000 test images: 78.68 %
Average loss on the 10000 test images: 0.641
[2,   100] loss: 0.522 acc: 82.50 time: 11.02
[2,   200] loss: 0.515 acc: 82.31 time: 9.75
[2,   300] loss: 0.568 acc: 80.83 time: 10.34
TESTING:
Accuracy of the network on the 10000 test images: 72.80 %
Average loss on the 10000 test images: 1.138
[3,   100] loss: 0.657 acc: 77.87 time: 11.06
[3,   200] loss: 0.529 acc: 81.47 time: 10.59
[3,   300] loss: 0.595 acc: 80.17 time: 9.17
TESTING:
Accuracy of the network on the 10000 test images: 79.89 %
Average loss on the 10000 test images: 0.598
[4,   100] loss: 0.501 acc: 82.96 time: 10.30
[4,   200] loss: 0.490 acc: 83.23 time: 10.83
[4,   300] loss: 0.489 acc: 83.13 time: 10.49
TESTING:
Accuracy of the network on the 10000 test images: 80.97 %
Average loss on the 10000 test images: 0.566
[5