# Transfer Learning

We will do the following steps in order.
1. Load and normalize the CIFAR-10 training and test datasets using torchvision
2. Create a pretrained CNN
3. Modify the pretrained CNN
3. Define a loss function
4. Train the network on the training data
5. Test the network on the test data


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

# Seed every random number generator in use with one value so runs can be repeated.
random_seed = 4332
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)  # if use multi-GPU

# Ask cuDNN for deterministic behaviour and switch its benchmark mode off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the first GPU when CUDA is requested and available; otherwise use the CPU.
use_cuda = True
device = torch.device(
    "cuda:0" if use_cuda and torch.cuda.is_available() else "cpu"
)

## Dataset

1. Load and normalize the CIFAR-10 training and test datasets using torchvision

In [None]:
# ToTensor converts the PIL images returned by the torchvision dataset into
# float tensors in [0, 1]. Normalize then standardizes every channel with the
# mean/std below, so the result is roughly zero-centred (not limited to [-1, 1]).

# Per-channel statistics used for normalization (presumably the usual CIFAR-10
# training-set statistics).
mean = torch.tensor((0.4914, 0.4822, 0.4465))
std = torch.tensor((0.2023, 0.1994, 0.2010))

# Training pipeline: random crop rescaled to 224x224 plus a random horizontal
# flip as data augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Evaluation pipeline: a deterministic resize to the same resolution. No random
# augmentation here, so every test image is seen whole and results are repeatable.
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

# Class names indexed by the integer label of the dataset.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# display images
def imshow(img, std=std, mean=mean):
    """Un-normalize a channel-first image tensor and show it with matplotlib.

    Args:
        img: Image tensor in (C, H, W) layout that was normalized with
            ``mean`` and ``std``.
        std: Per-channel standard deviation used for the normalization.
        mean: Per-channel mean used for the normalization.
    """
    # Work on a detached CPU copy so GPU tensors and tensors that track
    # gradients can be displayed as well.
    img = img.detach().cpu()
    img = img * std[:, None, None] + mean[:, None, None]  # Unnormalize
    # Rounding can leave values marginally outside [0, 1]; clamp them so
    # matplotlib receives a valid float RGB image.
    img = img.clamp(0, 1)
    npimg = img.numpy()
    # matplotlib expects channel-last (H, W, C) data.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
    
# get some random training images
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators no longer provide a .next() method.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))

# print labels (only the first four labels of the batch are printed)
print(''.join('%s, ' % classes[labels[j]] for j in range(4)))

## Define the network

2. Create a pretrained CNN
3. Modify the pretrained CNN


* Network Model
    * `torchvision.models.resnet18` - pretrained=True
        * Fully connected layer - in_features=???, out_features=10
            * replace resnet18.fc with new fc layer. `resnet18.fc = nn.Linear(...)`.
            * get in_features of existing fc layer by `resnet18.fc.in_features`.

* Do not apply a softmax activation function to the output layer.
    * The softmax activation function is already included in the loss function (`nn.CrossEntropyLoss`).


In [None]:
from torchvision import models

class ResNet18(nn.Module):
    """Pretrained torchvision ResNet-18 with a replaced classification layer.

    Args:
        num_classes: Number of outputs of the new fully connected layer.
        freeze_backbone: If True, the pretrained parameters are frozen and
            only the new fully connected layer remains trainable.
    """

    def __init__(self, num_classes=10, freeze_backbone=False):
        super().__init__()
        # create pretrained CNN
        # NOTE: `pretrained=True` is deprecated in newer torchvision releases
        # in favour of the `weights=` argument; it is kept here so the cell
        # also runs on older versions.
        self.resnet = models.resnet18(pretrained=True)

        # (optional) freeze the model
        if freeze_backbone:
            for param in self.resnet.parameters():
                param.requires_grad = False

        # get in_features from fc layer in resnet18
        in_features = self.resnet.fc.in_features
        # create new fc layer; it is built after the optional freeze, so its
        # parameters are always trainable
        self.resnet.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for batch ``x``."""
        return self.resnet(x)


net = ResNet18().to(device)

## Training

### Define a Loss function and optimizer

3. Define a loss function


* Use Classification Cross-Entropy loss
* Use SGD with learning rate 0.01 and momentum 0.9

In [None]:
# Classification cross-entropy loss, applied to the raw outputs of the network.
loss_obj = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the network.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.01,
    momentum=0.9,
)

## Train the network on the training data

4. Train the network on the training data

In [None]:
# Switch the network to training mode.
net.train()

print('Start Training ')
num_epochs = 10
for epoch in range(num_epochs):
    # Sum of the batch losses of this epoch, used for the running average.
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(trainloader):
        # batch to gpu
        inputs, targets = inputs.to(device), targets.to(device)

        # clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # forward
        outputs = net(inputs)

        # get loss
        loss = loss_obj(outputs, targets)

        # backward + optimize
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        print('\r[Epoch {}/{}][itr {}/{}] Loss: {}'.format(epoch+1, num_epochs, i+1, len(trainloader), running_loss/(i+1)), end='')

    # Finish the progress line so the next epoch does not overwrite it.
    print()

print('Finished Training')

## Testing

### Show network prediction

In [None]:
# Switch the network to evaluation mode.
net.eval()

# display ground truth
dataiter = iter(testloader)
# Use the built-in next(): DataLoader iterators no longer provide a .next() method.
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
# One name per image of the batch, instead of a hard-coded count.
print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))

In [None]:
# display predicted
# Forward pass only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = net(images.to(device))

# torch.max over dim 1 returns (values, indices); the indices are the
# predicted class ids.
predicted = torch.max(outputs, dim=1)[1]

predicted_names = ['%5s' % classes[predicted[j]] for j in range(4)]
print('Predicted: ', ' '.join(predicted_names))

### Test the network on the test data

5. Test the network on the test data

#### Accuracy

In [None]:
# Count how many test samples the network classifies correctly.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # The index of the largest score along dim 1 is the predicted class.
        predicted = torch.max(outputs, dim=1)[1]
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

accuracy_percent = 100 * correct / total
print('Accuracy of the network on the 10000 test images: %d %%' % accuracy_percent)

#### Accuracy of each class

In [None]:
# Per-class counters, indexed by the integer class label.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        # No .squeeze() on the comparison: for a batch holding one sample it
        # would produce a 0-dim tensor that cannot be indexed.
        # tolist() copies the batch to Python values once instead of calling
        # .item() for every sample.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

for i in range(len(classes)):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))