<a href="https://colab.research.google.com/github/DerNiccoo/TheEarlyBird/blob/main/Aufgabe3/Alina/Assignment3_Notebook2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Notebook 2: Transfer Learning**

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import time

In [2]:
#Choose a device: GPU
device_gpu = torch.device("cuda:0")
device_cpu = torch.device('cpu')

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 8)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# **1. Load a reduced dataset for transfer learning that:**
  1. Only contains the classes „ship“ and „truck“
  2. Only contains the first 50 images of the original dataset that are either „ship“ or „truck“.

In [75]:
class CIFAR_X(torchvision.datasets.CIFAR10):

    def __init__(self, *args, exclude_list=[], **kwargs):
        super(CIFAR_X, self).__init__(*args, **kwargs)

        if exclude_list == []:
            return

        labels = np.array(self.targets)
        exclude = np.array(exclude_list).reshape(1, -1)
        filter = ~(labels.reshape(-1, 1) == exclude).any(axis=1)

        labels[labels == 8] = 0
        labels[labels == 9] = 1

        self.data = self.data[filter]
        self.targets = labels[filter].tolist()

        self.data = self.data[:50]
        self.targets = self.targets[:50]

In [101]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
                  
trainset = CIFAR_X(root='./data', train=True, download=True, transform = transform, exclude_list=[0,1,2,3,4,5,6,7])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = CIFAR_X(root='./data', train=False, download=True, transform = transform, exclude_list=[0,1,2,3,4,5,6,7])
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


# **2. Apply transfer learning by loading the model that was trained on the eight CIFAR classes (Notebook 1) and do the following experiments:**

i. Re-initalize the last fully connected layer to predict on two classes.<br>
ii. Train on the 50 training images (as on the last slide) with the model using two different approaches:<br>
> a. Fine-tune all parameters (i.e. do not freeze parameters but reduce learning rate).<br>
> b. Freeze all parameters expect the ones from the last fc-layer.<br>

iii. Calculate the accuracy for both approaches on 50 test images.

In [44]:
# Load the trained Net
PATH = '/content/drive/My Drive/KI_Lab/model'
net = Net()
net.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))

#For GPU: 
#net.load_state_dict(torch.load(PATH, map_location="cuda:0"))
#net.to(device)

<All keys matched successfully>

In [45]:
#Re-initialize the lastfully connected layer 
net.fc3 = nn.Linear(net.fc3.in_features, 2)
net.to(device_cpu)
#net.to(device_gpu)


Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=2, bias=True)
)

# **Train Method**

In [70]:
def train(net, epochs, criterion, optimizer, device, trainloader):
  t0 = time.time()

  for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 10 == 9:    # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

  print('{} seconds'.format(time.time() - t0))



# **Test Method**

In [92]:
def test(net, device, testloader):
  correct = 0
  total = 0
  with torch.no_grad():
      for data in testloader:
          images, labels = data
          images, labels = images.to(device), labels.to(device)
          outputs = net(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  print('Accuracy of the network on the {} test images: {}%'.format(
      len(testloader.dataset), 100 * correct / total))

> a. Fine-tune all parameters (i.e. do not freeze parameters but reduce learning rate).<br>


In [87]:
for param in net.parameters():
    param.requires_grad = True

net.fc3 = nn.Linear(net.fc3.in_features, 2)
net.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
epochs = 30

train(net, epochs, criterion, optimizer, device_cpu, trainloader)
test(net, device_cpu, testloader)


[1,    10] loss: 0.540
[2,    10] loss: 0.472
[3,    10] loss: 0.350
[4,    10] loss: 0.285
[5,    10] loss: 0.247
[6,    10] loss: 0.284
[7,    10] loss: 0.142
[8,    10] loss: 0.203
[9,    10] loss: 0.179
[10,    10] loss: 0.102
[11,    10] loss: 0.085
[12,    10] loss: 0.102
[13,    10] loss: 0.107
[14,    10] loss: 0.054
[15,    10] loss: 0.045
[16,    10] loss: 0.063
[17,    10] loss: 0.028
[18,    10] loss: 0.047
[19,    10] loss: 0.035
[20,    10] loss: 0.031
[21,    10] loss: 0.027
[22,    10] loss: 0.023
[23,    10] loss: 0.017
[24,    10] loss: 0.018
[25,    10] loss: 0.013
[26,    10] loss: 0.013
[27,    10] loss: 0.012
[28,    10] loss: 0.010
[29,    10] loss: 0.011
[30,    10] loss: 0.010
3.6365394592285156 seconds
Accuracy of the network on the 10000 test images: 82 %


In [81]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.000001, momentum=0.9)
epochs = 30

train(net, epochs, criterion, optimizer, device_cpu, trainloader)
test(net, device_cpu, testloader)

[1,    10] loss: 0.402
[2,    10] loss: 0.433
[3,    10] loss: 0.396
[4,    10] loss: 0.424
[5,    10] loss: 0.346
[6,    10] loss: 0.382
[7,    10] loss: 0.411
[8,    10] loss: 0.420
[9,    10] loss: 0.367
[10,    10] loss: 0.413
[11,    10] loss: 0.393
[12,    10] loss: 0.373
[13,    10] loss: 0.384
[14,    10] loss: 0.392
[15,    10] loss: 0.415
[16,    10] loss: 0.400
[17,    10] loss: 0.374
[18,    10] loss: 0.391
[19,    10] loss: 0.372
[20,    10] loss: 0.389
[21,    10] loss: 0.427
[22,    10] loss: 0.404
[23,    10] loss: 0.387
[24,    10] loss: 0.434
[25,    10] loss: 0.425
[26,    10] loss: 0.377
[27,    10] loss: 0.400
[28,    10] loss: 0.396
[29,    10] loss: 0.416
[30,    10] loss: 0.387
3.7235867977142334 seconds
Accuracy of the network on the 10000 test images: 82 %


> b. Freeze all parameters expect the ones from the last fc-layer.<br>

In [83]:
for param in net.parameters():
    param.requires_grad = False

net.fc3 = nn.Linear(net.fc3.in_features, 2)
net.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.00001, momentum=0.9)
epochs = 30

train(net, epochs, criterion, optimizer, device_cpu, trainloader)
test(net, device_cpu, testloader)

[1,    10] loss: 0.756
[2,    10] loss: 0.775
[3,    10] loss: 0.743
[4,    10] loss: 0.763
[5,    10] loss: 0.746
[6,    10] loss: 0.721
[7,    10] loss: 0.754
[8,    10] loss: 0.739
[9,    10] loss: 0.727
[10,    10] loss: 0.741
[11,    10] loss: 0.709
[12,    10] loss: 0.737
[13,    10] loss: 0.718
[14,    10] loss: 0.712
[15,    10] loss: 0.702
[16,    10] loss: 0.724
[17,    10] loss: 0.700
[18,    10] loss: 0.703
[19,    10] loss: 0.718
[20,    10] loss: 0.707
[21,    10] loss: 0.690
[22,    10] loss: 0.673
[23,    10] loss: 0.694
[24,    10] loss: 0.702
[25,    10] loss: 0.692
[26,    10] loss: 0.668
[27,    10] loss: 0.698
[28,    10] loss: 0.649
[29,    10] loss: 0.699
[30,    10] loss: 0.665
3.3472564220428467 seconds
Accuracy of the network on the 10000 test images: 52 %


In [84]:
for param in net.parameters():
    param.requires_grad = False

net.fc3 = nn.Linear(net.fc3.in_features, 2)
net.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
epochs = 30

train(net, epochs, criterion, optimizer, device_cpu, trainloader)
test(net, device_cpu, testloader)

[1,    10] loss: 0.790
[2,    10] loss: 0.558
[3,    10] loss: 0.504
[4,    10] loss: 0.368
[5,    10] loss: 0.385
[6,    10] loss: 0.409
[7,    10] loss: 0.341
[8,    10] loss: 0.361
[9,    10] loss: 0.349
[10,    10] loss: 0.317
[11,    10] loss: 0.383
[12,    10] loss: 0.331
[13,    10] loss: 0.340
[14,    10] loss: 0.325
[15,    10] loss: 0.259
[16,    10] loss: 0.388
[17,    10] loss: 0.304
[18,    10] loss: 0.360
[19,    10] loss: 0.244
[20,    10] loss: 0.304
[21,    10] loss: 0.355
[22,    10] loss: 0.364
[23,    10] loss: 0.302
[24,    10] loss: 0.279
[25,    10] loss: 0.346
[26,    10] loss: 0.295
[27,    10] loss: 0.332
[28,    10] loss: 0.304
[29,    10] loss: 0.374
[30,    10] loss: 0.381
3.1801741123199463 seconds
Accuracy of the network on the 10000 test images: 80 %


# **3. Do the same as in step 2 but use the pre-trained network resnet18 from the torch vision library. Compare the results.**

In [102]:
model_freezed = torchvision.models.resnet18(pretrained=True)

for param in model_freezed.parameters():
    param.requires_grad = False

num_ftrs = model_freezed.fc.in_features
model_freezed.fc = nn.Linear(num_ftrs, 2)

model_freezed.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
epochs = 5

train(model_freezed, epochs, criterion, optimizer, device_cpu, trainloader)
test(model_freezed, device_cpu, testloader)

[1,    10] loss: 0.934
[2,    10] loss: 1.091
[3,    10] loss: 1.084
[4,    10] loss: 1.070
[5,    10] loss: 0.843
3.541722536087036 seconds
Accuracy of the network on the 50 test images: 58.0%


In [103]:
model = torchvision.models.resnet18(pretrained=True)

for param in model.parameters():
    param.requires_grad = True

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

model.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
epochs = 5

train(model, epochs, criterion, optimizer, device_cpu, trainloader)
test(model, device_cpu, testloader)

[1,    10] loss: 0.790
[2,    10] loss: 0.697
[3,    10] loss: 0.770
[4,    10] loss: 0.801
[5,    10] loss: 0.895
9.76911735534668 seconds
Accuracy of the network on the 50 test images: 46.0%


# **4. Resnet18 was pre-trained on the image net data, which has an image resolution of 224x224. However, Resnet18 still works on other solutions because it uses an adaptive pooling layer before the fc-layer (see discussion here). Try to see if you can improve the results by resizing the CIFAR images to 224x224 before passing it to the Resnet.**

In [98]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), torchvision.transforms.Resize([224, 224])])

#transforms.Resize((128,128),interpolation=Image.NEAREST
                  
trainset_resized = CIFAR_X(root='./data', train=True, download=True, transform = transform, exclude_list=[0,1,2,3,4,5,6,7])
trainloader_resized = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset_resized = CIFAR_X(root='./data', train=False, download=True, transform = transform, exclude_list=[0,1,2,3,4,5,6,7])
testloader_resized = torch.utils.data.DataLoader(testset, batch_size=4,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [100]:
model_resized = torchvision.models.resnet18(pretrained=True)

for param in model_resized.parameters():
    param.requires_grad = True

num_ftrs = model_resized.fc.in_features
model_resized.fc = nn.Linear(num_ftrs, 2)

model_resized.to(device_cpu)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
epochs = 5

train(model_resized, epochs, criterion, optimizer, device_cpu, trainloader_resized)
test(model_resized, device_cpu, testloader_resized)

[1,    10] loss: 0.780
[2,    10] loss: 0.800
[3,    10] loss: 0.749
[4,    10] loss: 0.802
[5,    10] loss: 0.814
67.15398716926575 seconds
Accuracy of the network on the 50 test images: 54.0%
