- Here we are going to fine-tuner a model by freezing some of the layers of the architcture and training the last layers

### Imports

In [16]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision

### Set Device

In [2]:
davice = "cpu"

if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"

### Hyperparameters

In [3]:
in_channels = 3
num_classes = 10
learning_rate = 1e-3
batch_size = 1024
num_epochs = 5

### Load Pre-trained model & modify it

- we are going to change the `(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))` layer (making it flow output as it is)

- Also, changeing the `classifier` to 10 output classes

- freezing the rest weights

In [9]:
class Identity(nn.Module):

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x # do nothing

In [24]:
model = torchvision.models.vgg16(pretrained=True)



In [7]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
model.avgpool = Identity()

- if `avgpool` would have been in `modelist` then to change only a particular component , let's say it was 1st component in list,
```python
model.avgpool[0] = Identiy()
```

this way we should change it

- lets say we also want to change, the model.classifier

In [11]:
model.classifier = nn.Linear(512, 10)

In [12]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

### Load Data

In [19]:
train_dataset = datasets.CIFAR10(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.CIFAR10(root='dataset/', train=False, transform=transforms.ToTensor(), download=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to dataset/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [10:04<00:00, 282054.01it/s] 


Extracting dataset/cifar-10-python.tar.gz to dataset/


### Loss & Optimizers

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

### Train Network

In [21]:
for epoch in range(num_epochs):

    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.to(device)
        target = target.to(device)

        # data = data.reshape(data.shape[0], -1) # (64, 1, 28, 28) --> (64, 1x28x28) = (64, 784)

        # forward pass
        scores = model(data) # Forward pass
        
        loss = criterion(scores, target) # Compute loss

        # backward
        optimizer.zero_grad() # Clear previous gradients

        loss.backward() # Backpropagation

        optimizer.step() # Update model weights

        total_loss = loss.item() # Accumulate loss

    average_loss = total_loss / len(train_loader) # Calculating average loss

    print(f"Epoch {epoch}: Average Loss: {average_loss}")

Epoch 0: Average Loss: 0.04476244109017508
Epoch 1: Average Loss: 0.028968995931197186
Epoch 2: Average Loss: 0.018904498645237515
Epoch 3: Average Loss: 0.013902945177895682
Epoch 4: Average Loss: 0.010287751956861846


### Check accuracy

In [22]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("CHecking accuarcy on testing data")

    num_correct = 0
    num_samples = 0
    model.eval() # put model into evaluation mode

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            scores = model(x)

            _, predictions = scores.max(1)

            num_correct += (predictions == y).sum()

            num_samples += predictions.size(0)
        acc = float(num_correct) / float(num_samples) * 100

        print(f"Got {num_correct} / {num_samples} with accuracy {acc:.2f}")
    
    # model.train()

In [23]:
check_accuracy(train_loader, model)

Checking accuracy on training data
Got 42897 / 50000 with accuracy 85.79


### Freezing Layers and Tunning : Transfer Learning

In [26]:
model = torchvision.models.vgg16(pretrained=True)



In [27]:
# freezing the weights
for param in model.parameters():
    param.requires_grad = False

In [28]:
model.avgpool = Identity()
model.classifier = nn.Linear(512, 10)
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [29]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [30]:
for epoch in range(num_epochs):

    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.to(device)
        target = target.to(device)

        # data = data.reshape(data.shape[0], -1) # (64, 1, 28, 28) --> (64, 1x28x28) = (64, 784)

        # forward pass
        scores = model(data) # Forward pass
        
        loss = criterion(scores, target) # Compute loss

        # backward
        optimizer.zero_grad() # Clear previous gradients

        loss.backward() # Backpropagation

        optimizer.step() # Update model weights

        total_loss = loss.item() # Accumulate loss

    average_loss = total_loss / len(train_loader) # Calculating average loss

    print(f"Epoch {epoch}: Average Loss: {average_loss}")

Epoch 0: Average Loss: 0.02999521761524434
Epoch 1: Average Loss: 0.02761255964940908
Epoch 2: Average Loss: 0.025301376167608768
Epoch 3: Average Loss: 0.02410534936554578
Epoch 4: Average Loss: 0.024034989123441736


### Models Performance

In [31]:
check_accuracy(train_loader, model)

Checking accuracy on training data
Got 29583 / 50000 with accuracy 59.17
