## Transfer Learning

[Code](https://github.com/priyammaz/PyTorch-Adventures/blob/main/PyTorch%20Basics/Basics%20of%20Transfer%20Learning/Transfer%20Learning.ipynb)

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import AlexNet
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import numpy as np

## Training the model

In [10]:
path_to_data = "./catsanddogs/PetImages/"

train_transform = transforms.Compose(
    [
        transforms.Resize([224, 224]),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

dataset = ImageFolder(root=path_to_data, transform=train_transform)
dataset

Dataset ImageFolder
    Number of datapoints: 24998
    Root location: ./catsanddogs/PetImages/
    StandardTransform
Transform: Compose(
               Resize(size=[224, 224], interpolation=bilinear, max_size=None, antialias=True)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [11]:
model = AlexNet()
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [4]:
model.classifier[1]

Linear(in_features=9216, out_features=4096, bias=True)

In [5]:
model.classifier[6] = nn.Linear(4096, 2)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [6]:
rand_data = torch.randn(16, 3, 224, 224)
model(rand_data).shape

torch.Size([16, 2])

In [7]:
num_params = 0

for name, param in model.named_parameters():
    num_params += param.numel()
    print(name, param.shape)

print("------------------------")
print("Total Parameters in Model:", num_params)

features.0.weight torch.Size([64, 3, 11, 11])
features.0.bias torch.Size([64])
features.3.weight torch.Size([192, 64, 5, 5])
features.3.bias torch.Size([192])
features.6.weight torch.Size([384, 192, 3, 3])
features.6.bias torch.Size([384])
features.8.weight torch.Size([256, 384, 3, 3])
features.8.bias torch.Size([256])
features.10.weight torch.Size([256, 256, 3, 3])
features.10.bias torch.Size([256])
classifier.1.weight torch.Size([4096, 9216])
classifier.1.bias torch.Size([4096])
classifier.4.weight torch.Size([4096, 4096])
classifier.4.bias torch.Size([4096])
classifier.6.weight torch.Size([2, 4096])
classifier.6.bias torch.Size([2])
------------------------
Total Parameters in Model: 57012034


In [12]:
train_samples, test_samples = int(0.9 * len(dataset)), len(dataset) - int(0.9 * len(dataset))
train_dataset, val_dataset = torch.utils.data.random_split(dataset, lengths=[train_samples, test_samples])

In [13]:
DEVICE = "mps:0"

In [14]:
model = model.to(DEVICE)
EPOCHS = 2
LR = 0.0001
BATCH_SIZE = 16

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

In [15]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
def train(model, device, epochs, optimizer, loss_fn, train_loader, val_loader):
    log_training = {
        "epoch": [],
        "training_loss": [],
        "validation_loss": [],
        "training_acc": [],
        "validation_acc": [],
    }

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")

        train_losses, train_accuracies = [], []
        val_losses, val_accuracies = [], []

        model.train()
        for image, label in tqdm(train_loader, desc="Training"):
            image, label = image.to(device), label.to(device)

            output = model(image)
            loss = loss_fn(output, label)
            train_losses.append(loss.item())

            # Compute accuracy
            predictions = torch.argmax(output, axis=-1)
            accuracy = (predictions == label).float().mean()
            train_accuracies.append(accuracy.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        model.eval()
        for image, label in tqdm(val_loader, desc="Validation"):
            image, label = image.to(device), label.to(device)

            with torch.no_grad():
                output = model(image)
                loss = loss_fn(output, label)
                val_losses.append(loss.item())

                # Compute accuracy
                predictions = torch.argmax(output, axis=-1)
                accuracy = (predictions == label).float().mean()
                val_accuracies.append(accuracy.item())

        training_loss_mean, training_acc_mean = np.mean(train_losses), np.mean(train_accuracies)
        valid_loss_mean, valid_acc_mean = np.mean(val_losses), np.mean(val_accuracies)

        log_training["epoch"].append(epoch)
        log_training["training_loss"].append(training_loss_mean)
        log_training["training_acc"].append(training_acc_mean)
        log_training["validation_loss"].append(valid_loss_mean)
        log_training["validation_acc"].append(valid_acc_mean)

        print("Training Loss:", training_loss_mean)
        print("Training Acc:", training_acc_mean)
        print("Validation Loss:", valid_loss_mean)
        print("Validation Acc:", valid_acc_mean)

    return log_training, model

In [None]:
random_init_log, model = train(model, DEVICE, EPOCHS, optimizer, loss_fn, train_loader, val_loader)

## Transfer Learning

In [18]:
model = torch.hub.load("pytorch/vision:v0.10.0", "alexnet", pretrained=True)
model.classifier[6] = nn.Linear(4096, 2)
model = model.to(DEVICE)

### MODEL TRAINING INPUTS ###
EPOCHS = 2
optimizer = optim.Adam(params=model.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()
BATCH_SIZE = 128

### BUILD DATALOADERS ###
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

random_init_logs, model = train(
    model=model,
    device=DEVICE,
    epochs=EPOCHS,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    val_loader=val_loader,
)

Using cache found in /Users/rafi/.cache/torch/hub/pytorch_vision_v0.10.0


Epoch 1/2


Training: 100%|██████████| 176/176 [01:36<00:00,  1.83it/s]
Validation: 100%|██████████| 20/20 [00:08<00:00,  2.49it/s]


Training Loss: 0.11138437123207207
Training Acc: 0.9552013278007507
Validation Loss: 0.0847203329205513
Validation Acc: 0.9641544103622437
Epoch 2/2


Training: 100%|██████████| 176/176 [01:32<00:00,  1.89it/s]
Validation: 100%|██████████| 20/20 [00:07<00:00,  2.55it/s]

Training Loss: 0.06739185641477392
Training Acc: 0.974537808786739
Validation Loss: 0.08807402215898037
Validation Acc: 0.9638097435235977





## Train the Final Classifier Layer

In [17]:
model = torch.hub.load("pytorch/vision:v0.10.0", "alexnet", pretrained=True)
model.classifier[6] = nn.Linear(4096, 2)

# Check the name of all the parameters
for name, param in model.named_parameters():
    if "classifier.6" not in name:
        param.requires_grad_(False)  # Inplace turn of gradient updates

model = model.to(DEVICE)

### MODEL TRAINING INPUTS ###
EPOCHS = 2
optimizer = optim.Adam(params=model.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()
BATCH_SIZE = 128

### BUILD DATALOADERS ###
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

random_init_logs, model = train(
    model=model,
    device=DEVICE,
    epochs=EPOCHS,
    optimizer=optimizer,
    loss_fn=loss_fn,
    train_loader=train_loader,
    val_loader=val_loader,
)

Using cache found in /Users/rafi/.cache/torch/hub/pytorch_vision_v0.10.0


Epoch 1/2


Training: 100%|██████████| 1407/1407 [01:24<00:00, 16.58it/s]
Validation: 100%|██████████| 157/157 [00:09<00:00, 16.34it/s]


Training Loss: 0.14536999637049136
Training Acc: 0.9407871357498223
Validation Loss: 0.10788703407605837
Validation Acc: 0.9538216560509554
Epoch 2/2


Training: 100%|██████████| 1407/1407 [01:21<00:00, 17.36it/s]
Validation: 100%|██████████| 157/157 [00:08<00:00, 18.22it/s]

Training Loss: 0.10665666217871737
Training Acc: 0.9575337597725657
Validation Loss: 0.0993854672481917
Validation Acc: 0.9574044585987261



