<a href="https://colab.research.google.com/github/MihaiDogariu/Keysight-Deep-Learning-Fundamentals--v2-/blob/main/scripts/Unit_10_Transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Transfer Learning Example

In [None]:
import torch
import copy
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models, transforms
import zipfile

In [None]:
!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip

In [None]:
# Unpack the downloaded archive into the current working directory.
# The context manager closes the archive once extraction has finished.
with zipfile.ZipFile('hymenoptera_data.zip') as archive:
    archive.extractall()

In [None]:
# Per-channel mean / std used by the Normalize step of both pipelines
# (presumably the statistics the pre-trained weights were trained with -- TODO confirm).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random 224-pixel crop and random horizontal flip, then tensor conversion
# and normalisation.
train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
train_transforms = transforms.Compose(train_steps)

# Validation pipeline: no random steps -- resize to 256, take the central 224-pixel crop,
# then apply the same tensor conversion and normalisation as for training.
val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_transforms = transforms.Compose(val_steps)

In [None]:
class HymenopteraDataset(torch.utils.data.Dataset):
    """Dataset that delegates to a ``torchvision.datasets.ImageFolder``.

    ``ImageFolder`` processes datasets that have one sub-directory per class:

        directory/
        ├── class_x
        │   ├── xxx.ext
        │   ├── xxy.ext
        │   ├── ...
        │   └── xxz.ext
        └── class_y
            ├── 123.ext
            ├── nsdf3.ext
            ├── ...
            └── asd932_.ext

    Length and item lookups are forwarded unchanged to the wrapped ``ImageFolder``.
    """

    def __init__(self, dir_path, transform=None):
        """Build the wrapped ``ImageFolder``.

        Args:
            dir_path: Root directory that contains the per-class sub-directories.
            transform: Optional transform handed straight to ``ImageFolder``.
        """
        self.dir_path = dir_path
        self.img_folder = torchvision.datasets.ImageFolder(dir_path, transform=transform)

    def __len__(self):
        """Return the number of items reported by the wrapped ``ImageFolder``."""
        n_items = len(self.img_folder)
        return n_items

    def __getitem__(self, idx):
        """Return the item the wrapped ``ImageFolder`` holds at position ``idx``."""
        item = self.img_folder[idx]
        return item

In [None]:
# Datasets over the extracted train/ and val/ folders, each with its own transform pipeline
train_dataset = HymenopteraDataset('hymenoptera_data/train', transform=train_transforms)
val_dataset = HymenopteraDataset('hymenoptera_data/val', transform=val_transforms)

# Batched loaders: training batches are shuffled, validation batches keep the dataset order
loader_options = dict(batch_size=4)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **loader_options)

# Pick the device to run on: the current accelerator when one is available, otherwise the CPU.
# (Only the device name is chosen here; the model is moved to it in a later cell.)
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Instantiate VGG19 initialised with its default pre-trained weights
pretrained_weights = models.VGG19_Weights.DEFAULT
vgg19 = models.vgg19(weights=pretrained_weights)
# print(vgg19)  # uncomment to inspect the architecture

In [None]:
# Deep-copy the loaded model so that changes can be made to the copy (vgg19_copy)
# rather than to vgg19 itself
vgg19_copy = copy.deepcopy(vgg19)
# print(vgg19_copy)  # uncomment to inspect the copy

In [None]:
# Remove the last layer of the classifier.
# The kept layers are deep-copied from vgg19 instead of being reused directly: reusing them
# would make vgg19_copy hold the very same layer objects as vgg19, so moving vgg19_copy to a
# device or training it would silently modify vgg19 as well.
# Building from vgg19 (not from vgg19_copy) also keeps this cell safe to re-run, because it
# always starts from the full original classifier.
kept_layers = list(vgg19.classifier.children())[:-1]
vgg19_copy.classifier = nn.Sequential(*(copy.deepcopy(layer) for layer in kept_layers))
# print(vgg19_copy)

In [None]:
# Create a new fully connected output layer, sized for the new classification problem,
# and attach it to the end of the pre-trained classifier.
NUM_CLASSES = 2  # number of outputs of the new layer
head_layers = list(vgg19_copy.classifier.children())
last_layer = head_layers[-1]
# Re-running this cell must not stack a second output layer on top of the first one
head_attached = isinstance(last_layer, nn.Linear) and last_layer.out_features == NUM_CLASSES
if not head_attached:
    # The new layer consumes the output of the last Linear layer still present in the
    # classifier (found by type rather than through a hard-coded index).
    num_ftrs = next(layer.out_features for layer in reversed(head_layers) if isinstance(layer, nn.Linear))
    # One flat Sequential (instead of a Sequential nested inside another one) keeps every
    # layer addressable as vgg19_copy.classifier[i].
    vgg19_copy.classifier = nn.Sequential(*head_layers, nn.Linear(in_features=num_ftrs, out_features=NUM_CLASSES))
# print(vgg19_copy)

In [None]:
# Place the model on the selected device
vgg19_copy = vgg19_copy.to(device)

# Cross-entropy loss, minimised with SGD + momentum over all of the model's parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg19_copy.parameters(), lr=0.001, momentum=0.9)

# Every epoch runs one pass over the training data followed by one pass over the validation data
num_epochs = 5
for epoch in range(num_epochs):
    # ---- training pass ----
    vgg19_copy.train()
    for i, (inputs, labels) in enumerate(train_loader):
        # batch tensors must live on the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # discard the gradients accumulated for the previous batch
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = vgg19_copy(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # report the loss of this batch
        print(f'Epoch: {epoch+1}, Batch: {i+1}, Loss: {loss.item()}')

    # ---- validation pass (evaluation mode, no gradient tracking) ----
    vgg19_copy.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = vgg19_copy(inputs)
            # predicted class = index of the largest output in each row
            predicted = outputs.max(dim=1).indices
            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

    # share of correctly classified validation samples after this epoch
    print('Accuracy: {}%'.format(100 * correct / total))

print('Finished Training')