# Transfer Learning

Reusing an already trained model - either by fine-tuning all of its weights or by training only the last layer of the neural network - to speed up the training process.

**Step 1** - Importing the required libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

**Step 2** - Uploading the file containing the dataset

Instructions for Running:
1. Add the shared dataset folder to your Google Drive (it is available in the GitHub repository: https://github.com/DarioVajda/ML_basics)
2. Mount your Google Drive in Colab.
3. Access the dataset using the path where you added the folder.

In [None]:
# Colab-only helper for attaching Google Drive to the notebook's file system
from google.colab import drive
# mounting Google Drive under /content/drive (Colab asks for authorization when this runs)
drive.mount('/content/drive')

# folder with the dataset; later cells read the 'train' and 'val' subfolders from it
# NOTE: change this path if the shared folder was added somewhere else in your Drive
data_dir = '/content/drive/My Drive/google_colab/hymenoptera_data'

**Step 3** -  Loading the datasets

In [None]:
# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# per-channel mean and standard deviation used to normalize the images
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# training images get random augmentation (random crop + horizontal flip),
# validation images get a deterministic resize + center crop
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
}

# names of the dataset splits; each one is a subfolder of data_dir
sets = ['train', 'val']

# one ImageFolder dataset per split, each with its own transform pipeline
image_datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
    for x in sets
}

# one DataLoader per split, yielding shuffled batches of 4 images
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=0)
    for x in sets
}

# number of images in each split (used to average the loss and accuracy per epoch)
dataset_sizes = {x: len(image_datasets[x]) for x in sets}

# class names as reported by the training dataset
class_names = image_datasets['train'].classes
print(class_names)

**Step 4** - Function for training the model

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with its best validation weights loaded.

    Every epoch runs a 'train' phase followed by a 'val' phase. The batches
    come from the module-level ``dataloaders`` dict, are moved to the
    module-level ``device``, and the per-epoch averages are computed with the
    module-level ``dataset_sizes`` dict.

    Args:
        model: Network to train; it is updated in place.
        criterion: Loss function, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer, stepped once per training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, loaded with the weights of the epoch that
        reached the highest validation accuracy (the initial weights if no
        epoch scored above 0).
    """
    since = time.time()

    # snapshot of the best weights seen so far, starting with the initial ones
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        # 1-based counter so the last epoch is reported as N/N
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('_' * 20)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # training mode for 'train', evaluation mode for 'val'
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # gradients are only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # scale by the batch size so the epoch loss is a per-sample average
                # (assumes the criterion returns the mean over the batch)
                running_loss += loss.item() * inputs.size(0)
                # keep the counter a plain Python int instead of a device tensor
                running_corrects += (preds == labels).sum().item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase}, Loss: {epoch_loss:.4f}; Acc: {epoch_acc:.4f}')

            # remember the weights of the best validation epoch
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # restore the best validation weights before returning
    model.load_state_dict(best_model_wts)
    return model

**Step 5.1** - Fine tuning the pretrained model

Fine-tuning means continuing to train an entire pretrained model on a new, much smaller dataset. In this case all of the weights of the pretrained model are updated.

In [None]:
# start from ResNet-18 with its pretrained weights; nothing is frozen here,
# so every weight of the network takes part in training
model = models.resnet18(pretrained=True)

# swap the final fully connected layer for a fresh one with 2 outputs (one per class)
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=2)
model = model.to(device)

# loss function and optimizer; the optimizer receives all parameters of the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

# learning-rate schedule: the rate is multiplied by 0.1 every 7 epochs
step_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

# run the training loop and keep the model with the best validation weights
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=20,
)

**Step 5.2** - Training only the last layer of the pretrained model

All layers except the last one in the pretrained model will be frozen, so their old weights remain unchanged. Only the new last layer will be trained on the new dataset.

In [None]:
# importing the pretrained model
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` - confirm against the installed version before changing it
model = models.resnet18(pretrained=True)

# freezing all layers in the pretrained model
for param in model.parameters():
    param.requires_grad = False

# creating a new last layer for the neural network, there are 2 outputs because it will be used for classification of two classes
# the new layer is created after the freeze, so its parameters are trainable
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)
model.to(device)

criterion = nn.CrossEntropyLoss()
# only the new last layer is trainable, so only its parameters are given to the optimizer
optimizer = optim.SGD(model.fc.parameters(), lr=0.001)

# defining the scheduler - it will update the learning rate while training the model
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# calling the training loop function
model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)