# Ants and Bees - Classification to Demonstrate Transfer Learning

- This notebook is supposed to be an amateur's first foray into Transfer Learning.
- It operates on a relatively small dataset: about 120 training images each for ants and bees, plus 75 validation images for each class.
- A dataset this small is usually not enough to generalize from when training from scratch. Since we are using transfer learning, we should still be able to generalize reasonably well.

### Relevant Imports

In [None]:
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
import numpy as np
import torchvision
from torchvision import datasets, transforms as T, models
import matplotlib.pyplot as plt
import time
import os

plt.ion()

### Load Data

In [None]:
# Preprocessing pipelines, keyed by dataset split.
#   'train': random crop + random horizontal flip for augmentation.
#   'val':   deterministic resize + centre crop.
# Both splits end with the same tensor conversion and per-channel
# mean/std normalisation.
data_transforms = dict(
    train=T.Compose([
        T.RandomResizedCrop(224),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    val=T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

In [None]:
# Number of images per mini-batch; passed to the DataLoaders created below.
batch_size = 4

In [None]:
from os import path

# Root folder that contains one sub-folder per split ('train' and 'val').
data_dir = "../input/hymenoptera/hymenoptera"

# One ImageFolder dataset per split, each using that split's transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(
        os.path.join(data_dir, split),
        transform=data_transforms[split],
    )
    for split in ['train', 'val']
}

# Only the training split is shuffled; validation order stays fixed.
dataloaders = {
    split: DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
        num_workers=2,
    )
    for split in ['train', 'val']
}

# Number of samples per split, used to average the running loss / accuracy.
dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'val']}

# Class names as discovered by ImageFolder on the training split.
category_names = image_datasets['train'].classes


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Bare expression so the notebook echoes the selected device.
device

### Visualising a Few Images

- We'll plot a few images to see the effects of the data transforms.

In [None]:
from torch import Tensor


def imshow(inp: Tensor, title=None):
  """Display a normalised, channels-first image tensor with matplotlib.

  The tensor is moved to the CPU, rearranged from (C, H, W) to (H, W, C),
  de-normalised with the same per-channel mean/std that the data transforms
  apply, and clamped to [0, 1] before being drawn.

  Args:
    inp: 3-D image tensor with the channel axis first (3 channels, since it
      is broadcast against 3-element mean/std arrays).
    title: optional title for the plot; skipped when `None`.

  Returns:
    Whatever `plt.imshow` returns for the drawn image.
  """
  inp = inp.cpu().numpy().transpose((1, 2, 0))
  mean = np.array([0.485, 0.456, 0.406])
  std = np.array([0.229, 0.224, 0.225])

  # Undo the normalisation. np.clip returns a new array, so the result must
  # be assigned back, otherwise out-of-range values reach plt.imshow.
  inp = np.clip(std * inp + mean, 0, 1)

  if title is not None:
    plt.title(title)

  return plt.imshow(inp)


In [None]:
# Pull a single mini-batch (images and their class indices) from the training loader.
inputs, classes = next(iter(dataloaders['train']))

# Tile the whole batch into one image tensor.
out = torchvision.utils.make_grid(inputs)

# Show the grid, titled with the class name of every image in the batch.
imshow(out, title=[category_names[label] for label in classes])


### Training the Model

The training function below takes care of:
  - scheduling the learning rate
  - saving the best model

In [None]:
from copy import deepcopy
from torch.nn import Module


def train_model(model: Module, criterion, optimizer: optim.Optimizer, scheduler, num_epochs=25):
  """Train `model` and return it loaded with its best validation weights.

  Every epoch runs a 'train' pass followed by a 'val' pass over the
  module-level `dataloaders`, on the module-level `device`. Per-phase loss and
  accuracy are averaged with `dataset_sizes` and printed. The weights of the
  epoch with the highest validation accuracy are kept and restored at the end.

  Args:
    model: network to train; modified in place.
    criterion: loss callable invoked as `criterion(outputs, labels)`.
    optimizer: optimizer stepped once per training batch.
    scheduler: learning-rate scheduler stepped once per epoch, after the
      training pass.
    num_epochs: number of epochs to run.

  Returns:
    `model`, with the best-scoring validation weights loaded.
  """
  since = time.time()

  best_model_wts = deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs):
    # 1-based counter so the final epoch reads "N / N" instead of "N-1 / N".
    print(f"Epoch: {epoch + 1} / {num_epochs}")

    for phase in ['train', 'val']:
      is_train = phase == 'train'
      # train(True) enables training mode; train(False) is equivalent to eval().
      model.train(is_train)

      # Plain Python numbers: keeps the metrics off the GPU and makes the
      # averages below work even if a loader yields no batches.
      running_loss, running_corrects = 0.0, 0

      for inputs, labels in dataloaders[phase]:
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients accumulated by the previous batch.
        optimizer.zero_grad()

        # Forward pass; gradients are tracked only while training.
        with torch.set_grad_enabled(is_train):
          outputs = model(inputs)
          _, preds = torch.max(outputs, dim=1)
          loss = criterion(outputs, labels)

          # Backpropagate and update the weights only while training.
          if is_train:
            loss.backward()
            optimizer.step()

        # Assumes `criterion` returns the batch mean, so it is scaled back up
        # by the batch size before being summed over the epoch.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels).item()

      if is_train:
        scheduler.step()

      epoch_loss = running_loss / dataset_sizes[phase]
      epoch_acc = running_corrects / dataset_sizes[phase]

      print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

      # Snapshot the weights whenever validation accuracy improves.
      if phase == 'val' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = deepcopy(model.state_dict())

      print()

  time_elapsed = time.time() - since
  print(
      f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
  # ".4f" (four decimals) was intended; "4f" only set a minimum field width.
  print(f'Best val Acc: {best_acc:.4f}')

  model.load_state_dict(best_model_wts)
  return model


### Visualising Model Predictions

In [None]:
from matplotlib.image import AxesImage


def process_tensor_to_display(inp: Tensor) -> np.ndarray:
  """Convert a normalised (C, H, W) image tensor into a displayable array.

  The tensor is moved to the CPU, rearranged to (H, W, C), de-normalised with
  the per-channel mean/std below and clamped to [0, 1].

  Args:
    inp: 3-D image tensor with the channel axis first (3 channels, since it
      is broadcast against 3-element mean/std arrays).

  Returns:
    An (H, W, C) NumPy array whose values all lie in [0, 1].
  """
  inp = inp.cpu().numpy().transpose((1, 2, 0))
  mean = np.array([0.485, 0.456, 0.406])
  std = np.array([0.229, 0.224, 0.225])

  # np.clip returns a new array; the result must be kept, otherwise the
  # returned image can still contain values outside [0, 1].
  return np.clip(std * inp + mean, 0, 1)


In [None]:
def visualise_predictions(model: Module, num_images: int, n_rows: int, n_cols: int):
  """Plot validation images in a grid, each titled with the predicted class.

  Batches are read from the module-level `dataloaders['val']`, run through
  `model` on the module-level `device` with gradients disabled, and drawn
  row by row into an `n_rows` x `n_cols` grid of subplots. The model's
  original train/eval mode is restored before returning, even on error.

  Args:
    model: network used to produce the predictions.
    num_images: maximum number of images to draw; capped at the number of
      grid cells so the grid is never over-indexed.
    n_rows: number of subplot rows.
    n_cols: number of subplot columns.
  """
  was_training = model.training
  model.eval()

  # squeeze=False keeps the axes array 2-D even for a single row or column;
  # flattening it row-major matches the (index // n_cols, index % n_cols) order.
  fig, ax = plt.subplots(n_rows, n_cols, squeeze=False)
  cells = ax.flatten()
  limit = min(num_images, len(cells))
  images_so_far = 0

  try:
    if limit <= 0:
      return

    with torch.no_grad():
      for inputs, _ in dataloaders['val']:
        inputs = inputs.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, dim=1)

        for j in range(inputs.size(0)):
          cell = cells[images_so_far]
          cell.imshow(process_tensor_to_display(inputs[j].detach()))
          cell.axis('off')
          cell.set_title(f"{category_names[preds[j]]}")
          images_so_far += 1

          if images_so_far == limit:
            return
  finally:
    # Put the model back into whichever mode it was in when called.
    model.train(was_training)


### Finetuning the convnet

Load a pretrained model and reset the final fully connected layer.

In [None]:
# Start from a ResNet-18 with pretrained weights.
model_ft = models.resnet18(pretrained=True)

# Replace the final fully connected layer with one that outputs a score per
# category, keeping the same number of input features.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(category_names))

# Move the whole network to the selected device.
model_ft = model_ft.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()

# Every parameter of the network is handed to the optimizer (full fine-tuning).
optimizer_ft = optim.SGD(
    params=model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
# Fine-tune the whole network for 30 epochs and keep the returned model.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=30,
)

In [None]:
# Show 6 validation predictions of the fine-tuned model in a 2 x 3 grid.
visualise_predictions(model=model_ft, num_images=6, n_rows=2, n_cols=3)

### ConvNet as a Fixed Feature Extractor

- Here we will freeze all the layers of the network, except for the final layer.
- We need to set `requires_grad = False` for the parameters of those layers, so that their gradients are not computed during the backward pass.


In [None]:
# Start from a ResNet-34 with pretrained weights.
model_conv = models.resnet34(pretrained=True)

# Freeze every existing parameter so it is excluded from gradient computation.
for param in model_conv.parameters():
  param.requires_grad_(False)

In [None]:
# Swap in a fresh final layer sized for our categories. A newly constructed
# nn.Linear has requires_grad=True by default, so it is the only trainable part.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(in_features=num_ftrs, out_features=len(category_names))

# Module.to() moves the parameters in place; the bare expression also makes
# the notebook print the model.
model_conv.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()

# All parameters are passed in, including the frozen ones.
optimizer = optim.SGD(
    params=model_conv.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.01 every 7 epochs.
# NOTE(review): the fine-tuning run earlier in this file uses gamma=0.1;
# confirm that 0.01 is intentional here.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.01)

### Train and Evaluate

In [None]:
# Train the feature-extractor model with train_model's default epoch count.
model_conv = train_model(
    model=model_conv,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
)

In [None]:
# Show 10 validation predictions of the feature-extractor model in a 2 x 5 grid.
visualise_predictions(model=model_conv, num_images=10, n_rows=2, n_cols=5)