# Transfer Learning

# Step 1  -> import all required libraries 

In [1]:
!pip install -q torchinfo

In [2]:
import torch
import torchvision

import matplotlib.pyplot as plt
from torchinfo import summary

# Report the library versions this notebook runs against, one per line.
print(f"torch: {torch.__version__}",
      f"torchvision: {torchvision.__version__}",
      sep="\n")

torch: 1.13.1+cu116
torchvision: 0.14.1+cu116


# Step 2 Device agnostic code

In [3]:
# Prefer the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cuda'

# Step 3 Download data

In [4]:
# Download zip file
import os
import requests
import zipfile
from pathlib import Path

# Set data paths
data_path = Path("data/")
image_path = data_path / "food"
zip_path = data_path / "food.zip"
DATA_URL = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"

# Only download when the image folder is missing or empty, so re-running this
# cell does not fetch and extract the archive again.
if image_path.is_dir() and any(image_path.iterdir()):
  print(f"{image_path} already exists.")
else:
  print(f"create new directory {image_path}")
  image_path.mkdir(parents=True, exist_ok=True)

  # Fetch the archive first and fail loudly on an HTTP error, so an error page
  # is never written to disk and handed to zipfile.
  print("Downloading data")
  req = requests.get(DATA_URL, timeout=60)
  req.raise_for_status()
  with open(zip_path, "wb") as zip_file:
    zip_file.write(req.content)

  # open zip file and extract all content
  with zipfile.ZipFile(zip_path, "r") as zip_ref:
    print("Unzipping zip file")
    zip_ref.extractall(image_path)

  # remove the temporary zip file
  os.remove(zip_path)

create new directory data/food
Downloading data
Unzipping zip file


# Step 4 -> Create dataset and dataloaders

In [5]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# os.cpu_count() returns None when the CPU count cannot be determined; fall back
# to 0 so DataLoader always receives an integer (0 loads data in the main process).
NUM_WORKERS = os.cpu_count() or 0

def create_dataloaders(train_data:str,
                       test_data:str,
                       transform:transforms.Compose,
                       batch_size:int,
                       num_workers:int=NUM_WORKERS):
  """Build training and testing DataLoaders from two image folders.

  Args:
    train_data: Path to the training image folder (one sub-folder per class).
    test_data: Path to the testing image folder (one sub-folder per class).
    transform: Transform applied to every image of both datasets.
    batch_size: Number of samples per batch in both DataLoaders.
    num_workers: Worker processes per DataLoader. Defaults to NUM_WORKERS;
      None is treated as 0.

  Returns:
    A tuple (train_dataloader, test_dataloader, class_names), where class_names
    is the `classes` attribute of the training ImageFolder.
  """
  # Guard against callers forwarding an undetermined CPU count.
  if num_workers is None:
    num_workers = 0

  # 1. create datasets
  train_dataset = datasets.ImageFolder(train_data, transform=transform)
  test_dataset = datasets.ImageFolder(test_data, transform=transform)

  # 2. get class names from the training dataset
  class_names = train_dataset.classes

  # 3. create DataLoaders (only the training data is shuffled)
  train_dataloader = DataLoader(dataset=train_dataset,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=num_workers,
                                pin_memory=True)
  test_dataloader = DataLoader(dataset=test_dataset,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=num_workers,
                               pin_memory=True)
  return train_dataloader, test_dataloader, class_names


# Step 5 -> Set the same transform as the pretrained model

In [6]:
# Manual preprocessing pipeline using the same 224x224 size, mean and std that
# the pretrained weights' own transform reports.
manual_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # was (224, 244): width typo gave non-square inputs
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Step 6 -> Get weights

In [7]:
# Select the default pretrained weights for EfficientNet-B0 and display them.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT 
weights

EfficientNet_B0_Weights.IMAGENET1K_V1

In [8]:
# We can also get the transform directly from the pretrained weights, but then
# we lose the ability to customize the transform pipeline ourselves.
auto_transform = weights.transforms()
auto_transform

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

# Step 7 -> Set Dataloaders

In [22]:
# Batch size shared by the training and testing loaders.
BATCH_SIZE = 32

# Image folders for the two splits.
train_dir, test_dir = "data/food/train", "data/food/test"

# Build both loaders with the manual transform and keep the class names.
(train_dataloader,
 test_dataloader,
 class_name) = create_dataloaders(train_dir,
                                  test_dir,
                                  manual_transform,
                                  BATCH_SIZE)
train_dataloader, test_dataloader, class_name

(<torch.utils.data.dataloader.DataLoader at 0x7f54ed306fd0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f54ed306bb0>,
 ['pizza', 'steak', 'sushi'])

# Step 8 -> Get Pretrained Model

In [10]:
# Build EfficientNet-B0 initialised with the selected pretrained weights and move it to the target device.
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

In [13]:
# Display a per-layer model summary for an input of shape (32, 3, 224, 224):
# input/output sizes, parameter counts and whether each layer is trainable.
summary(model=model,
        input_size=(32,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

# Step 9 -> update classifier layer in model 
We usually replace the classifier layer so that its output size matches our number of classes.
Example: EfficientNet was pretrained with 1000 output classes, but our problem has only 3, so we change the output size to 3. We also freeze the feature layers: they are already well trained, so we keep them unchanged and train only the new classifier.

In [14]:
# Freeze all base layers in the "features" section of the model (the feature extractor)
# by setting requires_grad=False on each of its parameters. The classifier is not
# touched here.
for param in model.features.parameters():
  param.requires_grad = False

In [15]:
# Re-display the summary after freezing: every row under `features` should now
# report Trainable=False, while the model as a whole shows "Partial" because the
# classifier is still trainable.
summary(model=model,
        input_size=(32,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [16]:
# update classifier layer

# One output unit per class in our dataset.
output_shape = len(class_name)

# Read the classifier's input width from the existing Linear layer instead of
# hard-coding 1280, so the cell keeps working if the backbone is swapped.
# This still works on re-run: the replacement head has the same layout.
in_features = model.classifier[1].in_features

model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=in_features,
                    out_features=output_shape,
                    bias=True)
).to(device)

# Check the summary again after swapping in the new classifier: the model's
# output shape should now end in the number of classes instead of 1000.
summary(model=model,
        input_size=(32,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

# Step 10 -> set loss function and optimizer

In [18]:
# Cross-entropy loss for the multi-class classification problem.
loss_fn = torch.nn.CrossEntropyLoss()
# Adam receives every model parameter; the `features` parameters were frozen
# earlier (requires_grad=False), so only the classifier has gradients to apply.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Step 11 -> Create Train Engine

In [24]:
from tqdm.auto import tqdm
from typing import Dict, List, Tuple


def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
  """Train `model` for one full pass over `dataloader`.

  Args:
    model: Model to train; it is switched to training mode.
    dataloader: Yields (inputs, targets) batches.
    loss_fn: Loss called as loss_fn(predictions, targets).
    optimizer: Optimizer that updates the model's parameters.
    device: Device the batches are moved to (a torch.device, or a device
      string such as "cuda" / "cpu").

  Returns:
    (train_loss, train_acc): the per-batch loss and per-batch accuracy, each
    averaged over the number of batches.

  Raises:
    ZeroDivisionError: If `dataloader` yields no batches.
  """
  model.train()

  train_loss, train_acc = 0.0, 0.0

  for X, y in dataloader:
    X, y = X.to(device), y.to(device)

    # Forward pass and loss
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # softmax is monotonic, so argmax over the raw logits picks the same class.
    y_pred_class = torch.argmax(y_pred, dim=1)
    train_acc += (y_pred_class == y).sum().item() / len(y_pred)

  # Average over batches (a smaller last batch counts as much as a full one).
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)

  return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: device):
  model.eval()

  test_loss, test_acc = 0, 0

  with torch.inference_mode():
    for batch, (X,y) in enumerate(dataloader):
      X,y = X.to(device), y.to(device)

      y_pred = model(X)

      loss = loss_fn(y_pred, y)
      test_loss += loss.item()

      y_pred_class = torch.argmax(torch.softmax(y_pred,dim=1),dim=1)
      test_acc += (y_pred_class == y).sum().item()/len(y_pred)

  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          loss_fn: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:
  """Train and evaluate `model` for `epochs` epochs.

  Each epoch runs one `train_step` over `train_dataloader` followed by one
  `test_step` over `test_dataloader`, prints the four metrics and records them.

  Args:
    model: Model to train and evaluate.
    train_dataloader: Batches used for training.
    test_dataloader: Batches used for evaluation.
    loss_fn: Loss used for both steps.
    optimizer: Optimizer used by the training step.
    epochs: Number of epochs to run.
    device: Device passed through to both steps (a torch.device, or a device
      string such as "cuda" / "cpu").

  Returns:
    Dict with keys "train_loss", "train_acc", "test_loss" and "test_acc";
    each maps to a list holding one value per epoch.
  """
  results = {"test_loss": [],
             "test_acc": [],
             "train_loss": [],
             "train_acc": []}
  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model=model,
                                       dataloader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer,
                                       device=device)
    test_loss, test_acc = test_step(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)

    # One progress line per epoch
    print(
        f"train_loss: {train_loss} | "
        f"train_acc: {train_acc} | "
        f"test_loss: {test_loss} | "
        f"test_acc: {test_acc} | "
    )

    # Record this epoch's metrics
    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)

  return results


In [25]:
# Train for 5 epochs; `result` holds the per-epoch loss and accuracy lists returned by train().
result = train(model=model,
      train_dataloader=train_dataloader,
      test_dataloader=test_dataloader,
      loss_fn=loss_fn,
      optimizer=optimizer,
      epochs=5,
      device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

train_loss: 0.9026920348405838 | train_acc: 0.625 | test_loss: 0.7021269003550211 | test_acc: 0.8759469696969697 | 
train_loss: 0.7716843038797379 | train_acc: 0.7421875 | test_loss: 0.6980566581090292 | test_acc: 0.8248106060606061 | 
train_loss: 0.6694891639053822 | train_acc: 0.77734375 | test_loss: 0.6303264697392782 | test_acc: 0.8967803030303031 | 
train_loss: 0.6336325109004974 | train_acc: 0.7890625 | test_loss: 0.633276104927063 | test_acc: 0.8967803030303031 | 
train_loss: 0.5659329071640968 | train_acc: 0.94140625 | test_loss: 0.5690213044484457 | test_acc: 0.8967803030303031 | 


# Step 12 
Save model as it is best with 94% train accuracy and 89% test accuracy

Result: 
* train_loss: 0.5659329071640968
* train_acc: 0.94140625
* test_loss: 0.5690213044484457
* test_acc: 0.8967803030303031

In [29]:
def save_model(model: torch.nn.Module,
               target_dir:str,
               model_name: str):
  """Write the model's state_dict to `target_dir/model_name`.

  Args:
    model: Model whose state_dict is saved.
    target_dir: Directory to save into; created (with parents) if missing.
    model_name: File name for the saved model; must end with ".pth" or ".pt".

  Raises:
    AssertionError: If `model_name` has neither extension. The target
      directory has already been created at that point.
  """
  # Make sure the output directory exists before anything else.
  out_dir = Path(target_dir)
  out_dir.mkdir(parents=True, exist_ok=True)

  # str.endswith accepts a tuple of allowed suffixes.
  assert model_name.endswith((".pth", ".pt")), "Model name should endwith .pth .pt"
  destination = out_dir / model_name

  print(f"saving model to: {destination}")
  torch.save(model.state_dict(), destination)
  
# Save the trained model's state_dict under the "models" directory.
save_model(model=model, target_dir="models", model_name="tranfers_l_best.pth")

saving model to: models/tranfers_l_best.pth
