# 7. Experiment tracking

Since machine learning is very experimental , we need to track the which doesn't work , which does work.So we can go the right path so harness the information from the data

In [28]:
import torch
import torchvision
import torchmetrics
import torchinfo
from torch import nn

In [2]:
# set tup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
# helper function for seed
def set_seed(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# 1. Get Data

In [4]:
import os
import requests
import zipfile
from pathlib import Path


def download_data(source: str,
                  destination: str,
                  remove_source: bool = True) -> Path:
    # setup path to data folder
    data_path = Path("data/")
    image_path = data_path / destination

    if image_path.is_dir():
        print(f"[INFO] {image_path} directory already exists , skipping download...")
    else:
        print(f"[INFO] did not find {image_path} directory ,creating one...")
        image_path.mkdir(parents=True, exist_ok=True)

        # download the target data
        target_file = Path(source).name
        with open(data_path / target_file, "wb") as f:
            response = requests.get(source)
            print(f"[INFO] Downloading {target_file} from {source}")
            f.write(response.content)

        # unzip target file
        with zipfile.ZipFile(data_path / target_file, "r") as zip_ref:
            print(f"[INFO] Unzipping {target_file} data...")
            zip_ref.extractall(image_path)

        if remove_source:
            os.remove(data_path / target_file)

        return image_path

In [5]:
# image_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
#                            destination="pizza_steak_sushi")
# image_path

In [6]:
image_path = Path("../data/pizza_steak_sushi")

# 2. Creating the dataset and dataloader

## 2.1 Manual transforms

Since we need transform the data in same way that model is trained.

In [7]:
# Set train and test directories
train_dir = image_path / "train"
test_dir = image_path / "test"
train_dir, test_dir

(WindowsPath('../data/pizza_steak_sushi/train'),
 WindowsPath('../data/pizza_steak_sushi/test'))

In [8]:
# set up Image net Normalization
# see here https://pytorch.org/vision/0.12/models.html
from torchvision import transforms

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

manual_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    normalize
])
manual_transform

Compose(
    Resize(size=224, interpolation=bilinear, max_size=None, antialias=None)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [9]:
# create the dataloader
from going_modular import data_setup

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transform,
    batch_size=32)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x193a51f55b0>,
 <torch.utils.data.dataloader.DataLoader at 0x193a51f5f40>,
 ['pizza', 'steak', 'sushi'])

## 2.2 Automatic Transform to create dataloader

Since version V0.13 PyTorch provide the `MulitWeightsApi` which have support that have the best weights for pretrained models,and transform we need to apply on the data same as the transform applied train_data for pretrained models

In [10]:
# best weight for pretrained model of EfficientNetB0
torchvision.models.EfficientNet_B0_Weights.DEFAULT

EfficientNet_B0_Weights.IMAGENET1K_V1

In [11]:
# we can get the transforms that applied on the trained data applied on the pretrained model
auto_transforms = torchvision.models.EfficientNet_B0_Weights.DEFAULT.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [12]:
# we also have the meta information
for k, v in torchvision.models.EfficientNet_B0_Weights.DEFAULT.meta.items():
    if k == "categories":
        print(f"There are {len(v)} categories")
        continue
    print(k, v)

There are 1000 categories
min_size (1, 1)
recipe https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1
num_params 5288548
_metrics {'ImageNet-1K': {'acc@1': 77.692, 'acc@5': 93.532}}
_docs These weights are ported from the original paper.


In [13]:
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=auto_transforms,
    batch_size=32)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x193a51f95b0>,
 <torch.utils.data.dataloader.DataLoader at 0x193a51f9550>,
 ['pizza', 'steak', 'sushi'])

# 3. Getting the pretrained model , freeze the feature block and modify the classification block

## 3.1 Loading to pretrain model

In [14]:
# getting the pretrained EfficientNetBO model
model_0 = torchvision.models.efficientnet_b0(
    weights=torchvision.models.EfficientNet_B0_Weights.DEFAULT).to(device)
model_0

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [15]:
from going_modular import utils

In [25]:
utils.print_model_meta_info(model_0)

There are 3 module namely
	 features module contains 9 layers/block.
	 avgpool module contains 1 layer.
	 classifier module contains 2 layers/block.
Total layer are 263
defaultdict(<class 'int'>, {'EfficientNet': 1, 'Sequential': 25, 'Conv2dNormActivation': 49, 'Conv2d': 81, 'BatchNorm2d': 49, 'SiLU': 49, 'MBConv': 16, 'SqueezeExcitation': 16, 'AdaptiveAvgPool2d': 17, 'Sigmoid': 16, 'StochasticDepth': 16, 'Dropout': 1, 'Linear': 1})


## 3.2 Freeze the base layer

In [34]:
torchinfo.summary(model_0, col_names=["num_params", "trainable"], col_width=16)
#? Trainable params: 5,288,548

Layer (type:depth-idx)                                  Param #          Trainable
EfficientNet                                            --               True
├─Sequential: 1-1                                       --               True
│    └─Conv2dNormActivation: 2-1                        --               True
│    │    └─Conv2d: 3-1                                 864              True
│    │    └─BatchNorm2d: 3-2                            64               True
│    │    └─SiLU: 3-3                                   --               --
│    └─Sequential: 2-2                                  --               True
│    │    └─MBConv: 3-4                                 1,448            True
│    └─Sequential: 2-3                                  --               True
│    │    └─MBConv: 3-5                                 6,004            True
│    │    └─MBConv: 3-6                                 10,710           True
│    └─Sequential: 2-4                                  --   

In [35]:
# track the grad == False to freeze the learning of base layer
for params in model_0.features.parameters():
    params.requires_grad = False

# adjust the classifier head
set_seed()
model_0.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names))
).to(device)

In [38]:
torchinfo.summary(model_0, col_names=["num_params", "trainable"], col_width=16)
#? Trainable params: 3,843

Layer (type:depth-idx)                                  Param #          Trainable
EfficientNet                                            --               Partial
├─Sequential: 1-1                                       --               False
│    └─Conv2dNormActivation: 2-1                        --               False
│    │    └─Conv2d: 3-1                                 (864)            False
│    │    └─BatchNorm2d: 3-2                            (64)             False
│    │    └─SiLU: 3-3                                   --               --
│    └─Sequential: 2-2                                  --               False
│    │    └─MBConv: 3-4                                 (1,448)          False
│    └─Sequential: 2-3                                  --               False
│    │    └─MBConv: 3-5                                 (6,004)          False
│    │    └─MBConv: 3-6                                 (10,710)         False
│    └─Sequential: 2-4                           

# 4. Train a single model and track result

In [54]:
# define loss optimizer and metrics
from torchmetrics import Accuracy

cross_entropy_loss_fn = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model_0.parameters(),
                                  lr=0.001)
accuracy_fn = Accuracy(task="multiclass",
                       num_classes=len(class_names)).to(device)

In [47]:
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x193ad434760>

In [52]:
from going_modular.engine import train_step, test_step
import numpy as np
from typing import List, Dict, Tuple

from tqdm.auto import tqdm
from torchmetrics import Metric
from torch.nn.modules.loss import _Loss

In [53]:
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: _Loss,
          metric_fn: Metric,
          epochs: int,
          device: str) -> Dict[str, List]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
      model: A PyTorch model to be trained and tested.
      train_dataloader: A DataLoader instance for the model to be trained on.
      test_dataloader: A DataLoader instance for the model to be tested on.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      loss_fn: A PyTorch loss function to calculate loss on both datasets.
      metric_fn: To measure the performance of madel using metric function
      epochs: An integer indicating how many epochs to train for.
      device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
      A dictionary of training and testing loss as well as training and
      testing accuracy metrics. Each metric has a value in a list for
      each epoch.
      In the form: {train_loss: [...],
                    train_acc: [...],
                    test_loss: [...],
                    test_acc: [...]}
      For example if training for epochs=2:
                   {train_loss: [2.0616, 1.0537],
                    train_acc: [0.3945, 0.3945],
                    test_loss: [1.2641, 1.5706],
                    test_acc: [0.3400, 0.2973]}
    """
    history = defaultdict(list)

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           metric_fn=metric_fn,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        metric_fn=metric_fn,
                                        device=device)

        # Print out what's happening
        print(
            f"Epoch: {epoch + 1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )
        # write  the log to tensor board
        # to track the both training and test loss
        writer.add_scalars(main_tag="Loss",
                           tag_scalar_dict={"train_loss": train_loss,
                                            "test_loss": test_loss},
                           global_step=epoch)
        # to track the both training and trest accuracy
        writer.add_scalars(main_tag="Accuracy",
                           tag_scalar_dict={"train_acc": train_acc,
                                            "test_acc": test_acc},
                           global_step=epoch)
        # to add the graph
        writer.add_graph(model=model, input_to_model=torch.rand(size=(32, 3, 224, 224)))

        # Update history
        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["test_loss"].append(test_loss)
        history["test_acc"].append(test_acc)

    writer.close()
    return history

In [55]:
train(model=model_0,
      train_dataloader=train_dataloader,
      test_dataloader=test_dataloader,
      optimizer=adam_optimizer,
      loss_fn=cross_entropy_loss_fn,
      metric_fn=accuracy_fn,
      epochs=5,
      device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9610 | train_acc: 0.5542 | test_loss: 0.6685 | test_acc: 0.8597
Epoch: 2 | train_loss: 0.6612 | train_acc: 0.8583 | test_loss: 0.5543 | test_acc: 0.8852
Epoch: 3 | train_loss: 0.5512 | train_acc: 0.8562 | test_loss: 0.4553 | test_acc: 0.8847
Epoch: 4 | train_loss: 0.4650 | train_acc: 0.8729 | test_loss: 0.4159 | test_acc: 0.9006
Epoch: 5 | train_loss: 0.4464 | train_acc: 0.8562 | test_loss: 0.4008 | test_acc: 0.8886


defaultdict(list,
            {'train_loss': [0.9609946131706237,
              0.6611876567204793,
              0.551163645585378,
              0.465001650651296,
              0.44638938307762144],
             'train_acc': [0.5541666666666667,
              0.8583333333333333,
              0.85625,
              0.8729166666666667,
              0.85625],
             'test_loss': [0.6684799,
              0.55428,
              0.45533627,
              0.41587442,
              0.40077677],
             'test_acc': [0.8596591,
              0.8852273,
              0.8846591,
              0.9005682,
              0.8886364]})