In [1]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torchmetrics import Accuracy

try:
    from torchinfo import summary
except:
    print(f"[INFO] Couldn't find the torchinfo...\n Installing it.....")
    !pip install -qq torchinfo
    from torchinfo import summary

# Try to import going_moduler directory. download it from GitHub, if it doesn't work
try:
    from going_modular import data_setup, engine, utils
except:
    print(f"[INFO] Couldn't find the directory...\n Downloading it from github.....")
    !git clone https://github.com/Niloy009/learning_pytorch.git
    !mv leaning_pytorch/going_modular
    !rm -rf learning_pytorch
    from going_modular import data_setup, engine, utils

In [2]:
# Setup device agnostic code
device = 'cuda' if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
# Set the seed
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """

    # set the seed for the general torch operation
    torch.manual_seed(seed)

    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    torch.cuda.manual_seed(seed)

# Get the data

In [4]:
import os
import zipfile

from pathlib import Path

import requests



def download_data(source: str, 
                  destination: str, 
                  remove_source: bool = True) -> Path:
    """Download a ziiped dataset from source and unzip to destination

    Args:
        source: The source path where the data will download from.
        destination: The destination path where the data will download and unzip to.
        remove_source: Whether the source remove or not after download.
        
    Returns:
        pathlib.Path to downloaded data.
    """


    # Setup data path
    data_path = Path("data/")
    image_path = data_path / destination # images from a subset of classes from the Food101 dataset

    # If the image folder doesn't exist, download it and prepare it...
    if image_path.is_dir():
      print(f"[INFO] {image_path} directory exists, skipping re-download.")
    else:
      print(f"[INFO] Did not find {image_path}, downloading it...")
      image_path.mkdir(parents=True, exist_ok=True)

      # Download pizza, steak, sushi data
      target_file = Path(source).name
      with open(data_path / target_file, "wb") as f:
        request = requests.get(source)
        print(f"[INFO] Downloading {target_file} from {source}...")
        f.write(request.content)
  
      # unzip pizza, steak, sushi data
      with zipfile.ZipFile(data_path / target_file, "r") as zip_ref:
        print(f"[INFO] Unzipping {target_file}...")
        zip_ref.extractall(image_path)
  
      # Remove .zip file
      if remove_source:
          os.remove(data_path / target_file)

    return image_path

In [5]:
image_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip", 
              destination="pizza_steak_sushi")

[INFO] data/pizza_steak_sushi directory exists, skipping re-download.


# Manual Transform and create dataset and dataloader

In [6]:
# Setup directories
train_dir = image_path / "train"
test_dir = image_path / "test"

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [7]:
# Setup ImageNet normalization levels (turns all images into similar distribution as ImageNet)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Create transform pipeline manually
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])           
print(f"[INFO] Manually created transforms: {manual_transforms}")

train_dataloader_manual, test_dataloaler_manual, class_names = data_setup.create_dataloaders(train_dir=train_dir, 
                                                                                test_dir=test_dir, 
                                                                                transform=manual_transforms, 
                                                                                batch_size=32)
train_dataloader_manual, test_dataloaler_manual, class_names

[INFO] Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


(<torch.utils.data.dataloader.DataLoader at 0x7f0a42980b30>,
 <torch.utils.data.dataloader.DataLoader at 0x7f0b54bad760>,
 ['pizza', 'steak', 'sushi'])

# Automate Transform

In [8]:
# Setup pretrained weights (plenty of these available in torchvision.models)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Get transforms from weights (these are the transforms that were used to obtain the weights)
automatic_transforms = weights.transforms() 
print(f"[INFO] Automatically created transforms: {automatic_transforms}")


train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir, 
                                                                                test_dir=test_dir, 
                                                                                transform=automatic_transforms, 
                                                                                batch_size=32)
train_dataloader, test_dataloader, class_names

[INFO] Automatically created transforms: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x7f0a42980230>,
 <torch.utils.data.dataloader.DataLoader at 0x7f0a42980cb0>,
 ['pizza', 'steak', 'sushi'])

# Get Pretrained Model 

In [9]:
# Download the weights of pretrained model Efficientnet_B0
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Set up the model with weights and send it to the device
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

# View the model
# model

In [10]:
# Freeze all base layers by setting attribute required_grad to False
for param in model.features.parameters():
    param.requires_grad = False

# Since we're creating a new layer with random weights (torch.nn.Linear), 
# let's set the seeds
set_seeds()

model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
)


In [11]:
from torchinfo import summary


# Get a summary of the model
summary(model=model, 
        input_size=(32,3,224,224), 
        verbose=0, 
        col_names=['input_size', 'output_size', 'num_params', 'trainable'], 
        col_width=18, row_settings=['var_names'])


Layer (type (var_name))                                      Input Shape        Output Shape       Param #            Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]  [32, 3]            --                 Partial
├─Sequential (features)                                      [32, 3, 224, 224]  [32, 1280, 7, 7]   --                 False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]  [32, 32, 112, 112] --                 False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]  [32, 32, 112, 112] (864)              False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112] [32, 32, 112, 112] (64)               False
│    │    └─SiLU (2)                                         [32, 32, 112, 112] [32, 32, 112, 112] --                 --
│    └─Sequential (1)                                        [32, 32, 112, 112] [32, 16, 112, 112] --                 False
│    

# Train a single model and track it

In [12]:
# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
accuracy = Accuracy(task='multiclass', num_classes=len(class_names)).to(device)

In [13]:
# Setup Summary writer
try:
    from torch.utils.tensorboard import SummaryWriter
except:
    print(f'[INFO]: Could not find tensorboard..... installing it!! ')
    !pip install -qq tensorboard
    from torch.utils.tensorboard import SummaryWriter
    

writer = SummaryWriter()
writer


<torch.utils.tensorboard.writer.SummaryWriter at 0x7f0a4275c2c0>

In [14]:
import torchmetrics
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from going_modular.engine import train_step, test_step


def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          loss_fn: torch.nn.Module, 
          optimizer: torch.optim.Optimizer,
          accuracy: torchmetrics.classification.accuracy.Accuracy,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and test a PyTorch model

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs. training and testing the model in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
        model: A PyTorch model to be tested.
        train_dataloader: A DataLoader instance for the model to be trained on.
        test_dataloader: A DataLoader instance for the model to be tested on.
        loss_fn: A PyTorch loss function to calculate loss on the test data.
        optimizer: A PyTorch optimizer to help minimize the loss function.
        accuracy: A torchmetric module to calculate accuracy.
        epochs: An integar indicating how many epochs to train for.
        device: A target device to compute on (i.e. "cuda" or "cpu")

    Returns:
        A dictionary of training and testing loss as well as training and
        testing accuracy metrics. Each metric has a value in a list for 
        each epoch.
        In the form: {train_loss: [...],
                      train_acc: [...],
                      test_loss: [...],
                      test_acc: [...]} 
        For example if training for epochs=2: 
                     {train_loss: [2.0616, 1.0537],
                      train_acc: [0.3945, 0.3945],
                      test_loss: [1.2641, 1.5706],
                      test_acc: [0.3400, 0.2973]} 
  """
    # Create empty results dictionary
    results = { "train_loss": [], 
                "train_accuracy": [], 
                "test_loss": [], 
                "test_accuracy": []
              }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_accuracy = train_step(model=model, 
                                                dataloader=train_dataloader, 
                                                loss_fn=loss_fn, 
                                                optimizer=optimizer, 
                                                accuracy=accuracy, 
                                                device=device)
        test_loss, test_accuracy = test_step(model=model, 
                                             dataloader=test_dataloader, 
                                             loss_fn=loss_fn,
                                             accuracy=accuracy, 
                                             device=device)
        
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss: .4f} | "
            f"train_accuracy: {train_accuracy: .4f} | "
            f"test_loss: {test_loss: .4f} | "
            f"test_accuracy: {test_accuracy: .4f}"
        )
        # 5. update the results
        results["train_loss"].append(train_loss)
        results["train_accuracy"].append(train_accuracy)
        results["test_loss"].append(test_loss)
        results["test_accuracy"].append(test_accuracy)

        #### New: Experiment tracking with tensorboard ####
        writer.add_scalars(main_tag="Loss", 
                           tag_scalar_dict={"train_loss": train_loss, 
                                            "test_loss": test_loss}, 
                           global_step=epoch)
        
        writer.add_scalars(main_tag="Accuracy", 
                           tag_scalar_dict={"train_accuracy": train_accuracy, 
                                            "test_accuracy": test_accuracy}, 
                           global_step=epoch)

        writer.add_graph(model=model, input_to_model=torch.randn(32,3,224,224).to(device))

        # Close the writer
        writer.close()

        #### End: Experiment tracking with tensorboard ####
        

    return results



In [15]:
# Train Model
# Note: Not using engine.train() as we modified the function above

set_seeds()
results = train(model=model, 
                train_dataloader=train_dataloader, 
                test_dataloader=test_dataloader, 
                loss_fn=loss_fn, 
                optimizer=optimizer,
                accuracy=accuracy, 
                epochs=5, 
                device=device)


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss:  1.0948 | train_accuracy:  0.3984 | test_loss:  0.9034 | test_accuracy:  0.6411
Epoch: 2 | train_loss:  0.9005 | train_accuracy:  0.6445 | test_loss:  0.7874 | test_accuracy:  0.8561
Epoch: 3 | train_loss:  0.8115 | train_accuracy:  0.7500 | test_loss:  0.6749 | test_accuracy:  0.8759
Epoch: 4 | train_loss:  0.6853 | train_accuracy:  0.7383 | test_loss:  0.6704 | test_accuracy:  0.8352
Epoch: 5 | train_loss:  0.7091 | train_accuracy:  0.7383 | test_loss:  0.6768 | test_accuracy:  0.8040


In [19]:
# Lets view our experiment
# %load_ext tensorboard
# %tensorboard --logdir runs

# Create a function to prepare a `SummaryWriter()` instance

By default our `SummaryWriter()` class aves to `log_dir`.

How about if we wanted to save different experiments to different folders?

in simple word **one experiment = one folder**

For example, we'd like to track:

* Experiment data/timestamp
* Experiment name
* Model name
* Extra - is there anything else that should be tracked?

Let's create a function to create a `SummaryWriter()` instance to take all of these things into account.

So ideally we end up tracking experiment to a directory:

`runs/YYYY-MM-DD/experiment_name/model_name/extra`


In [25]:
from datetime import datetime
import os

from torch.utils.tensorboard import SummaryWriter

def create_writer(experiment_name: str, model_name: str, extra: str = None) -> torch.utils.tensorboard.writer.SummaryWriter():
    """Create a torch.utils.tensorboard.writer.SummaryWriter() instance saving to a specific log_dir
    
    log_dir is a combination of runs/timestamp/experiment_name/model_name/extra

    Where timestamp is current date in YYYY-MM-DD format

    Args:
        experiment_name (str): Name of the experiment.
        model_name (str): Name of the model
        extra (str, optional): Anything extra to add to the directory.

    Returns:
        torch.utils.tensorboard.writer.SummaryWriter(): Instance of a writer saving to the specific log_dir.

    Example usage:
        # Create a writer saving to "runs/2025-Apr-05/data_10_percent/effnetb2/5_epochs/"
        writer = create_writer(experiment_name="data_10_percent",
                               model_name="effnetb2",
                               extra="5_epochs")
        # The above is the same as:
        writer = SummaryWriter(log_dir="runs/2022-06-04/data_10_percent/effnetb2/5_epochs/")
    """

    # Get timestamp of current date in reverse order (YYYY-MM-DD)
    timestamp = datetime.now().strftime("%Y-%b-%d")

    if extra:
        # create log directory path
        log_dir = os.path.join('runs', timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join('runs', timestamp, experiment_name, model_name)

    print(f"[INFO] Created SummaryWriter saving to {log_dir}")
    return SummaryWriter(log_dir=log_dir)
    

In [26]:
example_writer = create_writer(experiment_name='data_10_percent', model_name="efficientnetb0", extra='5_epochs')
example_writer

[INFO] Created SummaryWriter saving to runs/2025-Apr-05/data_10_percent/efficientnetb0/5_epochs


<torch.utils.tensorboard.writer.SummaryWriter at 0x7f0a2041c800>

In [None]:
import torchmetrics
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from going_modular.engine import train_step, test_step


def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          loss_fn: torch.nn.Module, 
          optimizer: torch.optim.Optimizer,
          accuracy: torchmetrics.classification.accuracy.Accuracy,
          epochs: int,
          device: torch.device, 
          writer: torch.utils.tensorboard.writer.SummaryWriter) -> Dict[str, List]:
    """Trains and test a PyTorch model

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs. training and testing the model in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.
    
    Stores metrics to specified writer log_dir if present.
    
    Args:
        model: A PyTorch model to be tested.
        train_dataloader: A DataLoader instance for the model to be trained on.
        test_dataloader: A DataLoader instance for the model to be tested on.
        loss_fn: A PyTorch loss function to calculate loss on the test data.
        optimizer: A PyTorch optimizer to help minimize the loss function.
        accuracy: A torchmetric module to calculate accuracy.
        epochs: An integar indicating how many epochs to train for.
        device: A target device to compute on (i.e. "cuda" or "cpu").
        writer: A SummaryWriter() instance to log model results to.


    Returns:
        A dictionary of training and testing loss as well as training and
        testing accuracy metrics. Each metric has a value in a list for 
        each epoch.
        In the form: {train_loss: [...],
                      train_acc: [...],
                      test_loss: [...],
                      test_acc: [...]} 
        For example if training for epochs=2: 
                     {train_loss: [2.0616, 1.0537],
                      train_acc: [0.3945, 0.3945],
                      test_loss: [1.2641, 1.5706],
                      test_acc: [0.3400, 0.2973]} 
  """
    # Create empty results dictionary
    results = { "train_loss": [], 
                "train_accuracy": [], 
                "test_loss": [], 
                "test_accuracy": []
              }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_accuracy = train_step(model=model, 
                                                dataloader=train_dataloader, 
                                                loss_fn=loss_fn, 
                                                optimizer=optimizer, 
                                                accuracy=accuracy, 
                                                device=device)
        test_loss, test_accuracy = test_step(model=model, 
                                             dataloader=test_dataloader, 
                                             loss_fn=loss_fn,
                                             accuracy=accuracy, 
                                             device=device)
        
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss: .4f} | "
            f"train_accuracy: {train_accuracy: .4f} | "
            f"test_loss: {test_loss: .4f} | "
            f"test_accuracy: {test_accuracy: .4f}"
        )
        # 5. update the results
        results["train_loss"].append(train_loss)
        results["train_accuracy"].append(train_accuracy)
        results["test_loss"].append(test_loss)
        results["test_accuracy"].append(test_accuracy)

        #### New: Experiment tracking with tensorboard ####
        if writer:
            writer.add_scalars(main_tag="Loss", 
                               tag_scalar_dict={"train_loss": train_loss, 
                                                "test_loss": test_loss}, 
                               global_step=epoch)
            
            writer.add_scalars(main_tag="Accuracy", 
                               tag_scalar_dict={"train_accuracy": train_accuracy, 
                                                "test_accuracy": test_accuracy}, 
                               global_step=epoch)
    
            writer.add_graph(model=model, input_to_model=torch.randn(32,3,224,224).to(device))
    
            # Close the writer
            writer.close()
        else:
            pass

        #### End: Experiment tracking with tensorboard ####
        

    return results

