## PyTorch Experiment tracking.

Helps track the experiments and figure out what does not work.

In [59]:
import torch
import torchvision
from torch import nn
import matplotlib.pyplot as plt
from torchinfo import summary

from going_modular import data_setup, engine

print(torch.__version__)
print(torchvision.__version__)

2.6.0+cu126
0.21.0+cu126


In [60]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [61]:
# Set seeds
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    torch.cuda.manual_seed(seed)

In [62]:
set_seeds()

## Get data

Want to get pizza, steak and sushi images.

So we can run experiment and track the best model.

In [63]:
import os
import zipfile

from pathlib import Path

import requests

def download_data(source: str,
                 destination: str,
                 removed_source: bool = True) -> Path:
    """Downloads a zipped dataset from source and unzips to destination."""

    # Setup path to data folder
    data_path = Path("data/")
    image_path = data_path/destination

    #If image folder doesn't exist, create it
    if image_path.is_dir():
        print(f"[INFO] {image_path} directory already exists")
    else:
        print(f"[INFO] could not find {image_path} , creating one...")
        image_path.mkdir(parents=True, exist_ok=True)

        # Download the target data
        target_file = Path(source).name
        with open(data_path/target_file, "wb") as f:
            request = requests.get(source)
            print(f"[INFO] Downloading {target_file} from {source}...")
            f.write(request.content)

        #Unzip target file
        with zipfile.ZipFile(data_path/target_file, "r") as zip_ref:
            print(f"[INFO] Unzipping")
            zip_ref.extractall(image_path)

        # Remove the .zip file
        if removed_source:
            os.remove(data_path/target_file)

    return image_path

In [64]:
image_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi.zip",
                          destination="pizza_steak_sushi")
image_path

[INFO] data\pizza_steak_sushi directory already exists


WindowsPath('data/pizza_steak_sushi')

## Create datasets and dataloaders


### Create data loaders with manual transforms

In [65]:
image_path

WindowsPath('data/pizza_steak_sushi')

In [66]:
# Setup directores
train_dir = image_path / "train"
test_dir = image_path / "test"


train_dir, test_dir

(WindowsPath('data/pizza_steak_sushi/train'),
 WindowsPath('data/pizza_steak_sushi/test'))

In [67]:
# Create transform pipeline manually
from torchvision import transforms

# Setup ImageNet normalization levels
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])


manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

print(f"Manual tranforms {manual_transforms}")

# Cretae DataLoaders
from going_modular import data_setup
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                              test_dir=test_dir,
                                                                              transform=manual_transforms,
                                                                              batch_size=32)

train_dataloader, test_dataloader, class_names

Manual tranforms Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


(<torch.utils.data.dataloader.DataLoader at 0x25356824f40>,
 <torch.utils.data.dataloader.DataLoader at 0x25356825ba0>,
 ['pizza', 'steak', 'sushi'])

## Create Dataloaders using automatic transforms

In [68]:
# Setup dirs
train_dir = image_path / "train"
test_dir = image_path / "test"

# Setup the pretrained weights
import torchvision
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Get the tranforms from weights
automatic_tranforms = weights.transforms()
print(automatic_tranforms)


# Create data loaders
train_dataloader, test_dataloaders, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                               test_dir=test_dir,
                                                                               transform=automatic_tranforms,
                                                                               batch_size=32)

train_dataloader, test_dataloader, class_names

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x25356825660>,
 <torch.utils.data.dataloader.DataLoader at 0x25356825ba0>,
 ['pizza', 'steak', 'sushi'])

# Get a pretrained model and freeze the base layers

In [69]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [70]:
# New method of creating a pretrained model (torchvision v0.13+)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT  # DEFAULT is best available weights
model = torchvision.models.efficientnet_b0(weights=weights).to(device)
# model

In [71]:
model.avgpool

AdaptiveAvgPool2d(output_size=1)

In [72]:
model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [73]:
# Print a summary with torchinfo
from torchinfo import summary

summary(model=model,
       input_size=(1, 3, 224, 224),
       col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20,
       row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 1000]            --                   True
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   True
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   True
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    864                  True
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    64                   True
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 16, 112,

### Freezing the base model and changing the output layer to suit our needs

We freeze the base layers and train the output layers.

In [74]:
# Freeze all of the base layers in EffNetB0
for param in model.features.parameters():
    # print(param)
    param.requires_grad = False

In [75]:
len(class_names)

3

In [76]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Update the classifier head of model to suit our problem
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280,
             out_features=len(class_names))
).to(device)

model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=3, bias=True)
)

In [78]:
summary(model=model,
       input_size=(32, 3, 224, 224),
       col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20,
       row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

## Train a single model and track results

In [79]:
# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

To track experiment we are going to use tensorboard

In [82]:
# Setup a SummaryWriter
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x25345ca6470>

In [95]:
import torch
from typing import Dict, List, Tuple
from tqdm.auto import tqdm

from going_modular.engine import train_step, test_step

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.
    
    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.
    
    Calculates, prints and stores evaluation metrics throughout.
    
    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").
    
    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
    "train_acc": [],
    "test_loss": [],
    "test_acc": []
    }
    
    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)
        
        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )
        
        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        
        ## New: Experiment tracking
        writer.add_scalars(main_tag="Loss",
                        tag_scalar_dict={"train_loss": train_loss,
                                        "test_loss": test_loss},
                         global_step=epoch)
        
        writer.add_scalars(main_tag="Accuracy",
                        tag_scalar_dict={"train_acc": train_acc,
                                        "test_acc": test_acc},
                        global_step=epoch)
        
        writer.add_graph(model=model,
                      input_to_model=torch.randn(32, 3, 224, 224).to(device))

    ## Close the writer
    writer.close()

    
    # Return the filled results at the end of the epochs
    return results

In [96]:
# Train the model
# Using the modified train function
set_seeds()
results = train(model=model,
               train_dataloader=train_dataloader,
               test_dataloader=test_dataloader,
               optimizer=optimizer,
               loss_fn=loss_fn,
               epochs=5,
               device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.5407 | train_acc: 0.9180 | test_loss: 0.6289 | test_acc: 0.8160
Epoch: 2 | train_loss: 0.5485 | train_acc: 0.7969 | test_loss: 0.6050 | test_acc: 0.8021
Epoch: 3 | train_loss: 0.5365 | train_acc: 0.8125 | test_loss: 0.5342 | test_acc: 0.8299
Epoch: 4 | train_loss: 0.4608 | train_acc: 0.9297 | test_loss: 0.5228 | test_acc: 0.9306
Epoch: 5 | train_loss: 0.4962 | train_acc: 0.7812 | test_loss: 0.5652 | test_acc: 0.8368


## View our models results with tensorboard

## View our models results with tensorboard

In [98]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Create a function to prepare a `SummaryWriter()` instance

How about saving different experiments to different folders?

In [103]:
from torch.utils.tensorboard import SummaryWriter

def create_writer(experiment_name: str,
                 model_name: str,
                 extra: str = None):
    """Create a torch.utils.tensorboard.writer.SummaryWriter() instance tracking to a specific directory."""
    from datetime import datetime
    import os

    # Get timestamp of current dat in reverse order
    timestamp = datetime.now().strftime("%Y-%m-%d")

    if extra:
        # create log directory path
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)

    return SummaryWriter(log_dir=log_dir)

In [104]:
example_writer = create_writer(experiment_name="data_10_percent",
                              model_name="effnetb0",
                              extra="5_epochs")
example_writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x253a5e760b0>

## Update the train function to include the writer parameter

In [105]:
import torch
from typing import Dict, List, Tuple
from tqdm.auto import tqdm

from going_modular.engine import train_step, test_step

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
         writer: torch.utils.tensorboard.writer.SummaryWriter) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.
    
    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.
    
    Calculates, prints and stores evaluation metrics throughout.
    
    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").
    
    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
    "train_acc": [],
    "test_loss": [],
    "test_acc": []
    }
    
    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)
        
        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )
        
        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        
        ## New: Experiment tracking
        if writer:
            writer.add_scalars(main_tag="Loss",
                            tag_scalar_dict={"train_loss": train_loss,
                                            "test_loss": test_loss},
                             global_step=epoch)
            
            writer.add_scalars(main_tag="Accuracy",
                            tag_scalar_dict={"train_acc": train_acc,
                                            "test_acc": test_acc},
                            global_step=epoch)
            
            writer.add_graph(model=model,
                          input_to_model=torch.randn(32, 3, 224, 224).to(device))

            ## Close the writer
            writer.close()
        else:
            pass
    
    # Return the filled results at the end of the epochs
    return results

## Setting up a series of modelling experiments

### What kind of experiments should you run?

- Change number of epocs
- Change number of hidden layers, etc.

## What experiments we are going to do.

We are going to try 3 things:
- model size - EffnetB0 vs EffnetB2
- dataset size - 10% of pizza, sushi , steak vs 20%
- training time - 5 vs 10 epochs

### Download different datasets

We want 2 datasets 10% and 20% of pizza steak sushi.

In [106]:
# Download 10 and 20 percent datasets
data_10_percent_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi.zip",
                                    destination="pizza_steak_sushi")

data_20_percent_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi_20_percent.zip",
                                    destination="pizza_steak_sushi_20_percent")

[INFO] data\pizza_steak_sushi directory already exists
[INFO] could not find data\pizza_steak_sushi_20_percent , creating one...
[INFO] Downloading pizza_steak_sushi_20_percent.zip from https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi_20_percent.zip...
[INFO] Unzipping


## Tranform datasets and create dataloaders

In [107]:
# Setup training dir path
train_dir_10_percent = data_10_percent_path / "train"
train_dir_20_percent = data_20_percent_path / "train"

# Setup test dir
test_dir = data_10_percent_path / "test"

train_dir_10_percent, train_dir_20_percent, test_dir

(WindowsPath('data/pizza_steak_sushi/train'),
 WindowsPath('data/pizza_steak_sushi_20_percent/train'),
 WindowsPath('data/pizza_steak_sushi/test'))

In [109]:
# Setup ImageNet normalization levels
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])


simple_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

In [112]:
BATCH_SIZE = 32

# Create 10% training and test DataLoaders
train_dataloader_10_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_10_percent,
                                                                                         test_dir=test_dir,
                                                                                         transform=simple_transforms,
                                                                                         batch_size=BATCH_SIZE)

# Create 20% training and test dataloaders
train_dataloader_20_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_20_percent,
                                                                                         test_dir=test_dir,
                                                                                         transform=simple_transforms,
                                                                                         batch_size=BATCH_SIZE)

print(f"batch size {BATCH_SIZE} in 10% {len(train_dataloader_10_percent)}")
print(f"batch size {BATCH_SIZE} in 120% {len(train_dataloader_20_percent)}")
print(f"batch size {BATCH_SIZE} in 10% {len(test_dataloaders)}")
print(class_names)

batch size 32 in 10% 8
batch size 32 in 120% 15
batch size 32 in 10% 3
['pizza', 'steak', 'sushi']


### Create feature extractor models

We want two functions
- Create a torchvision.model.efficientnet_b0 with frozen base layers and custom classifier.
- Create a torchvision.model.efficientnet_b2 with frozen base layers and custom classifier.

In [117]:
import torchvision

# Create an EffnetB2
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

# effnetb2

In [118]:
summary(model=effnetb2,
       input_size=(32, 3, 224, 224),
       col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20,
       row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

In [126]:
import torchvision
from torch import nn

OUT_FEATURES = len(class_names)

# Cretae an EffNetB0 feature extractor
def create_effnetb0():
    # Get the weight and setup the model
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    # Freeze the base model layers
    for param in model.features.parameters():
        param.requires_grad = False

    # Change the classifier head
    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES)
    ).to(device)

    # Give model a name
    model.name = "effnetb0"
    print(f"[INFO] Created model {model.name} model...")
    return model

# Cretae an EffNetB2 feature extractor
def create_effnetb2():
    # Get the weight and setup the model
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    # Freeze the base model layers
    for param in model.features.parameters():
        param.requires_grad = False

    # Change the classifier head
    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1408, out_features=OUT_FEATURES)
    ).to(device)

    # Give model a name
    model.name = "effnetb2"
    print(f"[INFO] Created model {model.name} model...")
    return model

In [129]:
create_model_test_effnetb2 = create_effnetb2()
create_model_test_effnetb0 = create_effnetb0()

[INFO] Created model effnetb2 model...
[INFO] Created model effnetb0 model...


In [130]:
summary(model=create_model_test_effnetb2,
       input_size=(32, 3, 224, 224),
       col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20,
       row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [131]:
summary(model=create_model_test_effnetb0,
       input_size=(32, 3, 224, 224),
       col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20,
       row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 