In [1]:
import os
import requests
import zipfile
from pathlib import Path

# Setup path to data folder
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"

# If the image folder doesn't exist, download it and prepare it...
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)

# Download pizza, steak, sushi data
with open(data_path / "pizza_steak_sushi.zip", "wb") as f:
    request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
    print("Downloading pizza, steak, sushi data...")
    f.write(request.content)

# Unzip pizza, steak, sushi data
with zipfile.ZipFile(data_path / "pizza_steak_sushi.zip", "r") as zip_ref:
    print("Unzipping pizza, steak, sushi data...")
    zip_ref.extractall(image_path)

# Remove zip file
os.remove(data_path / "pizza_steak_sushi.zip")

Did not find data/pizza_steak_sushi directory, creating one...
Downloading pizza, steak, sushi data...
Unzipping pizza, steak, sushi data...


In [2]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int = 0,  # Default to 0 for cross-platform support
):
    """Creates training and testing DataLoaders.

    Args:
        train_dir: Path to training directory.
        test_dir: Path to testing directory.
        transform: torchvision transforms to perform on training and testing data.
        batch_size: Number of samples per batch in each of the DataLoaders.
        num_workers: Number of workers per DataLoader.

    Returns:
        A tuple of (train_dataloader, test_dataloader, class_names).
    """

    # Create train and test datasets
    train_data = datasets.ImageFolder(train_dir, transform=transform)
    test_data = datasets.ImageFolder(test_dir, transform=transform)

    class_names = train_data.classes

    # Convert datasets to DataLoaders
    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available()
    )

    test_dataloader = DataLoader(
        test_data,  # Fixed issue here
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available()
    )

    return train_dataloader, test_dataloader, class_names


In [3]:
import torch
import torch.nn as nn
import torchvision.models as models

class FoodImageClassifier(nn.Module):
    """
    A deep learning model based on EfficientNet-B0 for food image classification.

    Attributes:
        model (torchvision.models.EfficientNet): Pretrained EfficientNet-B0 model.
    """

    def __init__(self, fine_tune: bool, num_classes: int):
        """
        Initializes the EfficientNet-B0 model and modifies it for food classification.

        Args:
            fine_tune (bool): If True, allows training of EfficientNet’s feature extractor.
                              If False, freezes feature extractor layers.
            num_classes (int): Number of output classes for classification.
        """
        super().__init__()

        # Load EfficientNet-B0 with pretrained weights
        weights = models.EfficientNet_B0_Weights.DEFAULT
        self.model = models.efficientnet_b0(weights=weights)

        # Fine-tune logic
        if not fine_tune:
            print("[INFO]: Freezing feature extractor layers...")
            for param in self.model.features.parameters():
                param.requires_grad = False  # Freezes entire feature extractor

        # Modify classifier head to match the number of classes
        in_features = self.model.classifier[1].in_features  # Get input size of original classifier
        self.model.classifier = nn.Sequential(
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features=in_features, out_features=num_classes, bias=True)  # Custom output layer
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the model.

        Args:
            X (torch.Tensor): Input batch of images (B, C, H, W).

        Returns:
            torch.Tensor: Predicted class logits.
        """
        return self.model(X)


In [4]:
import torch
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
    """
    Saves the state dictionary of a PyTorch model to a specified directory.

    Args:
        model (torch.nn.Module): The PyTorch model to save.
        target_dir (str): The directory where the model should be saved.
        model_name (str): The name of the saved model file (must end with '.pt' or '.pth').

    Raises:
        AssertionError: If the model_name does not end with '.pt' or '.pth'.

    Example:
        save_model(model=my_model, target_dir="models", model_name="best_model.pth")
    """

    # Create target directory
    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True,
                            exist_ok=True)

    # Create model save path
    assert model_name.endswith(".pth") or model_name.endswith(".pt"), "model_name should end with '.pt' or '.pth'"
    model_save_path = target_dir_path / model_name

    # Save the model state_dict()
    print(f"[INFO] Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(),
                f=model_save_path)


In [5]:
import torch
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
    """
    Performs a single training step on the given model using the provided dataloader.

    Args:
        model (torch.nn.Module): The neural network model to train.
        dataloader (torch.utils.data.DataLoader): The DataLoader providing training data.
        loss_fn (torch.nn.Module): The loss function to minimize.
        optimizer (torch.optim.Optimizer): The optimizer to update model weights.
        device (torch.device): The device to run the training (CPU/GPU).

    Returns:
        Tuple[float, float]: The average training loss and accuracy for the epoch.
    """

    model.train()  # Set the model to training mode

    train_loss, train_acc = 0, 0  # Initialize loss and accuracy accumulators

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)  # Move data to the specified device

        # 1. Forward pass: Compute model predictions
        y_pred = model(X)

        # 2. Compute loss and accumulate it
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # 3. Zero gradients before backward pass
        optimizer.zero_grad()

        # 4. Backward pass: Compute gradients
        loss.backward()

        # 5. Update model weights using optimizer
        optimizer.step()

        # 6. Compute accuracy
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)  # Get predicted class labels
        train_acc += (y_pred_class == y).sum().item() / len(y_pred)  # Compute batch accuracy

    # Compute average loss and accuracy over all batches
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)

    return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
    """
    Evaluates the model on the test dataset.

    Args:
        model (torch.nn.Module): The trained model.
        dataloader (torch.utils.data.DataLoader): The DataLoader providing test data.
        loss_fn (torch.nn.Module): The loss function.
        device (torch.device): The device to run the evaluation (CPU/GPU).

    Returns:
        Tuple[float, float]: The average test loss and accuracy.
    """

    model.eval()  # Set model to evaluation mode (disables dropout, batch norm, etc.)

    test_loss, test_acc = 0, 0

    with torch.inference_mode():  # Disable gradient calculations for efficiency
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)  # Move data to the device

            # 1. Forward pass: Compute model predictions
            test_pred_logits = model(X)

            # 2. Compute and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # 3. Compute accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)  # Get predicted class labels
            test_acc += (test_pred_labels == y).sum().item() / len(test_pred_labels)

    # Compute average loss and accuracy over all batches
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)

    return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          model_name: str,
          device: torch.device) -> Dict[str, List]:
    """
    Trains and evaluates the model for a given number of epochs.

    Args:
        model (torch.nn.Module): The neural network model.
        train_dataloader (torch.utils.data.DataLoader): DataLoader for training data.
        test_dataloader (torch.utils.data.DataLoader): DataLoader for testing data.
        optimizer (torch.optim.Optimizer): Optimizer for updating model parameters.
        loss_fn (torch.nn.Module): Loss function.
        epochs (int): Number of training epochs.
        model_name (str): Name of the model to save on directory.
        device (torch.device): Device to run the training (CPU/GPU).

    Returns:
        Dict[str, List]: Dictionary containing loss and accuracy for each epoch.
    """

    # Dictionary to store training and validation results
    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }

    best_acc = 0.0
    # Loop over the specified number of epochs
    for epoch in tqdm(range(epochs), desc="Training Progress"):
        # Perform one training step
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device
        )

        # Perform one test step
        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device
        )

        # Print results for current epoch
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        if test_acc > best_acc:
            best_acc = test_acc
            # Save the trained model
            save_model(model=model,
                            target_dir="models",
                            model_name=f"{model_name}.pth")
            print(f"New best model saved at epoch {epoch+1} with test_acc={test_acc:.4f}")

        # Store results in the dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results  # Return the dictionary with loss/accuracy logs


In [6]:
import torch
import argparse
from torchvision import transforms

# Setup target device
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

# Create transforms for image preprocessing
data_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
    )
])

# Load datasets using data_setup module
train_dataloader, test_dataloader, class_names = create_dataloaders(
    train_dir='data/pizza_steak_sushi/train',
    test_dir='data/pizza_steak_sushi/test',
    transform=data_transform,
    batch_size=32
)

# Dynamically create model based on dataset class count
food_model = FoodImageClassifier(
    fine_tune=True, num_classes=len(class_names)
).to(device)

# Define loss function and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(food_model.parameters(), lr=0.001)

# Train the model
train(model=food_model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=5,
             model_name="model",
             device=device)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 62.9MB/s]


Training Progress:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.6528 | train_acc: 0.8047 | test_loss: 0.4021 | test_acc: 0.8977
[INFO] Saving model to: models/model.pth
New best model saved at epoch 1 with test_acc=0.8977
Epoch: 2 | train_loss: 0.3343 | train_acc: 0.8477 | test_loss: 0.6698 | test_acc: 0.8873
Epoch: 3 | train_loss: 0.1721 | train_acc: 0.8711 | test_loss: 0.5694 | test_acc: 0.9176
[INFO] Saving model to: models/model.pth
New best model saved at epoch 3 with test_acc=0.9176
Epoch: 4 | train_loss: 0.1289 | train_acc: 0.9766 | test_loss: 0.3296 | test_acc: 0.9186
[INFO] Saving model to: models/model.pth
New best model saved at epoch 4 with test_acc=0.9186
Epoch: 5 | train_loss: 0.5630 | train_acc: 0.8594 | test_loss: 0.2259 | test_acc: 0.9280
[INFO] Saving model to: models/model.pth
New best model saved at epoch 5 with test_acc=0.9280


{'train_loss': [0.6528106555342674,
  0.3343472508713603,
  0.17210794799029827,
  0.12894430907908827,
  0.5629504243843257],
 'train_acc': [0.8046875, 0.84765625, 0.87109375, 0.9765625, 0.859375],
 'test_loss': [0.4021080533663432,
  0.6697933028141657,
  0.569399485985438,
  0.3296302941938241,
  0.22591078778107962],
 'test_acc': [0.8977272727272728,
  0.8873106060606061,
  0.9176136363636364,
  0.9185606060606061,
  0.9280303030303031]}