In [4]:
%%writefile ../../modules/data_setup.py
"""
Module for creating PyTorch DataLoaders for image classification data
"""
import os
import torch
from pathlib import Path
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

NUM_WORKERS = os.cpu_count() or 1

def create_dataloaders(
    train_dir: Path,
    test_dir: Path,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int=NUM_WORKERS
):
    """Builds the training and testing DataLoaders for an image-folder dataset

    Each directory is wrapped in a torchvision ImageFolder dataset, and each dataset is then wrapped in a DataLoader.
    Only the training loader shuffles its samples.

    Args:
        train_dir: Path to training directory
        test_dir: Path to testing directory
        transform: torchvision transforms to be performed on both training and testing data
        batch_size: Batch size for the DataLoaders
        num_workers: Number of workers for the DataLoaders (defaults to NUM_WORKERS)

    Returns:
        A tuple of (train_dataloader, test_dataloader, class_names), where class_names is taken
        from the `classes` attribute of the training dataset.
    """
    train_data, test_data = (
        datasets.ImageFolder(root, transform=transform)
        for root in (train_dir, test_dir)
    )

    # Settings common to both loaders; pinned memory is only requested when CUDA is available.
    shared_loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    train_dataloader = DataLoader(train_data, shuffle=True, **shared_loader_options)
    test_dataloader = DataLoader(test_data, shuffle=False, **shared_loader_options)

    return train_dataloader, test_dataloader, train_data.classes

Overwriting ../../modules/data_setup.py


In [1]:
%%writefile ../../modules/model_builder.py
"""
Module to instantiate a TinyVGG PyTorch model
"""
import torch
import torchvision
from torch import nn

class TinyVGG(nn.Module):
    """A small VGG-style convolutional image classifier

    Replicates, with some differences, the TinyVGG architecture from the CNN explainer website in PyTorch.
    Original architecture at: https://poloclub.github.io/cnn-explainer/.

    The network is two convolutional blocks (Conv -> ReLU -> Conv -> ReLU -> MaxPool, every convolution 3x3)
    followed by a classifier that adaptively average-pools to 7x7, flattens and applies one linear layer.

    Args:
        input_shape: The number of input channels
        hidden_units: The number of channels produced by every convolution
        output_shape: The number of output units, often the number of classes
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        # Layers are created in the same order as they are applied in forward().
        self.conv_block_1 = self._conv_block(input_shape, hidden_units)
        self.conv_block_2 = self._conv_block(hidden_units, hidden_units)

        # Pooling to a fixed grid keeps the linear layer independent of the input image size.
        pooled_side = 7
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((pooled_side, pooled_side)),
            nn.Flatten(),
            nn.Linear(hidden_units * pooled_side * pooled_side, output_shape),
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Returns Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool(2) as one Sequential."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x: torch.Tensor):
        features = self.conv_block_1(x)
        features = self.conv_block_2(features)
        return self.classifier(features)

Overwriting ../../modules/model_builder.py


In [None]:
%%writefile ../../modules/engine.py
"""
Module contains functions to train and test a PyTorch model
"""
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train_step(
    model: nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_fn: nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
) -> Tuple[float, float]:
    """Performs a single training step (one pass over the dataloader) on a PyTorch model

    Args:
        model: PyTorch model to be trained
        dataloader: DataLoader instance containing training data, yielding (inputs, targets) batches
        loss_fn: Loss function to minimize
        optimizer: PyTorch optimizer used to minimize the loss function
        device: Target device to compute on

    Returns:
        A tuple of training loss and accuracy, in the form (train_loss, train_accuracy).
        train_loss is the mean of the per-batch loss values; train_accuracy is the number of
        correctly classified samples divided by the number of samples seen, so a smaller
        final batch does not skew the result.

    Raises:
        ValueError: If the dataloader yields no batches.
    """

    model.train()

    loss_sum = 0.0
    num_batches = 0
    num_correct = 0
    num_samples = 0

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        # Batches are counted while iterating, so the loader does not need to support len().
        num_batches += 1

        # Softmax is monotonic, so the argmax of the raw outputs is already the predicted class.
        y_pred_class = torch.argmax(y_pred, dim=1)
        num_correct += (y_pred_class == y).sum().item()
        num_samples += len(y)

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches; cannot compute training metrics")

    train_loss = loss_sum / num_batches
    train_acc = num_correct / num_samples
    return (train_loss, train_acc)

def test_step(
    model: nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_fn: nn.Module,
    device: torch.device
) -> Tuple[float, float]:
    """Performs a single test step on a PyTorch model
    
    Args:
        model: PyTorch model to be tested
        dataloader: DataLoader instance containing test data
        loss_fn: Loss function to evaluate model
        device: Target device to compute on

    Returns:
        A tuple of testing loss and accuracy, in the form (test_loss, test_accuracy).
    """
    model.eval()
    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
            
            y_pred = model(X)

            loss = loss_fn(y_pred, y)
            test_loss += loss.item()

            y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            test_acc += (torch.eq(y_pred_class, y)).sum().item()/len(y_pred)

        test_loss /= len(dataloader)
        test_acc /= len(dataloader)
        return (test_loss, test_acc)

def train(
    model: nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    loss_fn: nn.Module,
    optimizer: torch.optim.Optimizer,
    scheduler: torch.optim.lr_scheduler.LRScheduler | None = None,
    device: torch.device=torch.device("cpu"),
    epochs: int=5,
    writer: SummaryWriter | None = None
) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model

    Calls the train_step() and test_step() functions to train and test a model for a given number of epochs.
    Supports learning rate scheduling (the scheduler is stepped once per epoch).
    Calculates, prints and stores training and testing metrics throughout for monitoring, and logs them
    to TensorBoard through a SummaryWriter. The model graph is logged once, after the first epoch,
    traced with a real batch taken from train_dataloader.

    Args:
        model: PyTorch model to be trained
        train_dataloader: DataLoader instance containing training data
        test_dataloader: DataLoader instance containing test data
        loss_fn: Loss function to minimize
        optimizer: PyTorch optimizer used to minimize the loss function
        scheduler: PyTorch learning rate scheduler for the optimizer, not mandatory
        device: Target device to compute on (default is "cpu")
        epochs: Number of epochs for the training (default is 5)
        writer: A SummaryWriter to log to, so that it can be reused across calls. When omitted,
            a default SummaryWriter is created and closed before returning; a writer supplied
            by the caller is left open.

    Returns:
        A dictionary of training and testing loss and training and testing accuracy for each epoch.
        In the form: {train_loss: [...],
                    train_acc: [...],
                    test_loss: [...],
                    test_acc: [...]}
    """
    new_writer = writer is None
    if new_writer:
        writer = SummaryWriter()

    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }

    try:
        for epoch in tqdm(range(epochs)):
            train_loss, train_acc = train_step(model, train_dataloader, loss_fn, optimizer, device)
            test_loss, test_acc = test_step(model, test_dataloader, loss_fn, device)

            if scheduler is not None:
                scheduler.step()

            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.4f} | "
                f"train_acc: {train_acc:.4f} | "
                f"test_loss: {test_loss:.4f} | "
                f"test_acc: {test_acc:.4f}"
            )

            results["train_loss"].append(train_loss)
            results["train_acc"].append(train_acc)
            results["test_loss"].append(test_loss)
            results["test_acc"].append(test_acc)

            writer.add_scalars(
                main_tag="Loss",
                tag_scalar_dict={"train_loss": train_loss, "test_loss": test_loss},
                global_step=epoch
            )

            writer.add_scalars(
                main_tag="Accuracy",
                tag_scalar_dict={"train_acc": train_acc, "test_acc": test_acc},
                global_step=epoch
            )

            if epoch == 0:
                # The architecture does not change between epochs, so the graph is logged once.
                # A real batch is used so the traced input always matches what the model is fed,
                # whatever the image size or channel count.
                sample_inputs, _ = next(iter(train_dataloader))
                writer.add_graph(model=model, input_to_model=sample_inputs.to(device))
    finally:
        # Only close a writer created here, and do so even when an epoch raises.
        if new_writer:
            writer.close()

    return results

Overwriting ../../modules/engine.py


In [None]:
%%writefile ../../modules/utils.py
"""
Utility module that helps with training, saving and loading a PyTorch model
"""
import os
import sys
import torch
import zipfile
import requests
from pathlib import Path
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter 

def save_model(
    model: torch.nn.Module,
    target_dir_path: Path,
    model_name: str
):
    """Saves a PyTorch model's state_dict to a target directory

    Args:
        model: PyTorch model to save
        target_dir_path: Directory where the model will be saved (a str is accepted as well);
            it is created, together with any missing parents, if it does not exist
        model_name: Name under which the model will be saved, should end in ".pth" or ".pt" as for naming conventions

    Raises:
        AssertionError: If model_name does not end in ".pth" or ".pt".
    """
    # Validate before touching the filesystem, so a rejected name leaves no directory behind.
    # NOTE: kept as an assert so callers still see AssertionError; asserts are skipped under `python -O`.
    assert model_name.endswith((".pth", ".pt")), "model_name doesn't follow naming convention"

    target_dir_path = Path(target_dir_path)
    target_dir_path.mkdir(parents=True, exist_ok=True)
    model_save_path = target_dir_path / model_name

    print(f"Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(), f=model_save_path)

def download_data(
    source: str,
    destination: Path,
    remove_source: bool=True
) -> Path:
    """Downloads a zipped dataset from source and unzips to destination

    The data lives under <project_root>/data, where project_root is the parent of sys.prefix
    (this assumes the virtual environment sits directly inside the project root).
    If the destination directory already exists, nothing is downloaded and it is returned as is.

    Args:
        source: A link to a zip archive containing data
        destination: The directory, relative to <project_root>/data, the dataset will be unzipped to
        remove_source: Whether to remove the downloaded archive once it has been processed

    Returns:
        pathlib.Path to the downloaded data directory

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    import shutil  # local import: only needed to clean up after a failed extraction

    venv_dir = Path(sys.prefix)
    project_root = venv_dir.parent
    data_path = project_root/"data"
    image_path = data_path/destination

    if image_path.is_dir():
        return image_path

    data_path.mkdir(parents=True, exist_ok=True)
    archive_path = data_path/Path(source).name

    # Fetch and check the response before writing anything, so a failed request
    # leaves neither an empty archive nor an empty destination directory behind.
    response = requests.get(source, timeout=30)
    response.raise_for_status()
    archive_path.write_bytes(response.content)

    try:
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            # The destination is created only once the archive has been opened successfully.
            image_path.mkdir(parents=True, exist_ok=True)
            zip_ref.extractall(image_path)
    except Exception:
        # A half-populated destination would make the next call skip the download.
        shutil.rmtree(image_path, ignore_errors=True)
        raise
    finally:
        if remove_source:
            archive_path.unlink(missing_ok=True)

    return image_path

def create_writer(
    experiment_name: str,
    model_name: str,
    extra: str=None
) -> SummaryWriter:
    """Creates a SummaryWriter instance that saves to a specific directory

    The logs directory is a combination of runs/timestamp/experiment_name/model_name/extra
    Where timestamp is the current date in YYYY-MM-DD format, and the trailing extra
    component is only present when extra is given.

    Args:
    - experiment_name: Name of the experiment
    - model_name: Name of the model
    - extra: Extra informations, defaults to None

    Returns:
        A SummaryWriter whose log_dir is the directory described above.
    """
    # The return annotation must be the class itself: writing `SummaryWriter()` there would
    # build a writer (and its log directory) as soon as this function is defined.
    timestamp = datetime.now().strftime("%Y-%m-%d")
    log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
    if extra:
        log_dir = os.path.join(log_dir, extra)
    return SummaryWriter(log_dir=log_dir)

Overwriting ../../modules/utils.py


In [9]:
%%writefile ../../modules/train.py
"""
Trains and saves a PyTorch image classification model using device-agnostic code
"""
import os
import sys
import torch
import argparse
from pathlib import Path
from torchvision import transforms
import data_setup, engine, model_builder, utils

def parse_args() -> argparse.Namespace:
    """Parses the command-line hyperparameters of a training run."""
    parser = argparse.ArgumentParser(description="Trains and saves a PyTorch image classification model using device-agnostic code")
    parser.add_argument("--num_epochs", type=int, default=5, help="Number of epoches for the training and testing")
    parser.add_argument("--batch_size", type=int, default=32, help="Dimension of every batch")
    parser.add_argument("--hidden_units", type=int, default=10, help="Hidden units for every model layer, except for input and output")
    parser.add_argument("--lr", type=float, default=0.001, help="Starting learning rate for the training")
    return parser.parse_args()


def main() -> None:
    """Trains a TinyVGG model on the pizza/steak/sushi dataset and saves its weights."""
    args = parse_args()

    # Project layout is derived from the interpreter prefix: the data and the trained models
    # are looked up next to the virtual environment directory.
    venv_dir = Path(sys.prefix)
    project_root = venv_dir.parent
    data_dir = project_root/"data"
    train_dir = data_dir/"pizza_steak_sushi/train"
    test_dir = data_dir/"pizza_steak_sushi/test"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])

    train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
        train_dir, test_dir, data_transform, args.batch_size
    )

    model = model_builder.TinyVGG(
        input_shape=3,
        hidden_units=args.hidden_units,
        output_shape=len(class_names)
    ).to(device)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    # One cosine cycle spread over the whole run.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=args.num_epochs)

    engine.train(
        model,
        train_dataloader,
        test_dataloader,
        loss_fn,
        optimizer,
        scheduler=scheduler,
        device=device,
        epochs=args.num_epochs
    )

    models_dir = project_root/"trained_models"
    utils.save_model(model, models_dir, "tinyvgg_from_cmd.pth")


# The guard is required because the DataLoaders use worker processes: on platforms that start
# workers by re-importing this file, unguarded top-level code would run again in every worker.
if __name__ == "__main__":
    main()


Overwriting ../../modules/train.py
