# Notebook2Script

Convert the code in `messy_experiment.ipynb` into reusable Python modules inside the `helper` folder.

## Data

In [1]:
%%writefile helper/setup_data.py
"""
Contains functions to download and setup the data
"""
import torchvision.transforms as T
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split
import kaggle
import zipfile
import shutil
import os
import json
from pathlib import Path

Overwriting helper/setup_data.py


In [2]:
%%writefile -a helper/setup_data.py

def get_data_transforms():
    """
    Builds the preprocessing pipelines for the training and evaluation splits.

    The training pipeline augments the images (random resized crop to 224,
    random rotation with a degree setting of 35, vertical and horizontal
    flips with probability 0.27 each). The evaluation pipeline only resizes
    to 224x224. Both pipelines then convert to tensors and normalize with the
    same per-channel mean and standard deviation.

    Args:
        None

    Returns:
        train_transforms (torchvision.transforms.Compose): Transforms for training data
        test_transforms (torchvision.transforms.Compose): Transforms for validation and testing data
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # Random augmentation is applied to the training split only
    augmentation_steps = [
        T.RandomResizedCrop(224),
        T.RandomRotation(35),
        T.RandomVerticalFlip(0.27),
        T.RandomHorizontalFlip(0.27),
        T.ToTensor(),
        T.Normalize(channel_mean, channel_std),
    ]
    # Deterministic preprocessing for the validation and testing splits
    evaluation_steps = [
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(channel_mean, channel_std),
    ]

    return T.Compose(augmentation_steps), T.Compose(evaluation_steps)

Appending to helper/setup_data.py


In [3]:
%%writefile -a helper/setup_data.py

def download_and_extract_data():
    """
    Downloads the data from Kaggle and extracts it.

    Nothing is downloaded when the 'data' directory already exists. If the
    download or the extraction fails, the freshly created 'data' directory is
    removed again so that the next call retries instead of silently
    returning an empty folder.

    The archive is read from 'food41.zip' in the current working directory
    and is left there after extraction.

    Args:
        None
    
    Returns:
        data_path (pathlib.Path): Path to the extracted data
    """
    data_path = Path('data')
    if data_path.exists():
        return data_path

    data_path.mkdir()
    try:
        # The dataset slug is passed as a plain string: routing it through
        # Path would turn the '/' into a backslash on Windows.
        kaggle.api.dataset_download_cli('kmader/food41')
        # Context manager closes the archive handle after extraction
        with zipfile.ZipFile('food41.zip') as archive:
            archive.extractall(data_path)
    except BaseException:
        # Do not leave a partial directory behind: its mere existence would
        # make every later call skip the download.
        shutil.rmtree(data_path, ignore_errors=True)
        raise
    return data_path

def _move_class_images(data_path, class_name, splits):
    """
    Moves the images of one class out of 'images' into their split folders.

    Args:
        data_path (pathlib.Path): Root folder of the extracted data
        class_name (str): Name of the class sub-folder inside 'images'
        splits (dict): Maps a split folder name (e.g. 'train') to the set of
            image ids that belong to that split

    Returns:
        None
    """
    for split_name in splits:
        (data_path/split_name/class_name).mkdir(parents=True, exist_ok=True)

    for image in os.listdir(data_path/'images'/class_name):
        image_path = class_name + '/' + image
        # Ids are matched as '<class>/<file name up to the first dot>'
        image_id = image_path.split('.')[0]
        for split_name, image_ids in splits.items():
            if image_id in image_ids:
                shutil.move(data_path/'images'/image_path, data_path/split_name/image_path)
                break


def setup_folder():
    """
    Downloads and extracts the data, and sets up the folder structure
    for training and validation/testing.

    The ids listed in 'meta/meta/train.json' are split per class into 75%
    training and 25% validation images; the ids in 'meta/meta/test.json'
    form the test split. Images are moved into 'train', 'valid' and 'test'
    and the original 'images' folder is deleted afterwards.

    The split is random and not seeded, so it differs between fresh setups.

    NOTE: the function returns early as soon as the 'train' folder exists,
    so an interrupted previous run is not detected or repaired.

    Args:
        None

    Returns:
        data_path (pathlib.Path): Path to the extracted data
    """
    data_path = download_and_extract_data()
    if (data_path/'train').exists():
        return data_path

    with open(data_path/'meta/meta/train.json', 'r') as fp:
        train_dict = json.load(fp)
    with open(data_path/'meta/meta/test.json', 'r') as fp:
        test_dict = json.load(fp)

    for split_name in ('train', 'valid', 'test'):
        (data_path/split_name).mkdir(exist_ok=True)

    for class_name, image_ids in train_dict.items():
        train_ids, valid_ids = train_test_split(image_ids, train_size=0.75)
        # Both subsets are moved (not copied): 'images' is removed below anyway
        _move_class_images(data_path, class_name,
                           {'train': set(train_ids), 'valid': set(valid_ids)})

    for class_name, image_ids in test_dict.items():
        _move_class_images(data_path, class_name, {'test': set(image_ids)})

    shutil.rmtree(data_path/'images')
    return data_path

def get_data_loaders(train_transforms, test_transforms, batch_size=64):
    """
    Prepares the dataset folders and wraps every split in a data loader.

    Args:
        train_transforms: Transform applied to the images of the training split
        test_transforms: Transform applied to the images of the validation and testing splits
        batch_size (int): Batch size for the data loaders
        
    Returns:
        train_loader (torch.utils.data.DataLoader): Data loader for training data
        valid_loader (torch.utils.data.DataLoader): Data loader for validation data
        test_loader (torch.utils.data.DataLoader): Data loader for testing data
        test_data (torchvision.datasets.ImageFolder): Testing data
    """
    root = setup_folder()

    def load_split(split_name, transform):
        # One ImageFolder per split sub-directory
        return datasets.ImageFolder(str(root/split_name), transform=transform)

    train_data = load_split('train', train_transforms)
    valid_data = load_split('valid', test_transforms)
    test_data = load_split('test', test_transforms)

    # Only the training split is shuffled
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    valid_loader, test_loader = (
        DataLoader(split, batch_size=batch_size, shuffle=False)
        for split in (valid_data, test_data)
    )
    return train_loader, valid_loader, test_loader, test_data

Appending to helper/setup_data.py


In [4]:
%%writefile -a helper/setup_data.py

def get_metadata(dataset):
    """
    Extracts the class metadata of a dataset.

    Args:
        dataset (torchvision.datasets.ImageFolder): Dataset for which metadata is required;
            only its `classes` and `class_to_idx` attributes are read
    
    Returns:
        classes (list): List of class names
        class_to_idx (dict): Dictionary mapping class names to indices
        idx_to_class (dict): Dictionary mapping indices to class names
    """
    name_to_index = dataset.class_to_idx
    # Invert the name -> index mapping
    index_to_name = {}
    for name, index in name_to_index.items():
        index_to_name[index] = name
    return dataset.classes, name_to_index, index_to_name

Appending to helper/setup_data.py


## Plotting

In [5]:
%%writefile helper/plot.py
"""
Contains functions for data visualization and plotting
"""
import torch
import matplotlib.pyplot as plt

Overwriting helper/plot.py


In [6]:
%%writefile -a helper/plot.py

def imshow(image, ax=None, title=None):
    """
    imshow for IMAGENET-style transformed images in
    torch.Tensor format.

    The normalization with mean [0.485, 0.456, 0.406] and std
    [0.229, 0.224, 0.225] is undone and the result is clipped to [0, 1]
    before plotting. The tensor may live on any device and may require
    gradients; it is detached and copied to the CPU first.

    Args:
        image (torch.Tensor): Image to be plotted, channel dimension first
        ax (matplotlib.axes.Axes): Axes object to plot the image on;
            a new figure is created when None
        title (str): Title for the image
    
    Returns:
        ax (matplotlib.axes.Axes): Axes object with the image plotted
    """
    if ax is None:
        _, ax = plt.subplots()

    # matplotlib cannot read GPU tensors or tensors that track gradients
    image = image.detach().cpu()

    # PyTorch tensors assume the color channel is the first dimension
    # but matplotlib assumes is the third dimension
    image = image.permute((1, 2, 0))

    # Undo preprocessing
    mean = torch.tensor([0.485, 0.456, 0.406])
    std = torch.tensor([0.229, 0.224, 0.225])
    image = std * image + mean

    # De-normalized values can fall slightly outside the displayable range
    image = image.clamp(0, 1)

    ax.imshow(image)
    if title is not None:
        ax.set_title(title)

    return ax

Appending to helper/plot.py


In [7]:
%%writefile -a helper/plot.py

def image_grid(images, labels, idx_to_class, rows=2, cols=4, figsize=(20, 10)):
    """
    Plots a grid of images and their labels.

    The first rows * cols images are shown. When fewer images are supplied,
    the remaining grid cells are left empty with their axes hidden.

    Args:
        images (torch.Tensor): Images to be plotted
        labels (torch.Tensor): Labels for the images
        idx_to_class (dict): Dictionary mapping indices to class names
        rows (int): Number of rows in the grid
        cols (int): Number of columns in the grid
        figsize (tuple): Size of the figure
    
    Returns:
        None
    """
    # squeeze=False always yields a 2-D array of axes, so a 1x1 grid works too
    _, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    num_images = len(images)
    for i, ax in enumerate(axes.flat):
        if i < num_images:
            imshow(images[i], ax=ax, title=idx_to_class[labels[i].item()])
        else:
            # No image left for this cell
            ax.axis('off')
    plt.tight_layout()
    plt.show()

Appending to helper/plot.py


In [8]:
%%writefile -a helper/plot.py

def plot_training(train_losses, valid_losses, train_accuracies, valid_accuracies):
    """
    Draws the loss and accuracy curves of a training run side by side.

    The left panel shows training and validation loss, the right panel
    training and validation accuracy, both over the epoch number
    (starting at 1). The number of epochs is taken from `train_losses`.
    
    Args:
        train_losses (list): List of training losses
        valid_losses (list): List of validation losses
        train_accuracies (list): List of training accuracies
        valid_accuracies (list): List of validation accuracies
    
    Returns:
        None
    """
    x_values = range(1, len(train_losses) + 1)

    _, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(20, 8))

    # One entry per panel: axes, training curve, validation curve, metric name
    panels = (
        (loss_ax, train_losses, valid_losses, 'Loss'),
        (accuracy_ax, train_accuracies, valid_accuracies, 'Accuracy'),
    )
    for axis, train_curve, valid_curve, metric in panels:
        axis.plot(x_values, train_curve, label=f'Training {metric}')
        axis.plot(x_values, valid_curve, label=f'Validation {metric}')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(metric)
        axis.set_title(f'Training and Validation {metric}')
        axis.legend()

    plt.show()

Appending to helper/plot.py


## Training

In [9]:
%%writefile helper/engine.py
"""
Contains functions for training and evaluating the model.
"""
from typing import Dict, List, Tuple
import gc
import torch
import torchmetrics
from tqdm.auto import tqdm

def cuda_collect():
    """
    Frees memory that is no longer referenced.

    Runs Python's garbage collector first and then asks PyTorch to empty
    its CUDA cache.

    Returns:
        None
    """
    gc.collect()
    torch.cuda.empty_cache()

def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               scheduler: torch.optim.lr_scheduler.LRScheduler,
               accuracy_fn: torchmetrics.Metric,
               device: torch.device) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Trains the model for one epoch.

    Args:
        model (torch.nn.Module): Model to be trained, expected to already be on `device`
        data_loader (torch.utils.data.DataLoader): Training data loader
        loss_fn (torch.nn.Module): Loss function
        optimizer (torch.optim.Optimizer): Optimizer
        scheduler (torch.optim.lr_scheduler.LRScheduler): Learning rate scheduler,
            stepped once at the end of the epoch
        accuracy_fn (torchmetrics.Metric): Accuracy function, called with the
            predicted class indices and the labels
        device (torch.device): Device the batches are moved to

    Returns:
        Tuple of (loss, accuracy), each averaged over the batches of the
        epoch and moved to the CPU.
    """
    train_loss, train_acc = 0, 0
    model.train()
    # Compare the device *type* so that 'cuda:0', 'cuda:1', ... also count as CUDA
    on_cuda = torch.device(device).type == 'cuda'
    # Model is expected to be in GPU already
    for images, labels in tqdm(data_loader,
                               total=len(data_loader),
                               desc='Training model:'):
        images, labels = images.to(device), labels.to(device)

        # 1. Forward pass
        preds = model(images)

        # 2. Calculate loss
        loss = loss_fn(preds, labels)
        # Accumulate a detached copy: summing `loss` itself would keep
        # autograd history attached to the running total
        train_loss += loss.detach()
        train_acc += accuracy_fn(preds.argmax(dim=1), labels)

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # 6. Garbage collection on GPU RAM
        if on_cuda:
            cuda_collect()
    scheduler.step()
    # Exponential learning rate scheduler reduces learning rate too fast.
    # A better option is torch.optim.lr_scheduler.OneCycleLR from the paper
    # https://arxiv.org/pdf/1708.07120.pdf (on the MNIST dataset).
    # The exponential scheduler was replaced by the one-cycle scheduler
    # because performance became slow after 70 epochs.
    # NOTE(review): OneCycleLR is documented to be stepped after every batch;
    # confirm that stepping once per epoch matches the scheduler's setup.

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}")
    return train_loss.cpu(), train_acc.cpu()

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              accuracy_fn: torchmetrics.Metric,
              device: torch.device) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Evaluates the model on a data loader without tracking gradients.

    Args:
        model (torch.nn.Module): Model to be evaluated, expected to already be on `device`
        data_loader (torch.utils.data.DataLoader): Validation or testing data loader
        loss_fn (torch.nn.Module): Loss function
        accuracy_fn (torchmetrics.Metric): Accuracy function, called with the
            predicted class indices and the labels
        device (torch.device): Device the batches are moved to

    Returns:
        Tuple of (loss, accuracy), each averaged over the batches of the
        data loader and moved to the CPU.
    """
    test_loss, test_acc = 0, 0
    model.eval()
    # Compare the device *type* so that 'cuda:0', 'cuda:1', ... also count as CUDA
    on_cuda = torch.device(device).type == 'cuda'
    # Turn on inference context manager
    with torch.inference_mode():
        for images, labels in tqdm(data_loader,
                                   total=len(data_loader),
                                   desc='Making predictions:'):

            images, labels = images.to(device), labels.to(device)

            # 1. Forward pass
            preds = model(images)

            # 2. Calculate loss and accuracy
            test_loss += loss_fn(preds, labels)
            test_acc += accuracy_fn(preds.argmax(dim=1), labels)

            if on_cuda:
                cuda_collect()

        # Adjust metrics and print out.
        # The in-place division stays inside the inference context because
        # the accumulated tensors were created there.
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}")
    return test_loss.cpu(), test_acc.cpu()

def train(model: torch.nn.Module,
          train_loader: torch.utils.data.DataLoader,
          valid_loader: torch.utils.data.DataLoader,
          loss_fn: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          scheduler: torch.optim.lr_scheduler.LRScheduler,
          accuracy_fn: torchmetrics.Metric,
          device: torch.device,
          epochs: int,
          threshold: List[float]) -> Dict[str, List[torch.Tensor]]:
    """
    Trains the model and evaluates it on the validation set.

    Training stops early once the validation loss has improved by less than
    `threshold` (compared with the previous epoch) on more than two epochs
    in total. The counter is cumulative: it is not reset when a later epoch
    improves again.
    
    Args:
        model (torch.nn.Module): Model to be trained
        train_loader (torch.utils.data.DataLoader): Training data loader
        valid_loader (torch.utils.data.DataLoader): Validation data loader
        loss_fn (torch.nn.Module): Loss function
        optimizer (torch.optim.Optimizer): Optimizer
        scheduler (torch.optim.lr_scheduler.LRScheduler): Learning rate scheduler
        accuracy_fn (torchmetrics.Accuracy): Accuracy function
        device (torch.device): Device to run the training on
        epochs (int): Number of epochs to train the model for
        threshold (float or single-element list of float): Minimum decrease of
            the validation loss that counts as an improvement for early stopping
    
    Returns:
        Dictionary containing training and validation losses and accuracies for each epoch.
        In the form: {train_losses: [...],
                  train_accuracies: [...],
                  valid_losses: [...],
                  valid_accuracies: [...]} 
        For example if training for epochs=2: 
                    {train_losses: [2.0616, 1.0537],
                    train_accuracies: [0.3945, 0.3945],
                    valid_losses: [1.2641, 1.5706],
                    valid_accuracies: [0.3400, 0.2973]} 
    """
    results = {"train_losses": [], "train_accuracies": [],
               "valid_losses": [], "valid_accuracies": []}
    
    tolerance = 0
    # as_tensor accepts both a plain float and a list, unlike torch.Tensor(...)
    threshold = torch.as_tensor(threshold, dtype=torch.float32)
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model, train_loader, loss_fn, optimizer, scheduler, accuracy_fn, device)
        valid_loss, valid_acc = test_step(model, valid_loader, loss_fn, accuracy_fn, device)
        print(
            f"Epoch {epoch + 1} of {epochs}"
            f"\n-------------------------------"
            f"\nTrain loss: {train_loss:.5f} | Train accuracy: {train_acc:.4f}"
            f"\nValid loss: {valid_loss:.5f} | Valid accuracy: {valid_acc:.4f}"
        )

        results["train_losses"].append(train_loss.detach())
        results["train_accuracies"].append(train_acc.cpu())
        results["valid_losses"].append(valid_loss.detach())
        results["valid_accuracies"].append(valid_acc.cpu())

        # Early stopping: count epochs whose validation loss dropped by less
        # than the threshold relative to the previous epoch
        valid_history = results["valid_losses"]
        if len(valid_history) > 1 and valid_history[-2] - valid_history[-1] < threshold:
            tolerance += 1
            if tolerance > 2:
                break
    return results

Overwriting helper/engine.py


## Saving model

In [10]:
%%writefile helper/utils.py
"""
Contains utility functions for training and saving model
"""
import torch
from pathlib import Path

def save_model(model, save_dir, model_name, epoch, optimizer, loss):
    """
    Writes a training checkpoint to disk.

    The checkpoint holds the epoch, the model and optimizer state dicts and
    the loss. It is stored as '<model_name>_epoch<epoch>_loss<loss>.pth'
    inside `save_dir`, which is created (including parents) when missing.
    The final path is printed.

    Args:
        model (torch.nn.Module): Model to be saved
        save_dir (str): Directory to save model
        model_name (str): Name of model
        epoch (int): Epoch number
        optimizer (torch.optim.Optimizer): Optimizer
        loss (float): Loss value

    Returns:
        None
    """
    target_dir = Path(save_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = target_dir / f'{model_name}_epoch{epoch}_loss{loss}.pth'

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }
    torch.save(checkpoint, checkpoint_path)
    print(f'[INFO] Model saved to {checkpoint_path}')

Overwriting helper/utils.py


## Training script

Actually, a separate training script is not needed.