In [16]:
import torch
import torchvision
from torch import nn
from torchvision import transforms, datasets
from torchinfo import summary
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import notebook_tqdm
from typing import Dict, List, Tuple
import os
import zipfile
from pathlib import Path
import requests
import gradio as gr
from timeit import default_timer as timer
import random
from PIL import Image

# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [17]:
def download_data(
        source: str,
        destination: str,
        remove_source: bool = True) -> Path:
    """Downloads a zipped dataset from source and unzips it into data/<destination>.

    If data/<destination> already exists the download is skipped entirely.

    Args:
        source: URL of the zip archive to download.
        destination: Folder name (relative to "data/") to extract the archive into.
        remove_source: When True, delete the downloaded archive once extraction
            has been attempted (whether or not it succeeded).

    Returns:
        Path to the directory that holds the extracted data.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Setup path to data folder
    data_path = Path("data/")
    image_path = data_path / destination

    if image_path.is_dir():
        print("Directory already exists, skipping download")
        return image_path

    archive_path = data_path / Path(source).name

    # Fetch and validate the response BEFORE touching the filesystem, so a failed
    # download cannot leave an empty directory that later calls mistake for data.
    print("Downloading target file from source")
    response = requests.get(source)
    response.raise_for_status()

    data_path.mkdir(parents=True, exist_ok=True)
    with open(archive_path, "wb") as f:
        f.write(response.content)

    try:
        # Opening the archive validates it; the target directory is only created
        # once we know there is a readable zip to extract.
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            image_path.mkdir(parents=True, exist_ok=True)
            zip_ref.extractall(image_path)
    finally:
        if remove_source:
            os.remove(archive_path)

    return image_path

Directory already exists, skipping download


In [18]:
def create_effnetb2_model(num_classes:int,
                          seed:int=42):
    """Builds a frozen EfficientNet-B2 feature extractor with a fresh classifier head.

    Args:
        num_classes: Number of output features of the new classifier head.
        seed: Value passed to torch.manual_seed() right before the head is created.

    Returns:
        A (model, transforms) tuple: the EfficientNet-B2 model with its replaced
        classifier, and the transforms object provided by the pretrained weights.
    """
    pretrained_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    preprocess = pretrained_weights.transforms()
    effnet = torchvision.models.efficientnet_b2(weights=pretrained_weights)

    # Freeze everything loaded so far; the head created below is added afterwards
    # and therefore keeps requires_grad=True.
    for pretrained_param in effnet.parameters():
        pretrained_param.requires_grad = False

    # Seed immediately before creating the Linear layer so its initial weights repeat
    torch.manual_seed(seed)
    classifier_head = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=num_classes)
    )
    effnet.classifier = classifier_head

    return effnet, preprocess

In [46]:
# Build the EfficientNet-B2 model with a 101-output classifier head, together
# with the transforms that come with its pretrained weights.
effnetb2_101, effnetb2_transforms = create_effnetb2_model(num_classes=101, seed=42)

In [20]:
"""
Contains functionality for creating PyTorch DataLoaders for 
image classification data.
"""
import os

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

NUM_WORKERS = 0

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers=NUM_WORKERS
):
  """Builds training and testing DataLoaders from two image folders.

  Each directory is wrapped in a torchvision ImageFolder dataset (both using
  the same transform) and then in a DataLoader.

  Args:
    train_dir: Path to training directory.
    test_dir: Path to testing directory.
    transform: torchvision transforms applied to both training and testing data.
    batch_size: Number of samples per batch in each of the DataLoaders.
    num_workers: Number of worker processes per DataLoader.

  Returns:
    A tuple of (train_dataloader, test_dataloader, class_names), where
    class_names is the list of classes found in the training directory.

  Example usage:
    train_dataloader, test_dataloader, class_names = create_dataloaders(
        train_dir="path/to/train_dir",
        test_dir="path/to/test_dir",
        transform=some_transform,
        batch_size=32,
        num_workers=4)
  """
  # Datasets: one ImageFolder per split
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Class names come from the training split
  class_names = train_data.classes

  # Settings shared by both loaders; only the shuffling differs
  loader_options = dict(
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=True,
  )
  train_dataloader = DataLoader(train_data, shuffle=True, **loader_options)
  test_dataloader = DataLoader(test_data, shuffle=False, **loader_options)

  return train_dataloader, test_dataloader, class_names

In [23]:
"""
Contains functions for training and testing a PyTorch model.
"""
import torch

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:
    """Runs one training epoch over `dataloader` and reports averaged metrics.

    The model is switched to training mode, then for every batch a forward
    pass, loss computation, backward pass and optimizer step are performed.

    Args:
    model: A PyTorch model to be trained.
    dataloader: Iterable of (inputs, targets) batches; must support len().
    loss_fn: Loss function called as loss_fn(logits, targets).
    optimizer: Optimizer that updates the model's parameters.
    device: Device the batches are moved to (e.g. "cuda" or "cpu").

    Returns:
    A (train_loss, train_accuracy) tuple, each averaged over the number of
    batches. For example:

    (0.1112, 0.8743)
    """
    model.train()

    running_loss = 0
    running_acc = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        running_loss += batch_loss.item()

        # Clear old gradients, backpropagate, update parameters
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Fraction of this batch predicted correctly
        predicted_classes = logits.softmax(dim=1).argmax(dim=1)
        correct = (predicted_classes == targets).sum().item()
        running_acc += correct / len(logits)

    # Per-batch averages (every batch counts equally, whatever its size)
    batch_count = len(dataloader)
    return running_loss / batch_count, running_acc / batch_count

def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:
    """Tests a PyTorch model for a single epoch.

    Turns a target PyTorch model to "eval" mode and then performs
    a forward pass on a testing dataset.

    Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). For example:

    (0.0223, 0.8985)
    """
    # Put model in eval mode
    model.eval() 

    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            # Send data to target device
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

    # Adjust metrics to get average loss and accuracy per batch 
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and evaluates a PyTorch model for a number of epochs.

    Every epoch calls train_step() on the training data followed by
    test_step() on the testing data, prints the four resulting metrics and
    records them.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary with one list per metric and one entry per epoch, in the
    form: {train_loss: [...],
           train_acc: [...],
           test_loss: [...],
           test_acc: [...]}
    For example if training for epochs=2:
          {train_loss: [2.0616, 1.0537],
           train_acc: [0.3945, 0.3945],
           test_loss: [1.2641, 1.5706],
           test_acc: [0.3400, 0.2973]}
    """
    history = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []}

    # The model must live on the same device the batches are sent to
    model.to(device)

    for epoch_index in tqdm(range(epochs)):
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
        )
        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device,
        )

        # Progress line for this epoch (epochs are reported 1-based)
        epoch_report = (
            f"\nEpoch: {epoch_index+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )
        print(epoch_report)

        # Record this epoch's metrics
        epoch_metrics = {
            "train_loss": train_loss,
            "train_acc": train_acc,
            "test_loss": test_loss,
            "test_acc": test_acc,
        }
        for metric_name, metric_value in epoch_metrics.items():
            history[metric_name].append(metric_value)

    return history

In [25]:
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
    """Writes a model's state_dict to <target_dir>/<model_name>.

    The target directory (including missing parents) is created first, then
    the file name is checked, then the state_dict is saved with torch.save().

    Args:
    model: A target PyTorch model to save.
    target_dir: A directory for saving the model to.
    model_name: A filename for the saved model. Should include
      either ".pth" or ".pt" as the file extension.

    Example usage:
    save_model(model=model_0,
               target_dir="models",
               model_name="05_going_modular_tingvgg_model.pth")
    """
    # Make sure the output directory exists
    output_dir = Path(target_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Only the two conventional PyTorch extensions are accepted
    has_valid_extension = model_name.endswith((".pth", ".pt"))
    assert has_valid_extension, "model_name should end with '.pt' or '.pth'"
    checkpoint_path = output_dir / model_name

    # Persist the learned parameters only (not the whole module object)
    print(f"[INFO] Saving model to: {checkpoint_path}")
    torch.save(obj=model.state_dict(), f=checkpoint_path)

In [40]:
# Training-only pipeline: TrivialAugmentWide augmentation first, followed by the
# transforms returned alongside the EfficientNet-B2 model.
food_101_train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.TrivialAugmentWide(),
    effnetb2_transforms
])

In [42]:
# Get data for FoodVision101
from torchvision import datasets

# Both Food101 splits share the same root folder.
data_dir = Path("data")

# Training split uses the augmented transform pipeline.
train_data = datasets.Food101(
    root=data_dir,
    split="train",
    transform=food_101_train_transforms,
    download=True
)

# Test split uses only the model's own transforms.
test_data = datasets.Food101(
    root=data_dir,
    split="test",
    transform=effnetb2_transforms,  # Normal transforms only; test data is not augmented
    download=True
)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data\food-101.tar.gz


100%|██████████| 4996278331/4996278331 [03:32<00:00, 23475371.10it/s]


Extracting data\food-101.tar.gz to data


In [43]:
# Class names as exposed by the Food101 training dataset; written to
# class_names.txt for the demo app further down.
food101_class_names = train_data.classes

['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare', 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito']


In [52]:
# Need some dataloaders
import torch

BATCH_SIZE = 32
NUM_WORKERS = 4

# Training batches are reshuffled every epoch.
train_dataloader_food101_20 = torch.utils.data.DataLoader(dataset=train_data,
                                                          batch_size=BATCH_SIZE,
                                                          shuffle=True,
                                                          num_workers=NUM_WORKERS)

# Evaluation data keeps a fixed order: test_step() averages accuracy per batch,
# so reshuffling would change which samples land in the final (smaller) batch
# and make the reported test metrics fluctuate between epochs for no real reason.
test_dataloader_food101_20 = torch.utils.data.DataLoader(dataset=test_data,
                                                          batch_size=BATCH_SIZE,
                                                          shuffle=False,
                                                          num_workers=NUM_WORKERS)

In [53]:
# Adam over the model's parameters. create_effnetb2_model() set
# requires_grad=False on the pretrained layers, so only the classifier head
# receives gradients.
optimizer = torch.optim.Adam(params=effnetb2_101.parameters(),
                             lr=1e-3)
# Cross-entropy with label smoothing of 0.1
loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

# Train for 10 epochs; the returned dict holds per-epoch train/test loss and accuracy.
food_vision_101 = train(
    model=effnetb2_101,
    train_dataloader=train_dataloader_food101_20,
    test_dataloader=test_dataloader_food101_20,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device
)

 10%|█         | 1/10 [05:14<47:07, 314.19s/it]


Epoch: 1 | train_loss: 2.6041 | train_acc: 0.4966 | test_loss: 2.0412 | test_acc: 0.6479


 20%|██        | 2/10 [10:29<41:58, 314.79s/it]


Epoch: 2 | train_loss: 2.5898 | train_acc: 0.5014 | test_loss: 2.0264 | test_acc: 0.6526


 30%|███       | 3/10 [15:43<36:41, 314.51s/it]


Epoch: 3 | train_loss: 2.5956 | train_acc: 0.4990 | test_loss: 2.0317 | test_acc: 0.6510


 40%|████      | 4/10 [20:57<31:25, 314.29s/it]


Epoch: 4 | train_loss: 2.5866 | train_acc: 0.5002 | test_loss: 2.0267 | test_acc: 0.6519


 50%|█████     | 5/10 [26:11<26:10, 314.04s/it]


Epoch: 5 | train_loss: 2.5948 | train_acc: 0.4993 | test_loss: 2.0400 | test_acc: 0.6498


 60%|██████    | 6/10 [31:24<20:55, 313.85s/it]


Epoch: 6 | train_loss: 2.5863 | train_acc: 0.5011 | test_loss: 2.0368 | test_acc: 0.6512


 70%|███████   | 7/10 [36:39<15:42, 314.13s/it]


Epoch: 7 | train_loss: 2.5893 | train_acc: 0.4988 | test_loss: 2.0237 | test_acc: 0.6523


 80%|████████  | 8/10 [41:54<10:28, 314.31s/it]


Epoch: 8 | train_loss: 2.5847 | train_acc: 0.5041 | test_loss: 2.0310 | test_acc: 0.6518


 90%|█████████ | 9/10 [47:08<05:14, 314.43s/it]


Epoch: 9 | train_loss: 2.5928 | train_acc: 0.5005 | test_loss: 2.0272 | test_acc: 0.6543


100%|██████████| 10/10 [52:23<00:00, 314.38s/it]


Epoch: 10 | train_loss: 2.5884 | train_acc: 0.5014 | test_loss: 2.0277 | test_acc: 0.6525





In [54]:
# Save the trained model's state_dict as models/Food_101.pth
effnetb2_101_model_path = "Food_101.pth"
save_model(model=effnetb2_101,
           target_dir="models/",
           model_name=effnetb2_101_model_path)

# Timing notes:
# 367s/it with num_workers = os.cpu_count()
# 29:12 to train whole thing

[INFO] Saving model to: models\Food_101.pth


In [55]:
# Populate the demos folder
from pathlib import Path
# Root folder of the demo app; created along with any missing parents.
foodvision_101_demo_path = Path("demos/food_classifier_101")
foodvision_101_demo_path.mkdir(parents=True,
                               exist_ok=True)

# app.py lists this "examples" folder to build its example inputs.
(foodvision_101_demo_path / "examples").mkdir(parents=True, exist_ok=True)

In [56]:
# The demo app reads its labels from class_names.txt
food_vision_101_class_names_path = foodvision_101_demo_path / "class_names.txt"

# One class name per line, with no newline after the last entry
food_vision_101_class_names_path.write_text("\n".join(food101_class_names))

In [58]:
%%writefile demos/food_classifier_101/model.py
import torch
import torchvision

from torch import nn


def create_effnetb2_model(num_classes:int=3, 
                          seed:int=42):
    """Builds a frozen EfficientNet-B2 feature extractor with a fresh classifier head.

    Args:
        num_classes (int, optional): number of output features of the new
            classifier head. Defaults to 3.
        seed (int, optional): value passed to torch.manual_seed() right before
            the head is created. Defaults to 42.

    Returns:
        model (torch.nn.Module): EfficientNet-B2 with its classifier replaced.
        transforms: the transforms object provided by the pretrained weights.
    """
    # Pretrained weights, their matching transforms, and the model itself
    pretrained_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    preprocess = pretrained_weights.transforms()
    effnet = torchvision.models.efficientnet_b2(weights=pretrained_weights)

    # Freeze everything loaded so far; the head created below is added afterwards
    # and therefore keeps requires_grad=True.
    for pretrained_param in effnet.parameters():
        pretrained_param.requires_grad = False

    # Seed immediately before creating the Linear layer so its initial weights repeat
    torch.manual_seed(seed)
    classifier_head = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=num_classes),
    )
    effnet.classifier = classifier_head

    return effnet, preprocess

Writing demos/food_classifier_101/model.py


In [61]:
%%writefile demos/food_classifier_101/app.py
### 1. Imports and class names setup ### 
import gradio as gr
import os
import torch

from model import create_effnetb2_model
from timeit import default_timer as timer
from typing import Tuple, Dict

# Setup class names: class_names.txt holds one label per line
with open("class_names.txt", "r") as class_names_file:
    class_names = [line.strip() for line in class_names_file]

### 2. Model and transforms preparation ###

# Create model
effnetb2, effnetb2_transforms = create_effnetb2_model(
    num_classes=101, # could also use len(class_names)
)

# Load saved weights onto the CPU
effnetb2.load_state_dict(
    torch.load(f="Food_101.pth", map_location=torch.device("cpu"))
)

### 3. Predict function ###

# Create predict function
def predict(img) -> Tuple[Dict, float]:
    """Classifies `img` and reports how long the prediction took.

    Args:
        img: The input image; it is passed straight to effnetb2_transforms.

    Returns:
        A tuple of (probabilities, seconds): a dict mapping every class name to
        its predicted probability, and the elapsed time rounded to 5 decimals.
    """
    started_at = timer()

    # Preprocess and add a leading batch dimension
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Eval mode + inference mode for the forward pass; softmax turns logits into probabilities
    effnetb2.eval()
    with torch.inference_mode():
        probabilities = torch.softmax(effnetb2(batch), dim=1)

    # Gradio's Label output expects a {label: probability} mapping
    single_prediction = probabilities[0]
    label_to_probability = {}
    for class_index, class_name in enumerate(class_names):
        label_to_probability[class_name] = float(single_prediction[class_index])

    elapsed_seconds = round(timer() - started_at, 5)

    return label_to_probability, elapsed_seconds

### 4. Gradio app ###

# Text shown in the Gradio interface: title, description and article
title = "Food Classifier"
description = "An EfficientNetB2 feature extractor computer vision model to classify images of food."
article = "Created by Myles Penner (https://github.com/MylesJP)"

# One single-item list per file found in the "examples/" directory
example_list = [["examples/" + example] for example in os.listdir("examples")]

# Wire predict() into a Gradio interface: images are passed in as PIL objects,
# and predict()'s two return values feed the Label and Number outputs in order.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Launch the Gradio app
demo.launch()

Overwriting demos/food_classifier_101/app.py


In [63]:
%%writefile demos/food_classifier_101/requirements.txt
torch==1.12.0
torchvision==0.13.0
gradio==3.1.4

Writing demos/food_classifier_101/requirements.txt
