## 0 - Get setup

In [1]:
!pip install --upgrade torchvision
!pip install --upgrade torch
import torch
import torchvision
print(f"torch version:{torch.__version__}")
print(f"torchvision version:{torchvision.__version__}")

Collecting torchvision
  Downloading torchvision-0.19.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting torch==2.4.0 (from torchvision)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.4.0->torchvision)
  Downloading 

In [2]:
import matplotlib.pyplot as plt
from torch import nn
from torchvision import transforms

try:
  from torchinfo import summary
except:
  !pip install torchinfo
  from torchinfo import summary

In [3]:
# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !mv pytorch-deep-learning/helper_functions.py . # get the helper_functions.py script
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves

[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4056, done.[K
remote: Total 4056 (delta 0), reused 0 (delta 0), pack-reused 4056[K
Receiving objects: 100% (4056/4056), 646.90 MiB | 32.14 MiB/s, done.
Resolving deltas: 100% (2372/2372), done.
Updating files: 100% (248/248), done.


In [5]:
!ls going_modular/going_modular

README.md    data_setup.py  model_builder.py  train.py
__pycache__  engine.py	    predictions.py    utils.py


In [6]:
# Device-agnostic setup: use the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cpu'

## 2 - Creating a function to make an EffNetB2 feature extractor

In [7]:
def create_effnetb2_model(num_classes:int=3,
                          seed:int=42):
  """Build an EfficientNet-B2 feature extractor with a fresh classifier head.

  Loads the default pretrained EfficientNet-B2 weights, freezes every existing
  parameter, then replaces `model.classifier` with a new Dropout + Linear head
  whose output size is `num_classes`. Only the new head stays trainable.

  Args:
    num_classes: Number of output units of the new classifier head.
    seed: Value passed to `torch.manual_seed` right before the head is
      created, so its initial weights are reproducible.

  Returns:
    A `(model, transform)` tuple: the modified EfficientNet-B2 model and the
    preprocessing transform bundled with the pretrained weights.
  """
  weights=torchvision.models.EfficientNet_B2_Weights.DEFAULT
  transform=weights.transforms()
  model=torchvision.models.efficientnet_b2(weights=weights)

  # Freeze the pretrained parameters (the head created below is new, so it
  # keeps the default requires_grad=True).
  for param in model.parameters():
    param.requires_grad=False

  # Read the feature width from the pretrained head (index 1 of the classifier
  # Sequential is its Linear layer) instead of hard-coding 1408.
  in_features=model.classifier[1].in_features

  # Seed immediately before creating the head so its random init is repeatable.
  torch.manual_seed(seed)
  model.classifier=nn.Sequential(nn.Dropout(p=0.3,inplace=True),
                                 nn.Linear(in_features=in_features,out_features=num_classes))
  return model,transform


In [12]:
# Creating the model and its transforms
effnetb2_food101,effnetb2_transforms=create_effnetb2_model(num_classes=101)

In [14]:
# taking a look at the model transforms
effnetb2_transforms

ImageClassification(
    crop_size=[288]
    resize_size=[288]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [13]:
# `summary` is re-imported here so the cell can run on its own.
from torchinfo import summary

# Print a per-layer table (input/output shapes, parameter counts, trainable flag)
# for the Food101 EffNetB2 model.
# NOTE(review): the summary uses a 224x224 input, while effnetb2_transforms
# reports crop_size=[288] - confirm which spatial size is intended here.
summary(model=effnetb2_food101,
        input_size=(1,3,224,224),
        col_names=["input_size","output_size","num_params","trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 101]             --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1408, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

Since we're working with a larger dataset, we may want to introduce some data augmentation techniques:

*    This is because with larger datasets and larger models, overfitting becomes more of a problem.
*    Because we're working with a large number of classes, let's use TrivialAugment as our data augmentation technique.

For a list of state-of-the-art computer vision recipes: https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/


In [16]:
# Training-only pipeline: apply TrivialAugmentWide first, then the
# preprocessing transform that ships with the EffNetB2 weights.
food101_train_transforms = transforms.Compose(
    [
        transforms.TrivialAugmentWide(),
        effnetb2_transforms,
    ]
)
food101_train_transforms

Compose(
    TrivialAugmentWide(num_magnitude_bins=31, interpolation=InterpolationMode.NEAREST, fill=None)
    ImageClassification(
    crop_size=[288]
    resize_size=[288]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)
)

## 3 - Getting data for FoodVision Big 
Get Food101 dataset - https://pytorch.org/vision/main/generated/torchvision.datasets.Food101.html

In [17]:
from pathlib import Path

from torchvision import datasets

# Directory passed as the dataset root (download=True fetches the data there)
data_dir = Path("data")

# Training split: uses the augmented training transforms
train_data = datasets.Food101(
    root=data_dir,
    split="train",
    transform=food101_train_transforms,
    download=True,
)

# Test split: EffNetB2 preprocessing only, no data augmentation
test_data = datasets.Food101(
    root=data_dir,
    split="test",
    transform=effnetb2_transforms,
    download=True,
)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data/food-101.tar.gz


100%|██████████| 4996278331/4996278331 [00:31<00:00, 159367084.17it/s]


Extracting data/food-101.tar.gz to data


In [20]:
# Get Food101 class names
food101_class_names =train_data.classes
food101_class_names[:10]

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito']

### 3.1 - Creating a subset of the Food101 dataset for faster experimenting
We want our first few experiments to run as quickly as possible.
To do so, let's make a subset of 20% of the data from the Food101 dataset (training and test).

Our short-term goal: to beat the original Food101 paper result of 56.40% accuracy on the test dataset (see the paper: https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/static/bossard_eccv14_food-101.pdf)

We want to beat this result using modern deep learning techniques and only 20% of the data.

In [21]:
from torch.utils.data import random_split # https://pytorch.org/docs/stable/data.html#torch.utils.data.random_split

def split_dataset(dataset:torch.utils.data.Dataset,
                  split_size:float=0.2,
                  seed:int=42):
  """Randomly split a dataset into two non-overlapping subsets.

  Args:
    dataset: Any sized, indexable dataset (must support `len()`).
    split_size: Fraction of `dataset` placed in the first split; must lie in
      [0, 1]. The first split gets `int(len(dataset) * split_size)` samples,
      the second split gets the rest.
    seed: Seed used for the random permutation.

  Returns:
    A `(random_split_1, random_split_2)` tuple of subsets whose lengths are
    `int(len(dataset) * split_size)` and the remainder.

  Raises:
    ValueError: If `split_size` is outside [0, 1] (this also rejects NaN).
  """
  # Out-of-range fractions would give a negative or oversized length whose sum
  # still equals len(dataset), so reject them up front.
  if not 0 <= split_size <= 1:
    raise ValueError(f"split_size must be between 0 and 1, got {split_size}")

  # Create split lengths based on original dataset length
  length_1 = int(len(dataset) * split_size) # defaults to 20% data split
  length_2 = len(dataset) - length_1 # remaining length

  # Print out info
  print(f"[INFO] Splitting dataset of length {len(dataset)} into splits of size: {length_1} and {length_2}")

  # Create splits with given random seed.
  # NOTE: torch.manual_seed reseeds PyTorch's global RNG and returns the
  # default generator, so calling this function also resets the global seed.
  random_split_1, random_split_2 = random_split(dataset,
                                                lengths=[length_1, length_2],
                                                generator=torch.manual_seed(seed))

  return random_split_1, random_split_2


In [22]:
# Sanity check: recompute by hand the lengths that split_dataset derives for a
# 20% split of the training data.
length_1 = int(len(train_data) * 0.2) # 20% of the training samples, truncated to an int
length_2 = len(train_data) - length_1 # the remaining samples
length_1,length_2

(15150, 60600)

In [23]:
# Keep only the first (20%) split of each dataset; the second split returned by
# split_dataset is discarded.
train_data_food101_20_percent, _ = split_dataset(train_data, split_size=0.2)
test_data_food101_20_percent, _ = split_dataset(test_data, split_size=0.2)


[INFO] Splitting dataset of length 75750 into splits of size: 15150 and 60600
[INFO] Splitting dataset of length 25250 into splits of size: 5050 and 20200


In [24]:
len(train_data_food101_20_percent),len(test_data_food101_20_percent)

(15150, 5050)

## 4 - Turning our Food101 datasets into DataLoaders

In [25]:
# checking the number of CPU available
import os
os.cpu_count()

4

In [27]:
import os
import torch

# os.cpu_count() may return None when the count cannot be determined; fall back
# to 0 so the data is loaded in the main process instead of raising.
# This value is very experimental and the best value will differ depending on the
# hardware you're using, search "pytorch num workers setting" for more.
NUM_WORKERS = os.cpu_count() or 0
BATCH_SIZE = 32
# Pinned memory enables faster data transfer to CUDA-enabled GPUs; only request
# it when CUDA is actually available (this notebook may run on CPU).
PIN_MEMORY = torch.cuda.is_available()

# Create Food101 20% training DataLoader (shuffled every epoch)
train_dataloader_food101_20_percent = torch.utils.data.DataLoader(dataset=train_data_food101_20_percent,
                                                                  batch_size=BATCH_SIZE,
                                                                  shuffle=True,
                                                                  num_workers=NUM_WORKERS,
                                                                  pin_memory=PIN_MEMORY)
# Create Food101 20% testing DataLoader (fixed order)
test_dataloader_food101_20_percent = torch.utils.data.DataLoader(dataset=test_data_food101_20_percent,
                                                                 batch_size=BATCH_SIZE,
                                                                 shuffle=False,
                                                                 num_workers=NUM_WORKERS,
                                                                 pin_memory=PIN_MEMORY)

In [28]:
len(train_dataloader_food101_20_percent),len(test_dataloader_food101_20_percent)

(474, 158)