In [42]:
try: 
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")



[INFO] torch/torchvision versions not as required, installing nightly versions.
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113
torch version: 2.6.0
torchvision version: 0.21.0


In [43]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
from torchinfo import summary

from going_modular import data_setup, engine

In [44]:
device="mps" if torch.backends.mps.is_available() else "cpu"
device

'mps'

In [45]:
import os
import zipfile

from pathlib import Path

import requests

# Setup path to data folder
data_path = Path("data/")
image_path = data_path / "pizza_steak_sushi"

# If the image folder doesn't exist, download it and prepare it... 
if image_path.is_dir():
    print(f"{image_path} directory exists.")
else:
    print(f"Did not find {image_path} directory, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)
    
    # Download pizza, steak, sushi data
    with open(data_path / "pizza_steak_sushi.zip", "wb") as f:
        request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
        print("Downloading pizza, steak, sushi data...")
        f.write(request.content)

    # Unzip pizza, steak, sushi data
    with zipfile.ZipFile(data_path / "pizza_steak_sushi.zip", "r") as zip_ref:
        print("Unzipping pizza, steak, sushi data...") 
        zip_ref.extractall(image_path)

    # Remove .zip file
    os.remove(data_path / "pizza_steak_sushi.zip")

data/pizza_steak_sushi directory exists.


In [46]:
# Setup Dirs
train_dir = image_path / "train"
test_dir = image_path / "test"


### Bring input data to the same transformations used for pretrained model

In [47]:
# Create a transforms pipeline manually (required for torchvision < 0.13)
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)), # 1. Reshape all images to 224x224 (though some models may require different sizes)
    transforms.ToTensor(), # 2. Turn image values to between 0 & 1 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], # 3. A mean of [0.485, 0.456, 0.406] (across each colour channel)
                         std=[0.229, 0.224, 0.225]) # 4. A standard deviation of [0.229, 0.224, 0.225] (across each colour channel),
])

In [48]:
train_dataloader, test_dataloader, class_names=data_setup.create_dataloaders(train_dir,test_dir,transform=manual_transforms,batch_size=32)

In [49]:
next(iter(train_dataloader))

[tensor([[[[ 0.5193,  0.6906,  0.8961,  ...,  2.2318,  2.2318,  2.2318],
           [ 0.5536,  0.7248,  0.9646,  ...,  2.2318,  2.2318,  2.2318],
           [ 0.6392,  0.8276,  1.0844,  ...,  2.2318,  2.2318,  2.2318],
           ...,
           [-1.4158, -1.4158, -1.4329,  ..., -0.9363, -0.9363, -0.9192],
           [-1.4672, -1.5014, -1.5185,  ..., -0.9705, -1.0048, -1.0048],
           [-1.5014, -1.5699, -1.6213,  ..., -1.0733, -1.1075, -1.1418]],
 
          [[ 0.7304,  0.9055,  1.1155,  ...,  2.4111,  2.4111,  2.4111],
           [ 0.7654,  0.9405,  1.1856,  ...,  2.4111,  2.4111,  2.4111],
           [ 0.8529,  1.0455,  1.3081,  ...,  2.4111,  2.4111,  2.4111],
           ...,
           [-1.3354, -1.3354, -1.3529,  ..., -0.8627, -0.8627, -0.8627],
           [-1.4230, -1.4405, -1.4580,  ..., -0.9153, -0.9503, -0.9503],
           [-1.4755, -1.5280, -1.5630,  ..., -1.0203, -1.0553, -1.0903]],
 
          [[ 0.9319,  1.1062,  1.3328,  ...,  2.6226,  2.6226,  2.6226],
           [ 

In [50]:
class_names

['pizza', 'steak', 'sushi']

In [51]:
## get the transforms used for pre trained model automatically without manual trabnsformas (update after torch 0.13)
# Get a set of pretrained model weights
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # .DEFAULT = best available weights from pretraining on ImageNet
weights

EfficientNet_B0_Weights.IMAGENET1K_V1

In [52]:
auto_transforms=weights.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [53]:
train_dataloader, test_dataloader, class_names=data_setup.create_dataloaders(train_dir,test_dir,transform=auto_transforms,batch_size=32)

## Get pretrained model from torchvision.models

In [None]:
weights=torchvision.models.EfficientNet_B0_Weights.DEFAULT
model=torchvision.models.efficientnet_b0(weights=weights).to(device)

In [55]:
summary(model)

Layer (type:depth-idx)                                  Param #
EfficientNet                                            --
├─Sequential: 1-1                                       --
│    └─Conv2dNormActivation: 2-1                        --
│    │    └─Conv2d: 3-1                                 864
│    │    └─BatchNorm2d: 3-2                            64
│    │    └─SiLU: 3-3                                   --
│    └─Sequential: 2-2                                  --
│    │    └─MBConv: 3-4                                 1,448
│    └─Sequential: 2-3                                  --
│    │    └─MBConv: 3-5                                 6,004
│    │    └─MBConv: 3-6                                 10,710
│    └─Sequential: 2-4                                  --
│    │    └─MBConv: 3-7                                 15,350
│    │    └─MBConv: 3-8                                 31,290
│    └─Sequential: 2-5                                  --
│    │    └─MBConv: 3-9         

In [56]:
print(model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [61]:
# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
        input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape"
        # col_names=["input_size"], # uncomment for smaller output
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
) 

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [59]:
##Freeze feature extraction layer 
for param in model.features.parameters():
    param.requires_grad=False

In [60]:
#Update classifier head to our custom problem
output_features=len(class_names)
model.classifier = torch.nn.Sequential(
    nn.Dropout(p=0.2,inplace = True),
    nn.Linear(in_features= 1280, out_features= output_features, bias=True )
    
).to(device)

## Train the model

In [None]:
device

'mps'

In [78]:
model.parameters().__next__()

Parameter containing:
tensor([[[[ 1.2156e-01,  6.5634e-01,  4.5671e-01],
          [-1.1092e-01, -6.1004e-01, -3.3345e-01],
          [ 2.7964e-02, -1.0312e-01, -1.0324e-01]],

         [[ 6.3553e-02,  1.6552e+00,  1.7436e+00],
          [-1.3646e-01, -1.5367e+00, -1.5937e+00],
          [ 5.0196e-02, -1.1360e-01, -1.2600e-01]],

         [[ 8.7276e-02,  3.6126e-01,  2.6946e-01],
          [-1.1966e-01, -2.8122e-01, -2.1883e-01],
          [ 3.6658e-02, -7.0751e-02, -8.1917e-02]]],


        [[[ 1.6449e-01, -2.0041e-01,  8.3092e-02],
          [ 8.9009e-01, -1.2110e+00,  2.7610e-01],
          [ 1.0740e+00, -1.2603e+00,  2.0645e-01]],

         [[ 3.2816e-01, -4.3449e-01,  1.8769e-01],
          [ 1.6213e+00, -2.1188e+00,  4.1014e-01],
          [ 1.7230e+00, -2.0756e+00,  3.3958e-01]],

         [[ 9.5290e-02, -1.5971e-01,  7.4559e-02],
          [ 8.0502e-01, -9.7034e-01,  2.6280e-01],
          [ 7.1944e-01, -1.0026e+00,  1.9870e-01]]],


        [[[ 5.3857e-02,  2.6367e-01,  1.1696

In [72]:
type(weights.get_state_dict)

method

In [81]:
dir(train_dataloader)

['_DataLoader__initialized',
 '_DataLoader__multiprocessing_context',
 '_IterableDataset_len_called',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_auto_collation',
 '_dataset_kind',
 '_get_iterator',
 '_index_sampler',
 '_iterator',
 'batch_sampler',
 'batch_size',
 'check_worker_number_rationality',
 'collate_fn',
 'dataset',
 'drop_last',
 'generator',
 'in_order',
 'multiprocessing_context',
 'num_workers',
 'persistent_workers',
 'pin_memory',
 'pin_memory_device',
 'prefetch_factor',
 'sampler',
 'timeout',
 'worker_init_fn']

In [None]:
optimizer=torch.optim.Adam(params=model.parameters(),lr=0.001)
loss_fn=nn.CrossEntropyLoss()

torch.manual_seed(42)
#make sure everything is on device
device = torch.device("mps")  # Use Metal Performance Shaders on Mac
model = model.to(device)  # Move the model to MPS
train_dataloader = train_dataloader.to(device)  
test_dataloader=test_dataloader.to(device)


train_results=engine.train(model=model,train_dataloader=train_dataloader,test_dataloader=test_dataloader,optimizer=optimizer,loss_fn=loss_fn,epochs=1000,device=device)

train_results

  0%|          | 0/1000 [00:00<?, ?it/s]

RuntimeError: Input type (MPSFloatType) and weight type (torch.FloatTensor) should be the same