In [1]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")

[INFO] torch/torchvision versions not as required, installing nightly versions.
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113
torch version: 2.0.1
torchvision version: 0.15.2


In [107]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine

In [3]:
def get_device():
    if torch.cuda.is_available():
        return "cuda"
    elif torch.backends.mps.is_available():
        return "mps"
    else:
        return "cpu"

In [4]:
# device = get_device()
device = 'cpu'
device

'cpu'

In [5]:
def set_seeds(seed: int=42):
    torch.manual_seed(seed)

In [6]:
set_seeds()

In [7]:
## get data
import os
import requests
import zipfile
from pathlib import Path

In [8]:
def download_data(source: str,
                 destination: str,
                 remove_source: bool=True)-> Path:
    
    '''
    Downloads a zipped dataset from source and unzips to destination
    '''
    ## setup path
    data_path = Path("data/")
    image_path = data_path / destination
    
    ## if image path doesn't exist, create it
    if image_path.is_dir():
        print(f"Image path already exists, skipping download...")
        
    else:
        print(f"[INFO] Did not find {image_path} directory, creating one..")
        image_path.mkdir(parents=True, exist_ok = True)
        
        ## Download data
        target_file = Path(source).name
        with open(data_path/target_file, "wb") as f:
            request = requests.get(source)
            print(f"[INFO] Downloading {target_file} from {source}")
            f.write(request.content)
            
        with zipfile.ZipFile(data_path/target_file, 'r') as zip_ref:
            print(f"[INFO] Unzipping {target_file} data...")
            zip_ref.extractall(image_path)
                  
        ## removing the zip file
        if remove_source:
            os.remove(data_path/target_file)
        
    return image_path                 

In [9]:
image_path = download_data(source = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                          destination="pizza_steak_sushi")
image_path

Image path already exists, skipping download...


PosixPath('data/pizza_steak_sushi')

In [10]:
train_dir = image_path/"train"
test_dir  = image_path/"test"
train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [11]:
## Creating dataloaders with manual transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std = [0.229, 0.224, 0.225])

manual_transforms = transforms.Compose([transforms.Resize((224,224)),
                                      transforms.ToTensor(),
                                      normalize])

print(f"Manually created transforms: {manual_transforms}")

from going_modular.going_modular import data_setup

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir,
                                                           test_dir,
                                                           manual_transforms,
                                                           batch_size = 32,
                                                           num_workers = 4)
train_dataloader, test_dataloader, class_names

Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


(<torch.utils.data.dataloader.DataLoader at 0x105f98af0>,
 <torch.utils.data.dataloader.DataLoader at 0x160c3f340>,
 ['pizza', 'steak', 'sushi'])

In [12]:
## Creating dataloaders with automatic transforms
weights =  torchvision.models.EfficientNet_B0_Weights.DEFAULT
automatic_transforms = weights.transforms()
print(f"Default transforms of EfficientNetB0: {automatic_transforms}")

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir,
                                                           test_dir,
                                                           automatic_transforms,
                                                           batch_size = 32,
                                                           num_workers = 4)
train_dataloader, test_dataloader, class_names

Default transforms of EfficientNetB0: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x137352670>,
 <torch.utils.data.dataloader.DataLoader at 0x1061f9f70>,
 ['pizza', 'steak', 'sushi'])

In [13]:
## Freezing the base layers and change the classifier layer
## Old way: it'll show a warning msg
# model = torchvision.models.efficientnet_b0(pretrained = True).to(device)
# model

In [36]:
device = get_device()
device

'mps'

In [108]:
## new way
weights =  torchvision.models.EfficientNet_B0_Weights.DEFAULT
model = torchvision.models.efficientnet_b0(weights=weights).to(device)
#model

In [109]:
## freezing the base layer
for param in model.parameters():
    param.requires_grad = False

In [110]:
model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [111]:
## Adjust the classifier layer
model.classifier = nn.Sequential(
    nn.Dropout(0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
).to(device)

In [112]:
from torchinfo import summary

In [113]:
model = model.to(device)
next(model.parameters()).device

device(type='mps', index=0)

In [96]:
summary(model, input_size=[32,3,224,224],
       verbose=0, col_names=["input_size", "output_size", "num_params", "trainable"],
       col_width=20, row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

## Train a single model and track results

In [114]:
# Define loss function optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [115]:
from torch.utils import tensorboard

In [116]:
# Setup a SummaryWriter
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x284b1d8b0>

In [117]:
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

from going_modular.going_modular.engine import train_step, test_step

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in range(epochs):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        ### New: Experiment tracking ###
        # See SummaryWriter documentation
        writer.add_scalars(main_tag="Loss",
                           tag_scalar_dict={"train_loss": train_loss,
                                            "test_loss": test_loss},
                           global_step=epoch)
        
        writer.add_scalars(main_tag="Accuracy",
                           tag_scalar_dict={"train_acc": train_acc,
                                            "test_acc": test_acc},
                           global_step=epoch)
        
        writer.add_graph(model=model.to(device),
                         input_to_model=torch.randn(32, 3, 224, 224).to(device))

    # Close the writer
    writer.close()
    ### End new ### 

    # Return the filled results at the end of the epochs
    return results

In [119]:
# Train model 
# Note: not using engine.train(), since we updated the train() function above
set_seeds()
results = train(model=model,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=5,
                device=device)

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.7298 | train_acc: 0.8438 | test_loss: 0.7148 | test_acc: 0.8447


Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 0.6986 | train_acc: 0.8320 | test_loss: 0.6379 | test_acc: 0.9062


Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 0.5993 | train_acc: 0.8945 | test_loss: 0.5794 | test_acc: 0.9167


Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.5665 | train_acc: 0.8633 | test_loss: 0.5523 | test_acc: 0.8561


Exception ignored in: <function _ConnectionBase.__del__ at 0x11533c9d0>
Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 132, in __del__
    self._close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function tqdm.__del__ at 0x1156bdf70>
Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/site-packages/tqdm/std.py", line 1145, in __del__
    self.close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/site-packages/tqdm/notebook.py", line 286, in close
    self.disp(bar_style='success', check_delay=False)
AttributeError: 'tqdm' object has no attribute 'disp'


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.5369 | train_acc: 0.8516 | test_loss: 0.4890 | test_acc: 0.9062


Traceback (most recent call last):
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/Users/shafinsaapel/tensorflow-test/env/lib/python3.9/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


In [120]:
results

{'train_loss': [0.729796290397644,
  0.6985898315906525,
  0.5993300825357437,
  0.5665352083742619,
  0.5369024947285652],
 'train_acc': [0.84375, 0.83203125, 0.89453125, 0.86328125, 0.8515625],
 'test_loss': [0.7148434321085612,
  0.6378500461578369,
  0.5793994466463724,
  0.5522621075312296,
  0.4890164037545522],
 'test_acc': [0.8446969696969697,
  0.90625,
  0.9166666666666666,
  0.8560606060606061,
  0.90625]}

### View our model's results with TensorBoard

In [121]:
# Let's view our experiments from within the notebook
%reload_ext tensorboard

In [122]:
%tensorboard --logdir=runs