<a href="https://colab.research.google.com/github/Basit2612/going_modular/blob/main/05_Pytorch_Going_Modular_(script_mode).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
# Create the package folder that the %%writefile cells write into; a no-op if it already exists.
os.makedirs(name="going_modular", exist_ok=True)

In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive inside the Colab runtime.
drive.mount("/content/drive")

Mounted at /content/drive


# Get data

In [3]:
%%writefile going_modular/get_data.py
import os
import zipfile
import requests
from pathlib import Path

# Locations shared with the rest of the project: `data/` holds the dataset folder.
data_path = Path("data")
image_path = data_path / "pizza_steak_sushi"

if image_path.is_dir():
  # Dataset folder is already present: skip the download entirely.
  print("Data is already availble")
else:
  print("Data is downloading...")
  data_path.mkdir(parents=True, exist_ok=True)
  zip_path = data_path / "pizza_steak_sushi.zip"

  # Fetch first and fail loudly on HTTP errors, so an error page is never
  # written to disk and then treated as the archive.
  request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
  request.raise_for_status()
  with open(zip_path, "wb") as f:
    f.write(request.content)

  # Create the dataset folder only once the archive is in hand; otherwise a
  # failed download would leave an empty folder that later imports would
  # mistake for "data already available".
  image_path.mkdir(parents=True, exist_ok = True)
  with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(image_path)

  # The archive is no longer needed after extraction.
  os.remove(zip_path)

Writing going_modular/get_data.py


In [4]:
from going_modular import get_data
# Derive the train/test split folders from the dataset root exposed by get_data.
train_dir, test_dir = (get_data.image_path / split for split in ("train", "test"))

train_dir, test_dir

Data is downloading...


(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

# Dataset and Dataloader

In [5]:
%%writefile going_modular/data_setup.py

from torchvision import transforms
# Default preprocessing pipeline: resize every image to 64x64, then convert it to a tensor.
data_transforms = transforms.Compose(
    [transforms.Resize((64, 64)), transforms.ToTensor()]
)

import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

def create_dataloaders(train_dir: str,
                       test_dir: str,
                       data_transforms:transforms.Compose,
                       batch_size: int,
                       num_workers: int = os.cpu_count()):
    """Build ImageFolder datasets and DataLoaders for the train and test folders.

    Args:
        train_dir: Root folder of the training images (the notebook passes
            pathlib.Path objects here as well as strings).
        test_dir: Root folder of the test images.
        data_transforms: Transform applied to every image of both datasets.
        batch_size: Number of samples per batch in both loaders.
        num_workers: Worker processes per DataLoader. Defaults to
            os.cpu_count(); None is treated as 0 (load in the main process).

    Returns:
        A tuple (train_data, test_data, train_dataloader, test_dataloader).
    """
    # os.cpu_count() may return None when the CPU count cannot be determined,
    # and callers forward that value directly; DataLoader needs an int.
    if num_workers is None:
        num_workers = 0

    train_data = datasets.ImageFolder(root=train_dir,
                                      transform=data_transforms,
                                      target_transform=None)

    test_data = datasets.ImageFolder(root=test_dir,
                                     transform=data_transforms,
                                     target_transform=None)

    # Only the training loader shuffles; the test loader keeps a fixed order.
    train_dataloader = DataLoader(dataset=train_data,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  pin_memory=True)

    test_dataloader = DataLoader(dataset=test_data,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 pin_memory=True)

    return train_data, test_data, train_dataloader, test_dataloader

Writing going_modular/data_setup.py


In [6]:
from going_modular import data_setup
# Build both datasets and loaders via the packaged helper, using as many
# workers as os.cpu_count() reports.
train_data, test_data, train_dataloader, test_dataloader = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    data_transforms=data_setup.data_transforms,
    batch_size=32,
    num_workers=os.cpu_count(),
)
train_data, test_data, train_dataloader, test_dataloader

(Dataset ImageFolder
     Number of datapoints: 225
     Root location: data/pizza_steak_sushi/train
     StandardTransform
 Transform: Compose(
                Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=True)
                ToTensor()
            ),
 Dataset ImageFolder
     Number of datapoints: 75
     Root location: data/pizza_steak_sushi/test
     StandardTransform
 Transform: Compose(
                Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=True)
                ToTensor()
            ),
 <torch.utils.data.dataloader.DataLoader at 0x7a0996782770>,
 <torch.utils.data.dataloader.DataLoader at 0x7a08d40b64a0>)

In [7]:
# Pull one batch from the training DataLoader and show the shapes of its image and label tensors.
image, label = next(iter(train_dataloader))
image.shape, label.shape

(torch.Size([32, 3, 64, 64]), torch.Size([32]))

In [8]:
# Take the first sample of the batch; re-add a leading batch dimension to the image.
image_single = image[0].unsqueeze(0)
label_single = label[0]
image_single.shape, label_single.shape

(torch.Size([1, 3, 64, 64]), torch.Size([]))

In [9]:
# Class names exposed by the training dataset through its `classes` attribute.
class_names = train_data.classes
class_names

['pizza', 'steak', 'sushi']

In [10]:
# Class-name -> integer-label mapping exposed by the training dataset.
class_to_idx = train_data.class_to_idx
class_to_idx

{'pizza': 0, 'steak': 1, 'sushi': 2}

# Making a Model (TinyVGG)

In [11]:
%%writefile going_modular/model_builder.py
import torch
from torch import nn
class TinyVGGModel(nn.Module):
  """TinyVGG-style CNN: two convolutional blocks followed by a linear classifier.

  Each block is Conv(3x3) -> ReLU -> Conv(3x3) -> ReLU -> MaxPool(2), with no
  padding, so every block maps a spatial size `n` to `(n - 4) // 2`.

  Args:
    input_shape: Number of channels of the input images.
    hidden_units: Number of channels produced by every convolution.
    output_shape: Number of output logits.
    image_size: Height/width of the input images, either a single int (square
      images) or a `(height, width)` pair. Defaults to 64, which gives the
      13x13 feature map this model was originally hard-coded for.

  Raises:
    ValueError: If `image_size` is too small to survive both blocks.
  """

  def __init__(self, input_shape, hidden_units, output_shape, image_size=64):
    super().__init__()

    # Work out the spatial size reaching the classifier instead of
    # hard-coding 13*13 (which is only correct for 64x64 inputs).
    if isinstance(image_size, int):
      height, width = image_size, image_size
    else:
      height, width = image_size
    for _ in range(2):  # two conv blocks
      # Two unpadded 3x3 convolutions remove 4 pixels; the 2x2 max-pool halves (floor).
      height, width = (height - 4) // 2, (width - 4) // 2
      if height < 1 or width < 1:
        raise ValueError(f"image_size {image_size} is too small for TinyVGGModel")

    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),

        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  stride=1,
                  padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,
                     stride=2)
    )
    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  padding=0),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,
                  out_channels=hidden_units,
                  kernel_size=3,
                  padding=0),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units * height * width,
                  out_features=output_shape)
    )

  def forward(self, x):
    """Return the class logits for a batch of images `x`."""
    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.classifier(x)
    return x

Writing going_modular/model_builder.py


# Creating Train Step & Test Step

In [12]:
%%writefile going_modular/engine.py
import torch
from torch import nn
from typing import Tuple
from typing import Dict, List
from tqdm.auto import tqdm
device = "cuda" if torch.cuda.is_available() else "cpu"


def train_step(model: torch.nn.Module,
               train_dataloader: torch.utils.data.DataLoader,
               optimizer: torch.optim.Optimizer,
               loss_fn: torch.nn.Module,
               device = device
               ) -> Tuple[float, float]:

  from sklearn.metrics import accuracy_score
  train_loss, train_acc= 0, 0
  model.to(device)
  model.train()

  for batch, (X, y) in enumerate(train_dataloader):
    X, y = X.to(device), y.to(device)

    # 1. Forward Pass
    y_logits = model(X)
    y_pred_probs = torch.argmax(torch.softmax(y_logits, dim=1), dim=1)

    # 2. Calculate the Loss
    loss = loss_fn(y_logits, y)
    train_loss += loss.item()

    train_acc += accuracy_score(y, y_pred_probs)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer Step
    optimizer.step()

  train_loss /= len(train_dataloader)
  train_acc /= len(train_dataloader)

  return train_loss, train_acc


def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              optimizer: torch.optim.Optimizer,
              loss_fn: torch.nn.Module,
              device = device) -> Tuple[float, float]:

  from sklearn.metrics import accuracy_score
  model.eval()
  test_loss, test_acc =0, 0
  with torch.inference_mode():
    for batch, (X, y) in enumerate(dataloader):
      X, y = X.to(device), y.to(device)
      # 1. Forward Pass
      test_pred = model(X)
      test_pred_probs = torch.argmax(torch.softmax(test_pred, dim=1), dim=1)

      # 2. Calculate Loss
      loss = loss_fn(test_pred, y)
      test_loss += loss.item()
      test_acc += accuracy_score(y, test_pred_probs)

    test_loss /= len(dataloader)
    test_acc /= len(dataloader)

    return test_loss, test_acc


def train(model:torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          loss_fn: torch.nn.Module,
          optimizer: torch.optim.Optimizer,
          epochs: int,
          device=device) -> Dict[str, List[float]]:

  results={"Train Loss": [],
           "Train Accuracy": [],
           "Test Loss": [],
           "Test Accuracy": []}

  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model=model,
                                       train_dataloader=train_dataloader,
                                       optimizer=optimizer,
                                       loss_fn=loss_fn,
                                       device=device)

    test_loss, test_acc = test_step(model=model,
                                    dataloader=test_dataloader,
                                    optimizer=optimizer,
                                    loss_fn=loss_fn,
                                    device=device)

    # Print out what's happening
    print(f"Epochs = {epoch} | Train Loss {train_loss:.4f}\tTrain Accuracy {train_acc:.4f} | Test Loss {test_loss:.4f}\tTest Accuracy {test_acc:.4f}")
    results["Train Loss"].append(train_loss)
    results["Train Accuracy"].append(train_acc)
    results["Test Loss"].append(test_loss)
    results["Test Accuracy"].append(test_acc)

  return results

Writing going_modular/engine.py


# Save Model

In [13]:
%%writefile going_modular/utils.py
from pathlib import Path
import torch

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
  """Save `model.state_dict()` to `target_dir/model_name`.

  Args:
    model: Model whose state dict is written.
    target_dir: Directory to save into; created (with parents) if missing.
    model_name: File name of the checkpoint; must end with ".pth" or ".pt".

  Raises:
    AssertionError: If `model_name` has neither of the accepted extensions.
  """
  # Validate the file name first so that a rejected call leaves no directory behind.
  assert model_name.endswith((".pth", ".pt")), "model_name should end with '.pt' or '.pth'"

  target_dir_path = Path(target_dir)
  target_dir_path.mkdir(parents=True, exist_ok=True)
  model_save_path = target_dir_path/model_name

  print(f"[INFO] Saving model to: {model_save_path}")
  torch.save(obj=model.state_dict(),
             f=model_save_path)

Writing going_modular/utils.py


# Train & Evaluate the Model

In [14]:
import torch
from torch import nn
# Seed the CPU and CUDA generators before any model or loader is created.
torch.manual_seed(42)
torch.cuda.manual_seed(42)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hyperparameters
NUM_EPOCHS = 5
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001

from going_modular import get_data, data_setup, model_builder, engine, utils

# Get Data
train_dir = get_data.image_path / "train"
test_dir = get_data.image_path / "test"

# Data Setup
train_data, test_data, train_dataloader, test_dataloader = data_setup.create_dataloaders(train_dir= train_dir,
                                                                              test_dir=test_dir,
                                                                              data_transforms= data_setup.data_transforms,
                                                                              batch_size = BATCH_SIZE,
                                                                              num_workers = os.cpu_count())

# Model Setup
model_0 = model_builder.TinyVGGModel(input_shape=3,
                                     hidden_units=HIDDEN_UNITS,
                                     output_shape=len(train_data.classes)).to(device)

# Loss and Optimizer Setup
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(),
                             lr=LEARNING_RATE)

# Engine
results = engine.train(model=model_0,
                       train_dataloader=train_dataloader,
                       test_dataloader=test_dataloader,
                       loss_fn=loss_fn,
                       optimizer=optimizer,
                       epochs=NUM_EPOCHS,
                       device=device)

# Utils
utils.save_model(model=model_0,
                 target_dir= "models",
                 model_name="05_going_modular_script_model.pth")

# Show the per-epoch metrics. Jupyter only renders the LAST expression of a
# cell, so this has to come after save_model rather than before it.
results

  0%|          | 0/5 [00:00<?, ?it/s]

Epochs = 0 | Train Loss 1.1063	Train Accuracy 0.3047 | Test Loss 1.0983	Test Accuracy 0.3011
Epochs = 1 | Train Loss 1.0998	Train Accuracy 0.3281 | Test Loss 1.0697	Test Accuracy 0.5417
Epochs = 2 | Train Loss 1.0869	Train Accuracy 0.4883 | Test Loss 1.0808	Test Accuracy 0.4924
Epochs = 3 | Train Loss 1.0842	Train Accuracy 0.3984 | Test Loss 1.0608	Test Accuracy 0.5833
Epochs = 4 | Train Loss 1.0662	Train Accuracy 0.4141 | Test Loss 1.0654	Test Accuracy 0.5644
[INFO] Saving model to: models/05_going_modular_script_model.pth
