### Creating a function to read data from file and load into dataloader

In [24]:
%%writefile pokemon_modules/data_setup.py
"""
Contains functionality for creating DataLoaders for
image classification data.
"""

import os 
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transformer: transforms.Compose,
    batch_size: int,
    num_workers: int = 0,
):
    """Build training and testing DataLoaders from two image folders.

    Both directories are read with ``torchvision.datasets.ImageFolder`` and
    the same ``transformer`` is applied to the training and testing images.

    Args:
        train_dir: Path to the training image folder.
        test_dir: Path to the testing image folder.
        transformer: Transform applied to every image of both datasets.
        batch_size: Number of samples per batch in each DataLoader.
        num_workers: Number of subprocesses used for data loading. The
            default of 0 loads data in the main process, which is the
            behaviour this function had before the parameter existed.

    Returns:
        A tuple ``(train_dataloader, test_dataloader, classes)`` where
        ``classes`` is the ``classes`` attribute of the training dataset.
    """
    train_data = datasets.ImageFolder(train_dir, transform=transformer)
    test_data = datasets.ImageFolder(test_dir, transform=transformer)

    # Class names are taken from the training set only.
    classes = train_data.classes

    # Only the training loader shuffles; the test loader keeps dataset order.
    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
    )
    test_dataloader = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
    )

    return train_dataloader, test_dataloader, classes

Overwriting pokemon_modules/data_setup.py


### Creating function for model building

In [53]:
%%writefile pokemon_modules/model_builder.py
"""
Contains python code to instantiate a pytorch nn model
"""

import torch
from torch import nn

class TinyVGG(nn.Module):
    """Small VGG-style CNN: two convolutional blocks followed by a linear classifier.

    Each convolutional block is Conv -> ReLU -> Conv -> ReLU -> MaxPool(2) ->
    Dropout(0.5). The 3x3 convolutions use padding=1, so only the max-pool
    layers change the spatial size (each one halves height and width).

    Args:
        input_shape: Number of input channels.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of output features (logits) of the classifier.
        image_size: Height/width of the square input images. Defaults to 64,
            which reproduces the previously hard-coded ``hidden_units*16*16``
            classifier input size.
    """

    def __init__(self, input_shape: int,
                 hidden_units: int,
                 output_shape: int,
                 image_size: int = 64):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(input_shape, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(p=0.5)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(p=0.5)
        )
        # Two MaxPool2d(2) layers each floor-halve the spatial size,
        # e.g. 64 -> 32 -> 16.
        pooled_size = image_size // 2 // 2
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units * pooled_size * pooled_size,
                      out_features=output_shape)
        )

    def forward(self, x):
        """Run ``x`` through both conv blocks and the classifier; returns logits."""
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))


Overwriting pokemon_modules/model_builder.py


### Creating a model for training and testing


In [4]:
from torchvision import transforms
from pokemon_modules import model_builder

# 3 input channels, 10 hidden units per conv layer, 1 output logit.
# NOTE(review): train.py uses output_shape=len(class_names); confirm that a
# single output is intended for this smoke test.
model = model_builder.TinyVGG(input_shape=3, hidden_units=10, output_shape=1)

In [5]:
# Resize every image to 64x64, then convert it to a tensor.
model_transformer = transforms.Compose(
    [transforms.Resize(size=(64, 64)), transforms.ToTensor()]
)

In [6]:
from pokemon_modules import data_setup

# The keyword must be `batch_size` to match create_dataloaders(); passing
# `num_batches` raises TypeError on a fresh kernel.
# NOTE(review): train_dir / test_dir are not defined in the visible cells;
# they must be defined before this cell runs.
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transformer=model_transformer,
    batch_size=16,
)

# Pull a single batch to inspect its shape and feed it to the model.
img, label = next(iter(train_dataloader))


In [7]:

# Shape of the image batch drawn from train_dataloader in the previous cell.
img.shape

torch.Size([16, 3, 64, 64])

In [None]:
import torch
# Put the model in evaluation mode before the forward pass (TinyVGG contains
# Dropout layers, which behave differently in train and eval mode).
model.eval()
# inference_mode() disables gradient tracking for the forward pass.
with torch.inference_mode():
    pred = model(img)
    

In [10]:
# With no `dim` argument, argmax() returns the index of the maximum over the
# flattened tensor, not one class index per sample.
# NOTE(review): use pred.argmax(dim=1) if per-sample predictions are wanted.
pred.argmax()

tensor(15)

In [42]:
%%writefile pokemon_modules/engine.py

import torch
from typing import Dict, List, Tuple
from tqdm.auto import tqdm
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               optim: torch.optim.Optimizer,
               loss_fn: torch.nn.Module,) -> Tuple[float, float]:
    """Trains a PyTorch model for a single epoch.

    Puts the model in training mode and, for every batch, runs the forward
    pass, computes the loss, back-propagates and takes an optimizer step.

    Args:
        model: A PyTorch model to be trained.
        dataloader: An iterable of ``(X, y)`` batches that supports ``len()``.
        optim: A PyTorch optimizer that updates the model's parameters.
        loss_fn: A PyTorch loss function to minimize.

    Returns:
        A tuple of Python floats ``(train_loss, train_accuracy)``, each
        averaged over the number of batches. For example:

        (0.1112, 0.8743)

    Raises:
        ZeroDivisionError: If ``len(dataloader)`` is 0.
    """
    train_loss, train_acc = 0.0, 0.0
    model.train()
    for X, y in dataloader:
        pred = model(X)
        loss = loss_fn(pred, y)

        # Accumulate a plain Python number: adding the loss tensor itself
        # would keep it attached to the autograd graph and make this
        # function return a tensor instead of a float.
        train_loss += loss.item()

        # Softmax is monotonic, so the argmax of the logits is the predicted
        # class; no need to compute probabilities first.
        pred_class = pred.argmax(dim=1)
        train_acc += (pred_class == y).sum().item() / len(pred)

        optim.zero_grad()
        loss.backward()
        optim.step()

    # Average the per-batch metrics over the number of batches.
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module) -> Tuple[float, float]:
  """Tests a PyTorch model for a single epoch.

  Turns a target PyTorch model to "eval" mode and then performs
  a forward pass on a testing dataset.

  Args:
    model: A PyTorch model to be tested.
    dataloader: A DataLoader instance for the model to be tested on.
    loss_fn: A PyTorch loss function to calculate loss on the test data.
    device: A target device to compute on (e.g. "cuda" or "cpu").

  Returns:
    A tuple of testing loss and testing accuracy metrics.
    In the form (test_loss, test_accuracy). For example:

    (0.0223, 0.8985)
  """
  model.eval()
  test_loss, test_acc = 0, 0
  with torch.inference_mode():
      for X, y in dataloader:
          test_pred_logits = model(X)
          loss = loss_fn(test_pred_logits, y)
          test_loss += loss.item()
          test_pred_labels = test_pred_logits.argmax(dim=1)
          test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optim: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    For each epoch, runs train_step() on the training dataloader and then
    test_step() on the testing dataloader, prints the four metrics and
    appends them to the results history.

    Args:
        model: A PyTorch model to be trained and tested.
        train_dataloader: A DataLoader instance for the model to be trained on.
        test_dataloader: A DataLoader instance for the model to be tested on.
        optim: A PyTorch optimizer to help minimize the loss function.
        loss_fn: A PyTorch loss function to calculate loss on both datasets.
        epochs: An integer indicating how many epochs to train for.

    Returns:
        A dictionary of training and testing loss as well as training and
        testing accuracy metrics. Each metric has one Python float per epoch.
        In the form: {train_loss: [...],
                      train_acc: [...],
                      test_loss: [...],
                      test_acc: [...]}
        For example if training for epochs=2:
                     {train_loss: [2.0616, 1.0537],
                      train_acc: [0.3945, 0.3945],
                      test_loss: [1.2641, 1.5706],
                      test_acc: [0.3400, 0.2973]}
    """
    # One list per metric, filled with one value per epoch.
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []}

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optim=optim)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn)

        # Coerce every metric to a plain float so the history matches the
        # declared List[float] type even if a step function hands back a
        # 0-dim tensor.
        epoch_metrics = {"train_loss": float(train_loss),
                         "train_acc": float(train_acc),
                         "test_loss": float(test_loss),
                         "test_acc": float(test_acc)}

        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {epoch_metrics['train_loss']:.4f} | "
            f"train_acc: {epoch_metrics['train_acc']:.4f} | "
            f"test_loss: {epoch_metrics['test_loss']:.4f} | "
            f"test_acc: {epoch_metrics['test_acc']:.4f}"
        )

        for metric_name, value in epoch_metrics.items():
            results[metric_name].append(value)

    return results

Overwriting pokemon_modules/engine.py


In [15]:
%%writefile pokemon_modules/utils.py
"""
File containing various utility functions for pytorch model training
"""
import torch
from pathlib import Path

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
    """Writes a model's state_dict to ``target_dir / model_name``.

    The target directory (including missing parents) is created first, then
    the filename extension is checked, then the state_dict is saved.

    Args:
        model: The PyTorch model whose state_dict is saved.
        target_dir: Directory to save into; created if it does not exist.
        model_name: Filename for the saved model. Must end with ".pth"
            or ".pt".

    Raises:
        AssertionError: If ``model_name`` does not end with ".pth" or ".pt".

    Example usage:
        save_model(model=model_0,
                   target_dir="models",
                   model_name="05_going_modular_tingvgg_model.pth")
    """
    # Make sure the output directory exists.
    out_dir = Path(target_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Validate the extension and build the full destination path.
    has_valid_suffix = model_name.endswith((".pth", ".pt"))
    assert has_valid_suffix, "model_name should end with '.pt' or '.pth'"
    destination = out_dir / model_name

    # Only the parameters/buffers (state_dict) are saved, not the module object.
    print(f"[INFO] Saving model to: {destination}")
    torch.save(model.state_dict(), destination)

Writing pokemon_modules/utils.py


In [54]:
%%writefile pokemon_modules/train.py
"""
Trains a PyTorch image classification model and saves its weights.

NOTE: no device is selected in this script, so the model and data stay on
PyTorch's default device.
"""

import os
import torch
from torchvision import transforms
import data_setup, engine, model_builder, utils
from timeit import default_timer as timer
from pathlib import Path
# Image folders used for training and testing.
data_dir = Path("data")
train_dir = data_dir / "train"
test_dir = data_dir / "test"

# Hyperparameters.
NUM_EPOCHS = 7
BATCH_SIZE = 16  # renamed from the misspelled BATHC_SIZE
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001

# Resize every image to 64x64 and convert it to a tensor.
data_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transformer=data_transform,
    batch_size=BATCH_SIZE)

# One output logit per class found in the training folder.
model = model_builder.TinyVGG(input_shape=3,
                              hidden_units=HIDDEN_UNITS,
                              output_shape=len(class_names))

loss_fn = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),
                         lr=LEARNING_RATE)

# Time the full training run.
start_time = timer()
engine.train(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optim=optim,
             epochs=NUM_EPOCHS)

end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

# Persist the trained weights to models/pokemon_model.pth.
utils.save_model(model=model,
                 target_dir="models",
                 model_name="pokemon_model.pth")

Overwriting pokemon_modules/train.py


In [55]:
!python pokemon_modules/train.py

Epoch: 1 | train_loss: 6.4578 | train_acc: 0.0268 | test_loss: 5.9659 | test_acc: 0.1250
Epoch: 2 | train_loss: 4.4590 | train_acc: 0.2481 | test_loss: 3.8429 | test_acc: 0.3882
Epoch: 3 | train_loss: 2.8168 | train_acc: 0.4835 | test_loss: 2.9185 | test_acc: 0.5484
Epoch: 4 | train_loss: 2.0086 | train_acc: 0.6154 | test_loss: 2.4629 | test_acc: 0.6328
Epoch: 5 | train_loss: 1.5195 | train_acc: 0.7020 | test_loss: 2.1082 | test_acc: 0.6900
Epoch: 6 | train_loss: 1.1902 | train_acc: 0.7579 | test_loss: 1.8849 | test_acc: 0.7362
Epoch: 7 | train_loss: 0.9715 | train_acc: 0.7940 | test_loss: 1.7443 | test_acc: 0.7609
[INFO] Total training time: 3482.950 seconds
[INFO] Saving model to: models\pokemon_model.pth



  0%|          | 0/7 [00:00<?, ?it/s]
 14%|█▍        | 1/7 [08:39<51:58, 519.68s/it]
 29%|██▊       | 2/7 [15:35<38:11, 458.34s/it]
 43%|████▎     | 3/7 [22:32<29:19, 439.76s/it]
 57%|█████▋    | 4/7 [29:20<21:21, 427.25s/it]
 71%|███████▏  | 5/7 [39:10<16:11, 485.84s/it]
 86%|████████▌ | 6/7 [49:43<08:55, 535.97s/it]
100%|██████████| 7/7 [58:02<00:00, 523.77s/it]
100%|██████████| 7/7 [58:02<00:00, 497.49s/it]
