# Going Modular using Python Scripts

In this notebook we use Python scripts to get the data, transform it, and train, evaluate and save the model using a single command in the terminal.

# Get data

In [86]:
import os
import zipfile
import requests
from pathlib import Path

# Where the dataset lives locally, and where it is fetched from.
data_path=Path('data/')
image_path=data_path/'pizza_steak_sushi_mod'
zip_path=data_path/'pizza_steak_sushi.zip'
dataset_url='https://github.com/HarshEkbote/PyTorch-Basics/raw/main/data/pizza_steak_sushi.zip'

# Make sure the extraction target (and therefore data_path) exists.
if image_path.is_dir():
  print(f"Directory {image_path} already exists!")
else:
  image_path.mkdir(parents=True,exist_ok=True)
  print("Directory Created!")

# Fetch the archive before touching the disk and fail loudly on an HTTP
# error, so an error page is never saved and later mistaken for a zip file.
request=requests.get(dataset_url)
request.raise_for_status()

with open(zip_path,'wb') as f:
  f.write(request.content)
  print("Dataset downloaded")

try:
  with zipfile.ZipFile(zip_path,'r') as zipref:
    zipref.extractall(image_path)
    print("Unzipped the zip file!")
finally:
  # The archive is only a temporary artefact; remove it even if extraction fails.
  os.remove(zip_path)

Directory data/pizza_steak_sushi_mod already exists!
Dataset downloaded
Unzipped the zip file!


# Creating Dataset and DataLoaders in script mode


In [87]:
# Create the directory that will hold the generated scripts.
# Only "directory is already there" is tolerated; any other failure
# (e.g. missing permissions) is allowed to surface instead of being
# reported as "Already exists".
try:
  os.mkdir('going_modular')
except FileExistsError:
  print("Already exists")

Already exists


In [88]:
%%writefile going_modular/data_setup.py
"Contains the functionality for craeting Pytorch dataloaders for image classification data"

import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms,datasets

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 0 (load data in the main process) so the default below is
# always a valid integer for DataLoader.
NUM_WORK=os.cpu_count() or 0

def create_dataloader(train_dir:str,test_dir:str,transform:transforms.Compose,batch_size:int,num_workers:int=NUM_WORK):
  """Build training and testing DataLoaders from two image folders.

  Both directories are read with torchvision's ImageFolder and the same
  transform is applied to each of them.

  Args:
    train_dir: path to the training image directory.
    test_dir: path to the testing image directory.
    transform: transform applied to every image of both datasets.
    batch_size: number of samples per batch in both DataLoaders.
    num_workers: number of worker processes per DataLoader
      (defaults to NUM_WORK).

  Returns:
    A tuple (train_dataloader, test_dataloader, class_name) where
    class_name is the list of classes found in the training dataset.
  """
  train_data=datasets.ImageFolder(train_dir,transform=transform)
  test_data=datasets.ImageFolder(test_dir,transform=transform)

  class_name=train_data.classes

  # Only the training data is shuffled.
  train_dataloader=DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True
  )

  test_dataloader=DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True
  )

  return train_dataloader,test_dataloader,class_name

Overwriting going_modular/data_setup.py


# Creating the model in script mode

In [89]:
%%writefile going_modular/model_builder.py

import torch
from torch import nn

class TinyVGG(nn.Module):
  """Small VGG-style CNN: two convolutional blocks and a linear classifier.

  Each block is Conv2d(3x3) -> ReLU -> Conv2d(3x3) -> ReLU -> MaxPool2d(2),
  with stride 1 and no padding in the convolutions.

  Args:
    input_shape: number of channels of the input images.
    hidden_units: number of channels produced by every convolution.
    output_shape: number of output logits.
  """

  def __init__(self,input_shape:int,hidden_units:int,output_shape:int):
    super().__init__()
    # Blocks are created in this order so parameter initialisation
    # consumes the random number generator in a fixed sequence.
    self.block_1=self._make_block(input_shape,hidden_units)
    self.block_2=self._make_block(hidden_units,hidden_units)
    # The classifier expects a 13x13 feature map per channel, which is what
    # the two blocks produce for 64x64 inputs
    # (64 -> 62 -> 60 -> 30 -> 28 -> 26 -> 13).
    flat_features=hidden_units*13*13
    self.classifier=nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=flat_features,out_features=output_shape),
    )

  @staticmethod
  def _make_block(in_channels:int,out_channels:int)->nn.Sequential:
    """Return one conv-relu-conv-relu-pool block."""
    layers=[
        nn.Conv2d(in_channels,out_channels,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.Conv2d(out_channels,out_channels,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,stride=2),
    ]
    return nn.Sequential(*layers)

  def forward(self, x: torch.Tensor):
    """Run x through both blocks and return the classifier output."""
    features=self.block_1(x)
    features=self.block_2(features)
    return self.classifier(features)

Overwriting going_modular/model_builder.py


In [90]:
import torch
from going_modular import model_builder
# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
  device='cuda'
else:
  device='cpu'

# Seed the RNG before building the model so its initial weights are reproducible.
torch.manual_seed(42)
# NOTE(review): input_shape=2 here, while going_modular/train.py builds the
# model with input_shape=3 — confirm which channel count is intended.
model0=model_builder.TinyVGG(input_shape=2,hidden_units=10,output_shape=3)
model0=model0.to(device)

model0

TinyVGG(
  (conv_block_1): Sequential(
    (0): Conv2d(2, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=1690, out_features=3, bias=True)
  )
)

# Train loop, Test Loop and Training the model

In [91]:
%%writefile going_modular/engine.py

from typing import Tuple,Dict,List
import torch
from tqdm.auto import tqdm

def train_step(model:torch.nn.Module,dataloader:torch.utils.data.DataLoader,loss_fn:torch.nn.Module,optimizer:torch.optim.Optimizer,device:torch.device)->Tuple[float,float]:
  """Train the model for one pass over dataloader.

  Puts the model in training mode and, for every batch, runs the forward
  pass, computes the loss, back-propagates and takes one optimizer step.

  Args:
    model: model to train.
    dataloader: iterable of (inputs, targets) batches; must support len().
    loss_fn: loss called as loss_fn(predictions, targets).
    optimizer: optimizer updating the model parameters.
    device: device the batches are moved to before the forward pass.

  Returns:
    A tuple (train_loss, train_acc): the per-batch loss and the per-batch
    accuracy, each averaged over the number of batches.
  """
  model.train()

  train_loss,train_acc=0,0

  for x,y in dataloader:
    x,y=x.to(device),y.to(device)

    y_pred=model(x)

    loss=loss_fn(y_pred,y)
    train_loss+=loss.item()

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    # Softmax is monotonic, so the predicted class is simply the argmax of
    # the raw logits (same computation as in test_step).
    y_pred_class=y_pred.argmax(dim=1)
    train_acc+=(y_pred_class==y).sum().item()/len(y_pred)

  # Both metrics are means over batches, not over individual samples.
  train_loss=train_loss/len(dataloader)
  train_acc=train_acc/len(dataloader)
  return train_loss,train_acc

def test_step(model:torch.nn.Module,dataloader:torch.utils.data.DataLoader,loss_fn:torch.nn.Module,device:torch.device)->Tuple[float,float]:
  model.eval()

  test_loss,test_acc=0,0
  with torch.inference_mode():
    for batch,(x,y) in enumerate(dataloader):
      x,y=x.to(device),y.to(device)

      test_pred_logits=model(x)

      loss=loss_fn(test_pred_logits,y)
      test_loss+=loss.item()

      test_pred_label=test_pred_logits.argmax(dim=1)
      test_acc+=((test_pred_label==y).sum().item()/len(test_pred_label))

  test_loss=test_loss/len(dataloader)
  test_acc=test_acc/len(dataloader)
  return test_loss,test_acc

def train(model:torch.nn.Module,train_dataloader:torch.utils.data.DataLoader,test_dataloader:torch.utils.data.DataLoader,loss_fn:torch.nn.Module,optimizer:torch.optim.Optimizer,epochs:int,device:torch.device)->Dict[str,List]:
  """Train and evaluate the model for a number of epochs.

  Every epoch runs train_step on train_dataloader, then test_step on
  test_dataloader, prints the four metrics and records them.

  Args:
    model: model to train and evaluate.
    train_dataloader: batches used by train_step.
    test_dataloader: batches used by test_step.
    loss_fn: loss passed to both steps.
    optimizer: optimizer passed to train_step.
    epochs: number of epochs to run.
    device: device passed to both steps.

  Returns:
    A dict with the keys 'train_loss', 'train_acc', 'test_loss' and
    'test_acc', each mapping to a list with one value per epoch.
  """
  metric_names=('train_loss','train_acc','test_loss','test_acc')
  results={name:[] for name in metric_names}

  for epoch in tqdm(range(epochs)):
    train_loss,train_acc=train_step(model,train_dataloader,loss_fn,optimizer,device)
    test_loss,test_acc=test_step(model,test_dataloader,loss_fn,device)

    # One progress line per epoch.
    message=(
        f"Epoch: {epoch+1} | "
        f"train_loss: {train_loss:.4f} |"
        f'train_acc: {train_acc:.2f} | '
        f'test_loss: {test_loss:.4f} | '
        f'test_acc: {test_acc:.2f}'
    )
    print(message)

    # metric_names and this tuple are in the same order.
    epoch_values=(train_loss,train_acc,test_loss,test_acc)
    for name,value in zip(metric_names,epoch_values):
      results[name].append(value)
  return results

Overwriting going_modular/engine.py


# Script to save the model

In [92]:
%%writefile going_modular/utils.py

from pathlib import Path
import torch

def save_model(model:torch.nn.Module,target_dir:str,model_name:str):
  """Save the model's state_dict to target_dir/model_name.

  Args:
    model: model whose state_dict is written.
    target_dir: directory to save into; created (with parents) if missing.
    model_name: file name of the checkpoint; must end with ".pt" or ".pth".

  Raises:
    AssertionError: if model_name has neither of the accepted suffixes.
      The target directory has already been created at that point.
  """
  save_dir=Path(target_dir)
  save_dir.mkdir(parents=True,exist_ok=True)

  has_valid_suffix=model_name.endswith((".pth",'.pt'))
  assert has_valid_suffix, "Must end with .pt or .pth"

  destination=save_dir/model_name
  print(f"[INFO] model saving to {destination}")

  # Only the parameters/buffers are stored, not the module object itself.
  torch.save(obj=model.state_dict(),f=destination)

Overwriting going_modular/utils.py


# Training the model

In [93]:
%%writefile going_modular/train.py

import os
import torch
from torchvision import transforms
import data_setup,engine,model_builder,utils

# Hyperparameters
NUM_EPOCHS=10
BATCH_SIZE=32
HIDDEN_UNITS=10
LRATE=0.01

# Dataset locations (relative to the directory the script is launched from)
train_dir='data/pizza_steak_sushi_mod/train'
test_dir='data/pizza_steak_sushi_mod/test'


def main():
  """Build the dataloaders and the model, train it, then save its weights."""
  device= 'cuda' if torch.cuda.is_available() else 'cpu'

  # Every image is resized to 64x64 and converted to a tensor.
  data_transform=transforms.Compose([
      transforms.Resize(size=(64,64)),
      transforms.ToTensor()
  ])

  train_dataloader,test_dataloader,class_names=data_setup.create_dataloader(
      train_dir=train_dir,
      test_dir=test_dir,
      transform=data_transform,
      batch_size=BATCH_SIZE
  )

  # One output logit per class found in the training directory.
  model=model_builder.TinyVGG(
      input_shape=3,
      hidden_units=HIDDEN_UNITS,
      output_shape=len(class_names)).to(device)

  loss_fn=torch.nn.CrossEntropyLoss()
  optimizer=torch.optim.Adam(params=model.parameters(),lr=LRATE)

  engine.train(model=model,
               train_dataloader=train_dataloader,
               test_dataloader=test_dataloader,
               loss_fn=loss_fn,
               optimizer=optimizer,
               epochs=NUM_EPOCHS,
               device=device)

  utils.save_model(model=model,
                   target_dir='models',
                   model_name='script_based_model.pth')


# The dataloaders start worker processes. On platforms that spawn workers
# (Windows, macOS) each worker re-imports this file, so the training code
# must only run when the file is executed as the main script.
if __name__=="__main__":
  main()

Overwriting going_modular/train.py


In [94]:
!python going_modular/train.py

  0% 0/10 [00:00<?, ?it/s]Epoch: 1 | train_loss: 1.1468 |train_acc: 0.28 | test_loss: 1.0923 | test_acc: 0.54
 10% 1/10 [00:01<00:16,  1.78s/it]Epoch: 2 | train_loss: 1.0993 |train_acc: 0.41 | test_loss: 1.0998 | test_acc: 0.20
 20% 2/10 [00:03<00:14,  1.78s/it]Epoch: 3 | train_loss: 1.1005 |train_acc: 0.29 | test_loss: 1.1042 | test_acc: 0.20
 30% 3/10 [00:05<00:12,  1.77s/it]Epoch: 4 | train_loss: 1.0937 |train_acc: 0.41 | test_loss: 1.1017 | test_acc: 0.20
 40% 4/10 [00:07<00:11,  1.87s/it]Epoch: 5 | train_loss: 1.1012 |train_acc: 0.29 | test_loss: 1.1041 | test_acc: 0.20
 50% 5/10 [00:10<00:12,  2.41s/it]Epoch: 6 | train_loss: 1.0938 |train_acc: 0.41 | test_loss: 1.1011 | test_acc: 0.20
 60% 6/10 [00:12<00:08,  2.21s/it]Epoch: 7 | train_loss: 1.1011 |train_acc: 0.29 | test_loss: 1.1032 | test_acc: 0.20
 70% 7/10 [00:14<00:06,  2.07s/it]Epoch: 8 | train_loss: 1.1105 |train_acc: 0.29 | test_loss: 1.0997 | test_acc: 0.20
 80% 8/10 [00:16<00:03,  1.98s/it]Epoch: 9 | train_loss: 1.1066 