<a href="https://colab.research.google.com/github/Andreluis2001/Pytorch_helper_library/blob/main/Pytorch_helper_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
%%writefile data_setup.py

import os
import torch
import requests
import zipfile

from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from typing import Tuple, Dict, List

def download_data(
    url_source: str,
    destination: str,
    remove_source: bool = True
) -> Path:
  """Download a zip archive and extract it into ``data/<destination>``.

  If ``data/<destination>`` already exists as a directory, nothing is
  downloaded and that path is returned as-is.

  Args:
    url_source: URL of the zip archive. The last path component of the URL
      is used as the temporary file name inside ``data/``.
    destination: Name of the sub-directory of ``data/`` that receives the
      extracted files.
    remove_source: When True, delete the downloaded zip file after it has
      been extracted.

  Returns:
    Path to ``data/<destination>``, whether it was just created or already
    existed.

  Raises:
    requests.HTTPError: If the server answers with an error status code.
    zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
  """
  data_dir_path = Path("data")
  data_path = data_dir_path / destination

  if data_path.is_dir():
    print(f"{data_path} already exists, skipping download")
    return data_path

  data_dir_path.mkdir(parents=True, exist_ok=True)
  target_zip_path = data_dir_path / Path(url_source).name

  # Download before creating the destination directory: if the request
  # fails, no empty directory is left behind that would make the next call
  # believe the data is already present.
  response = requests.get(url_source)
  response.raise_for_status()
  target_zip_path.write_bytes(response.content)

  data_path.mkdir(parents=True, exist_ok=True)
  with zipfile.ZipFile(target_zip_path, "r") as zip_f:
    zip_f.extractall(data_path)

  if remove_source:
    os.remove(target_zip_path)

  return data_path

def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transforms: transforms.Compose,
    batch_size: int = 32,
    num_workers: int = 1
) -> Tuple[
    torch.utils.data.DataLoader,
    torch.utils.data.DataLoader,
    List[str]
  ]:
  """Build train and test DataLoaders from two image folders.

  Both directories are read with ``datasets.ImageFolder`` and the same
  ``transforms`` object is applied to each of them.

  Args:
    train_dir: Root directory of the training images.
    test_dir: Root directory of the test images.
    transforms: Transform passed to both ``ImageFolder`` datasets.
    batch_size: Batch size used by both loaders.
    num_workers: Number of worker processes used by both loaders.

  Returns:
    A tuple ``(train_dataloader, test_dataloader, class_names)`` where
    ``class_names`` is the ``classes`` attribute of the training dataset.
  """
  train_data, test_data = (
      datasets.ImageFolder(root=folder, transform=transforms)
      for folder in (train_dir, test_dir)
  )

  # Settings that are identical for both loaders; only shuffling differs.
  shared_kwargs = dict(
      batch_size=batch_size,
      num_workers=num_workers,
      pin_memory=True,
  )

  train_dataloader = DataLoader(dataset=train_data, shuffle=True, **shared_kwargs)
  test_dataloader = DataLoader(dataset=test_data, shuffle=False, **shared_kwargs)

  return train_dataloader, test_dataloader, train_data.classes

Overwriting data_setup.py


In [7]:
%%writefile train_test_model.py

import torch
from sklearn.metrics import accuracy_score
from typing import Tuple, Dict, List

def train_model(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    optimizer: torch.optim.Optimizer
) -> Tuple[float, float]:
  """Run one training pass over ``train_dataloader``.

  The model is put in training mode and, for every ``(X, y)`` batch, the
  loss is computed, back-propagated and the optimizer is stepped.

  Args:
    model: Model to train; called as ``model(X)``.
    train_dataloader: Iterable of ``(X, y)`` batches that supports ``len()``.
    loss_fn: Callable invoked as ``loss_fn(predictions, y)``.
    optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called once
      per batch.

  Returns:
    ``(train_loss, train_acc)``: the mean of the per-batch losses and the
    mean of the per-batch accuracies (each batch weighs the same, regardless
    of its size).
  """
  model.train()

  train_loss, train_acc = 0.0, 0.0

  for X, y in train_dataloader:
    train_pred = model(X)

    loss = loss_fn(train_pred, y)
    train_loss += loss.item()

    # Softmax is monotonic, so argmax over the raw outputs picks the same
    # class. Computing accuracy with torch avoids converting tensors to
    # NumPy, which is not possible for tensors living on a GPU.
    pred_labels = torch.argmax(train_pred, dim=1)
    train_acc += (pred_labels == y).sum().item() / len(y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  train_loss /= len(train_dataloader)
  train_acc /= len(train_dataloader)

  return train_loss, train_acc

def test_model(
    model: torch.nn.Module,
    test_dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module
) -> Tuple[float, float]:
  """Evaluate ``model`` on ``test_dataloader`` without updating it.

  The model is put in evaluation mode and all forward passes run under
  ``torch.inference_mode()``.

  Args:
    model: Model to evaluate; called as ``model(X)``.
    test_dataloader: Iterable of ``(X, y)`` batches that supports ``len()``.
    loss_fn: Callable invoked as ``loss_fn(predictions, y)``.

  Returns:
    ``(test_loss, test_acc)``: the mean of the per-batch losses and the mean
    of the per-batch accuracies (each batch weighs the same, regardless of
    its size).
  """
  test_loss, test_acc = 0.0, 0.0

  model.eval()

  with torch.inference_mode():
    for X, y in test_dataloader:
      test_pred = model(X)

      test_loss += loss_fn(test_pred, y).item()

      # Softmax is monotonic, so argmax over the raw outputs picks the same
      # class. Computing accuracy with torch avoids converting tensors to
      # NumPy, which is not possible for tensors living on a GPU.
      pred_labels = torch.argmax(test_pred, dim=1)
      test_acc += (pred_labels == y).sum().item() / len(y)

  test_loss /= len(test_dataloader)
  test_acc /= len(test_dataloader)

  return test_loss, test_acc


# The "test_" prefix would make pytest collect this helper as a test case
# whenever it is imported into a test module; opt out explicitly.
test_model.__test__ = False

def train_test_model(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    epochs: int = 5
) -> Dict[str, List[float]]:
  """Train and evaluate ``model`` for a number of epochs.

  Each epoch calls ``train_model`` followed by ``test_model``, prints the
  four resulting metrics and appends them to the history that is returned.

  Args:
    model: Model to train and evaluate.
    train_dataloader: Batches passed to ``train_model``.
    test_dataloader: Batches passed to ``test_model``.
    loss_fn: Loss callable forwarded to both steps.
    optimizer: Optimizer forwarded to ``train_model``.
    epochs: Number of train/test rounds to run.

  Returns:
    Dictionary with the keys ``"train_loss"``, ``"train_acc"``,
    ``"test_loss"`` and ``"test_acc"``; each value is a list holding one
    entry per epoch, in order.
  """
  results: Dict[str, List[float]] = {
      "train_loss": [],
      "train_acc": [],
      "test_loss": [],
      "test_acc": []
  }

  for epoch in range(epochs):

    train_loss, train_acc = train_model(model, train_dataloader, loss_fn, optimizer)
    test_loss, test_acc = test_model(model, test_dataloader, loss_fn)

    # Accuracies are fractions in [0, 1]; they are shown as percentages.
    print(
        "-------------------------------------\n"
        f"Epoch: {epoch+1} |\n"
        f"Train Loss: {train_loss:.4f} |\n"
        f"Train Acc: {(train_acc * 100):.4f}% |\n"
        f"Test Loss: {test_loss:.4f} |\n"
        f"Test Acc: {(test_acc * 100):.4f}%\n"
        "-------------------------------------\n"
    )

    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)

  return results

Writing train_test_model.py


In [13]:
%%writefile utils.py

import torch
from pathlib import Path

def save_model(
    model: torch.nn.Module,
    target_dir: str,
    model_name: str
) -> None:
  """Write the model's ``state_dict`` to ``target_dir / model_name``.

  ``target_dir`` is created (including parents) when missing. If
  ``model_name`` does not end in ``.pth`` or ``.pt``, ``.pth`` is appended.

  Args:
    model: Model whose ``state_dict()`` is saved.
    target_dir: Directory that receives the file.
    model_name: File name for the saved state dictionary.
  """
  output_dir = Path(target_dir)
  output_dir.mkdir(parents=True, exist_ok=True)

  has_known_suffix = model_name.endswith((".pth", ".pt"))
  file_name = model_name if has_known_suffix else model_name + ".pth"
  model_save_path = output_dir / file_name

  print(f"Saving model state dictionary to {model_save_path}")

  torch.save(obj=model.state_dict(), f=model_save_path)

def set_seed(seed_to_set: int = 42):
  """Seed torch's random number generators.

  Calls ``torch.manual_seed`` and then ``torch.cuda.manual_seed`` with the
  same value.

  Args:
    seed_to_set: Seed value passed to both calls.
  """
  for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed=seed_to_set)

Overwriting utils.py
