<a href="https://colab.research.google.com/github/Adityaasati/PyTorch-Face-Recognition/blob/main/Pytorch_Face_Recognition_Scripts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [81]:
import os

os.makedirs("going_modular", exist_ok=True)

In [105]:
import requests
import zipfile
import os
from pathlib import Path
import random
import shutil

# Define paths
data_path = Path("data/")
image_path = data_path / "celebrity_face_image_dataset"


# Check if the directory exists, otherwise create it
if image_path.is_dir():
    print(f"{image_path} directory already exists... skipping download")
else:
    print(f"{image_path} does not exist, creating one...")
    image_path.mkdir(parents=True, exist_ok=True)

# Download dataset
dataset_url = "https://github.com/Adityaasati/PyTorch-Face-Recognition/raw/refs/heads/main/celebrity_face_image_dataset.zip"
with open(data_path / "celebrity_face_image_dataset.zip", "wb") as f:
    request = requests.get(dataset_url)
    print("Downloading Celebrities Face Image data...")
    f.write(request.content)

# Unzip the downloaded file
with zipfile.ZipFile(data_path / "celebrity_face_image_dataset.zip", "r") as zip_ref:
    print("Unzipping Celebrities Face Image data...")
    zip_ref.extractall(image_path)





data/celebrity_face_image_dataset directory already exists... skipping download
Downloading Celebrities Face Image data...
Unzipping Celebrities Face Image data...


In [83]:
image_path = image_path/"celebrity_face_image_dataset"
train_dir = image_path/"train"
test_dir = image_path/"test"

In [103]:
%%writefile going_modular/Customs.py

import random
from PIL import Image
from typing import Tuple, Dict, List
from torch.utils.data import Dataset,DataLoader
from pathlib import Path
import os
import torch

data_path = Path("data/")
image_path = data_path / "celebrity_face_image_dataset"/"celebrity_face_image_dataset"
train_dir = image_path/"train"
test_dir = image_path/"test"
target_directory = train_dir

class_names = sorted([entry.name for entry in os.scandir(target_directory)])

def class_and_idx(target_directory):
  class_names = sorted(entry.name for entry in os.scandir(target_directory) if entry.is_dir())

  class_idx = {class_name: i for i,class_name in enumerate(class_names)}

  return class_names,class_idx

class ImageFolderCustom(Dataset):
  def __init__(self, targ_dir:str, transform=None):
    self.paths = list(Path(targ_dir).glob("*/*.jpg"))
    self.transform=transform
    self.classes, self.class_to_idx = class_and_idx(targ_dir)
    print(self.class_to_idx,"self.class_to_idx")



  def load_image(self, indx:int) -> Image:
    image_path = self.paths[indx]
    return Image.open(image_path)

  def __len__(self):
    return len(self.paths)

  def __getitem__(self, index:int) ->Tuple[torch.Tensor, int]:
    img = self.load_image(index)
    class_name = self.paths[index].parent.name
    class_idx = self.class_to_idx[class_name]

    if self.transform:
      return self.transform(img), class_idx
    else:
      return img, class_idx



Overwriting going_modular/Customs.py


In [108]:
%%writefile going_modular/data_setup.py

import os

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import Customs


NUM_WORKERS = os.cpu_count()



def create_dataloaders(
    train_dir: str,
    test_dir: str,
    transform: transforms.Compose,
    batch_size: int,
    num_workers: int=NUM_WORKERS
):
  train_data = Customs.ImageFolderCustom(train_dir, transform=transform)
  test_data = Customs.ImageFolderCustom(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Turn images into data loaders
  train_custom_dataloader = DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True,
  )
  test_custom_dataloader = DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return train_custom_dataloader, test_custom_dataloader, class_names

Overwriting going_modular/data_setup.py


In [91]:
%%writefile going_modular/model_builder.py

from torch import nn
class TinyVGG(nn.Module):
  def __init__(self,
               input_shape: int,
               hidden_units:int,
               output_shape:int):
    super().__init__()
    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(in_channels=input_shape,out_channels=hidden_units,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,stride=2)
    )
    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.Conv2d(in_channels=hidden_units,out_channels=hidden_units,kernel_size=3,stride=1,padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2,stride=2)
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=hidden_units*29*29,out_features=output_shape)
    )


  def forward(self,x):
    return self.classifier(self.conv_block_2(self.conv_block_1(x)))


Overwriting going_modular/model_builder.py


In [92]:
import torch

from going_modular import model_builder

device = "cuda" if torch.cuda.is_available() else "cpu"

# Instantiate an instance of the model from the "model_builder.py" script
torch.manual_seed(42)
model_1 = model_builder.TinyVGG(input_shape=3, # number of color channels (3 for RGB)
                                hidden_units=10,
                                output_shape=len(class_names)).to(device)
model_1

TinyVGG(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=8410, out_features=5, bias=True)
  )
)

In [94]:
%%writefile going_modular/engine.py
from tqdm.auto import tqdm
from torch import nn
import torch
from typing import Tuple, Dict, List


def train_fn(model:torch.nn.Module,
             dataloader:torch.utils.data.DataLoader,
             loss_fn:torch.nn.Module,
             optimizer:torch.optim.Optimizer,
             device: torch.device) -> Tuple[float, float]:

  model.train()

  train_loss, train_acc =0,0


  for batch, (X,y) in enumerate(dataloader):

    X,y = X.to(device), y.to(device)

    y_pred = model(X)

    loss = loss_fn(y_pred, y)
    train_loss+=loss.item()

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

  train_loss = train_loss/len(dataloader)
  train_acc = train_acc/len(dataloader)
  return train_loss,train_acc

def test_fn(model:torch.nn.Module,
            dataloader: torch.utils.data.DataLoader,
            loss_fn:torch.nn.Module,
            device: torch.device) -> Tuple[float, float]:

  model.eval()
  test_loss, test_acc=0,0

  with torch.inference_mode():
    for batch, (X,y) in enumerate(dataloader):
      X,y = X.to(device),y.to(device)
      test_pred = model(X)

      loss = loss_fn(test_pred,y)
      test_loss+=loss.item()

      test_labels = test_pred.argmax(dim=1)
      test_acc += (test_labels == y).sum().item()/len(test_pred)

  test_loss = test_loss/len(dataloader)
  test_acc = test_acc/len(dataloader)

  return test_loss, test_acc




def train_and_test(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:

  results = {"train_acc":[],
             "train_loss":[],
             "test_acc":[],
             "test_loss":[]
             }
  for epoch in tqdm(range(epochs)):
    train_loss, train_acc  = train_fn(model=model,
                                    dataloader=train_dataloader,
                                    loss_fn=loss_fn,
                                    optimizer=optimizer,
                                      device=device)
    test_loss, test_acc  = test_fn(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device)
    print(f"Epoch: {epoch} | Train Loss: {train_loss} | Train acc: {train_acc} | Test Loss: {test_loss} | Test acc: {test_acc}")
    results['train_acc'].append(train_acc)
    results['train_loss'].append(train_loss)
    results['test_acc'].append(test_acc)
    results['test_loss'].append(test_loss)

  return results





Overwriting going_modular/engine.py


In [95]:
%%writefile going_modular/utils.py

from pathlib import Path

import torch

def save_model(model: torch.nn.Module,
               target_dir: str,
               model_name: str):
    # Create target directory
    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True,
                        exist_ok=True)

    # Create model save path
    assert model_name.endswith(".pth") or model_name.endswith(".pt"), "model_name should end with '.pt' or '.pth'"
    model_save_path = target_dir_path / model_name

    # Save the model state_dict()
    print(f"[INFO] Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(),
             f=model_save_path)

Overwriting going_modular/utils.py


 ### This will allow us to run all of the functions we've written with a single line of code on the command line:

python going_modular/train.py

Or if we're running it in a notebook:

!python going_modular/train.py

In [111]:
%%writefile going_modular/train.py


import os
import torch
from pathlib import Path
from torchvision import transforms
import data_setup, engine, model_builder, utils


# Setup hyperparameters
NUM_EPOCHS = 5
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001

# Setup directories
# train_dir = "data/pizza_steak_sushi/train"
# test_dir = "data/pizza_steak_sushi/test"
data_path = Path("data/")
image_path = data_path / "celebrity_face_image_dataset"/ "celebrity_face_image_dataset"
train_dir = image_path/"train"
test_dir = image_path/"test"

# Setup target device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Create transforms
data_transform = transforms.Compose([transforms.Resize(size=(128,128)),
                                                       transforms.RandomHorizontalFlip(p=0.5),
                                                       transforms.ToTensor()])

# Create DataLoaders with help from data_setup.py
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=data_transform,
    batch_size=BATCH_SIZE
)

# Create model with help from model_builder.py
model = model_builder.TinyVGG(
    input_shape=3,
    hidden_units=HIDDEN_UNITS,
    output_shape=len(class_names)
).to(device)

# Set loss and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=LEARNING_RATE)

print(device,"device")
# Start training with help from engine.py
engine.train_and_test(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=NUM_EPOCHS,
             device=device)

# Save the model with help from utils.py
utils.save_model(model=model,
                 target_dir="models",
                 model_name="going_modular_script_mode_tinyvgg_model.pth")

Overwriting going_modular/train.py


In [112]:
!python going_modular/train.py

{'Brad Pitt': 0, 'Johnny Depp': 1, 'Leonardo DiCaprio': 2, 'Tom Cruise': 3, 'Will Smith': 4} self.class_to_idx
{'Brad Pitt': 0, 'Johnny Depp': 1, 'Leonardo DiCaprio': 2, 'Tom Cruise': 3, 'Will Smith': 4} self.class_to_idx
cpu device
  0% 0/5 [00:00<?, ?it/s]Epoch: 0 | Train Loss: 1.6346796567623432 | Train acc: 0.15865384615384615 | Test Loss: 1.5909542739391327 | Test acc: 0.375
 20% 1/5 [00:08<00:35,  9.00s/it]Epoch: 1 | Train Loss: 1.612407244168795 | Train acc: 0.20432692307692307 | Test Loss: 1.59875950217247 | Test acc: 0.3359375
 40% 2/5 [00:17<00:26,  8.71s/it]Epoch: 2 | Train Loss: 1.6062113963640654 | Train acc: 0.2283653846153846 | Test Loss: 1.5880542397499084 | Test acc: 0.1484375
 60% 3/5 [00:25<00:16,  8.24s/it]Epoch: 3 | Train Loss: 1.6013263280575092 | Train acc: 0.21875 | Test Loss: 1.5955927968025208 | Test acc: 0.1640625
 80% 4/5 [00:33<00:08,  8.40s/it]Epoch: 4 | Train Loss: 1.589718561906081 | Train acc: 0.2283653846153846 | Test Loss: 1.5923909842967987 | Test ac