# Notebook for the development of the model to power the face detection

### Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import cv2
import torch
from PIL import Image
from torchvision.datasets import WIDERFace
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Compose, Resize
from torchvision.models.detection.faster_rcnn import fasterrcnn_mobilenet_v3_large_320_fpn, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, FastRCNNPredictor
from collections import defaultdict
import os
import pathlib
import random
from typing import Tuple, List

### Constants

In [None]:
# Base directory: the cells below read the dataset from ROOT + "/widerface" and save model files under ROOT.
ROOT = "/kaggle/working" # Change to your root directory

### Download Data

In [None]:
# NOTE(review): presumably needed by the WIDERFace download in the next cell — confirm.
!pip install gdown

In [None]:
# Download WIDER FACE below ROOT so that the files land where the following cells
# read them from (ROOT + "/widerface/..."); torchvision stores the data in a
# "widerface" sub-folder of `root`.
WIDERFace(root=ROOT, download=True)

## Data Exploration

### Build a dictionary that maps each image (folder/file name) to a list holding the number of faces followed by the bounding-box coordinates

In [None]:
# Load the training annotation file as a list of whitespace-stripped lines
annotation_path = ROOT + "/widerface/wider_face_split/wider_face_train_bbx_gt.txt"
with open(annotation_path) as file:
  ground_truth_file_list = [line.strip() for line in file]

In [None]:
# Walk through the annotation lines. A line that splits into exactly two parts
# on "." is treated as an image name; it is followed by a face-count line and
# then by one line per face.
images_ground_truth = {}
index = 0
total_lines = len(ground_truth_file_list)
while index < total_lines:
  entry = ground_truth_file_list[index]
  index += 1
  if len(entry.split(".")) != 2:
    continue
  image_name = entry.strip()
  num_faces = int(ground_truth_file_list[index].strip())
  # Stored value: [num_faces, [x, y, width, height], [x, y, width, height], ...]
  ground_truth = [num_faces]
  for offset in range(1, num_faces + 1):
    values = [int(token) for token in ground_truth_file_list[index + offset].split(" ")]
    ground_truth.append(values[:4])
  images_ground_truth[image_name] = ground_truth
  # Continue behind the face-count line and the face lines just consumed
  index += num_faces + 1

### Visualize 4 random example images with bounding boxes

In [None]:
def plot_image(axs, x, y, image):
  """Draw a training image with its ground-truth face boxes on subplot axs[x, y].

  Args:
    axs: 2D array of matplotlib axes.
    x: row index of the subplot.
    y: column index of the subplot.
    image: key into images_ground_truth ("<folder>/<file name>").
  """
  # The first entry is the face count, the remaining ones are [x, y, width, height]
  face_boxes = images_ground_truth[image][1:]
  axis = axs[x, y]
  axis.imshow(mpimg.imread(ROOT + "/widerface/WIDER_train/images/" + image))
  for left, top, box_width, box_height in face_boxes:
    outline = patches.Rectangle((left, top), box_width, box_height, linewidth=2, edgecolor='r', facecolor='none')
    axis.add_patch(outline)
  # The title is the file name without its folder and extension
  file_name = image.split("/")[1]
  axis.set_title(file_name.split(".")[0])

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(8, 8))

# Draw four randomly chosen training images, filling the grid row by row
images = random.sample(list(images_ground_truth.keys()), 4)
for position, image_key in enumerate(images):
  row, col = divmod(position, 2)
  plot_image(axs, row, col, image_key)

### Explore Distribution over number of faces
-> Use log scale to better visualize extreme values

In [None]:
# Count how many images exist per number of faces. Only lines containing neither
# a space nor a dot are counted; they are parsed as the face count of an image.
dist_num_faces = defaultdict(int)
with open(ROOT + "/widerface/wider_face_split/wider_face_train_bbx_gt.txt") as file:
  for line in file:
    if " " in line or "." in line:
      continue
    dist_num_faces[int(line)] += 1
print(f"There are {dist_num_faces[0]} images with zero faces.")

In [None]:
# Bar chart of the distribution with a logarithmic y-axis
num_faces = list(dist_num_faces)
frequency = [dist_num_faces[face_count] for face_count in num_faces]

plt.bar(num_faces, frequency, log=True)
plt.title('Distribution of images by number of faces')
plt.xlabel('Number of faces in image')
plt.ylabel('Number of images')

plt.show()

## Build Dataset class

In [None]:
class FaceImageDataset(Dataset):
  """Dataset class for the WIDERFACE dataset.

  Every file matching ``*/*.jpg`` below ``data_dir`` is one item. The ground
  truth file is parsed once at construction time. Boxes given as
  ``x, y, width, height`` in original pixels are converted to
  ``[x_min, y_min, x_max, y_max]`` in an image resized to
  ``target_size`` x ``target_size``.

  Args:
    data_dir (str): path to the directory with image directories in it.
    ground_truth_file (str): path to the annotation (ground truth) text file.
    transform: transformation to perform on images. It is required by
      ``__getitem__`` and is expected to resize images to ``target_size``.
    target_size (int): edge length of the resized image the boxes are scaled to.
  """


  def __init__(self, data_dir: str, ground_truth_file: str, transform=None, target_size: int = 320) -> None:
    """Collects the image paths and parses the ground truth file.

    Args:
      data_dir (str): path to the directory with image directories in it.
      ground_truth_file (str): path to the annotation (ground truth) text file.
      transform: transformation to perform on images.
      target_size (int): edge length of the resized image the boxes are scaled to.
    """
    # Paths to the images
    self.__paths = [str(path) for path in pathlib.Path(data_dir).glob("*/*.jpg")]
    self.__ground_truth_file = ground_truth_file
    self.__transform = transform
    self.__target_size = target_size
    self.__label_data = self.__get_label_data()


  @staticmethod
  def _image_name(image_path: str) -> str:
    """Returns the "<folder>/<file name>" key of an image path.

    Args:
      image_path (str): path to an image file.
    """
    # pathlib splits on the separator of the running platform, not only on "/"
    return "/".join(pathlib.Path(image_path).parts[-2:])


  @staticmethod
  def _scale_box(box_data: List[int], original_width: int, original_height: int, target_size: int = 320):
    """Converts one box to corner format in the resized image.

    Args:
      box_data (List[int]): x, y, width, height in pixels of the original image.
      original_width (int): width of the original image in pixels.
      original_height (int): height of the original image in pixels.
      target_size (int): edge length of the resized image.

    Returns:
      [x_min, y_min, x_max, y_max] as floats, or None when the width or the
      height of the box is not positive.
    """
    x, y, width, height = box_data[:4]
    if width <= 0 or height <= 0:
      return None
    x_0 = x / original_width * target_size
    x_1 = (x + width) / original_width * target_size
    y_0 = y / original_height * target_size
    y_1 = (y + height) / original_height * target_size
    return [x_0, y_0, x_1, y_1]


  def __get_image_size(self, image_path: str) -> Tuple[int, int]:
    """Returns (width, height) of an image in pixels.

    Args:
      image_path (str): path to the image file.
    """
    # Only the size is needed here, so the image is not converted to a tensor
    with Image.open(image_path) as image:
      return image.size


  def __get_label_data(self) -> dict:
    """Return dictionary with labels and bounding boxes for all images.

    Returns:
      Dictionary mapping "<folder>/<file name>" to
      {"labels": int64 tensor of ones, "boxes": float32 tensor of shape (n, 4)}.
      Annotated images without a file in the data directory are left out.
    """
    label_file = self.__get_label_file()
    # Images are looked up by name: the order of the globbed paths is not
    # guaranteed to be the order of the entries in the ground truth file.
    path_by_name = {self._image_name(path): path for path in self.__paths}
    label_data = {}
    index = 0
    while index < len(label_file):
      image_name = label_file[index]
      index += 1
      if not image_name.endswith(".jpg"):
        continue
      # An image line is followed by the face count and one line per face
      num_faces = int(label_file[index])
      box_lines = label_file[index + 1:index + 1 + num_faces]
      index += 1 + num_faces
      image_path = path_by_name.get(image_name)
      if image_path is None:
        continue
      original_width, original_height = self.__get_image_size(image_path)
      boxes = []
      for line in box_lines:
        # only x, y, width, height are used
        box_data = [int(value) for value in line.split()[:4]]
        box = self._scale_box(box_data, original_width, original_height, self.__target_size)
        if box is not None:
          boxes.append(box)
      label_data[image_name] = {
        "labels": torch.ones(len(boxes), dtype=torch.int64),
        # explicit shape so that images without boxes yield a (0, 4) tensor
        "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4),
      }
    return label_data


  def __get_label_file(self) -> List[str]:
    """Returns list containing the stripped lines of the ground truth file.
    """
    with open(self.__ground_truth_file) as file:
      return [line.strip() for line in file]


  def __len__(self) -> int:
    """Returns the number of images.
    """
    return len(self.__paths)


  def __get_label(self, index: int) -> dict:
    """Returns the label dictionary for the image at position index.

    Args:
      index (int): index of the image to get label data for.

    Raises:
      KeyError: if the ground truth file has no entry for the image.
    """
    return self.__label_data[self._image_name(self.__paths[index])]


  def __load_image(self, index: int) -> torch.Tensor:
    """Return image in Tensor form.

    Args:
      index (int): index of the image to load.
    """
    # The context manager closes the file; RGB conversion guarantees 3 channels
    with Image.open(self.__paths[index]) as image:
      return ToTensor()(image.convert("RGB"))


  def __getitem__(self, index: int) -> "dict | None":
    """Returns transformed image and label data for given index.

    Args:
      index (int): index for the item to get.

    Returns:
      {"image": transformed image, "targets": label dictionary}, or None when
      the image has no bounding box.

    Raises:
      ValueError: if no transform was specified.
    """
    if self.__transform is None:
      raise ValueError("No transform specified!")
    label = self.__get_label(index)
    # Skip the image decoding for items that are dropped anyway
    if len(label["boxes"]) == 0:
      return None
    image = self.__transform(self.__load_image(index))
    return {"image": image, "targets": label}

## Create DataLoaders, Transforms, etc.

In [None]:
# auto transformation
weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT
transform = Compose([
    Resize((320, 320)),
    weights.transforms()
    ])

In [None]:
# Training split: image folder, annotation file and the dataset built from both
train_data_dir = f"{ROOT}/widerface/WIDER_train/images/"
train_gt_file = f"{ROOT}/widerface/wider_face_split/wider_face_train_bbx_gt.txt"
train_dataset = FaceImageDataset(data_dir=train_data_dir, ground_truth_file=train_gt_file, transform=transform)

In [None]:
# Validation split: image folder, annotation file and the dataset built from both
val_data_dir = f"{ROOT}/widerface/WIDER_val/images/"
val_gt_file = f"{ROOT}/widerface/wider_face_split/wider_face_val_bbx_gt.txt"
val_dataset = FaceImageDataset(data_dir=val_data_dir, ground_truth_file=val_gt_file, transform=transform)

In [None]:
def collate_fn(batch):
    """Collate dataset items into parallel lists of images and targets.

    Items that are None are dropped.

    Args:
        batch: list of items, each either None or a dict with the keys
            "image" and "targets".

    Returns:
        Tuple (images, targets) of two lists of equal length.
    """
    images, targets = [], []
    for item in batch:
        if item is None:
            continue
        images.append(item["image"])
        targets.append(item["targets"])
    return images, targets
# Both loaders use shuffled batches of 32 items collated by collate_fn
loader_options = {"batch_size": 32, "shuffle": True, "collate_fn": collate_fn}
train_dataloader = DataLoader(train_dataset, **loader_options)
val_dataloader = DataLoader(val_dataset, **loader_options)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

## Create and train model

In [None]:
# Start from the pretrained detector and replace its box predictor with a
# two-class head (the dataset labels every face with class 1).
model = fasterrcnn_mobilenet_v3_large_320_fpn(weights=weights)
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# Move the model only after the head was replaced, so that the parameters of
# the new head end up on the device as well.
model = model.to(device)
# Optimize every parameter that requires gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.9, weight_decay=0.0010)

In [None]:
# Save the whole model object (not only its state dict) and show the model as cell output.
# NOTE(review): this cell precedes the training loop but writes to the same
# "..._final.pth" file that is written again after training — confirm this is intended.
torch.save(model, ROOT + "/fasterrcnn_complete_model_final.pth")
model

In [None]:
# Optional: when training is split across multiple sessions, uncomment the next
# line to continue from the previously saved model.
# NOTE(review): recent PyTorch versions may need weights_only=False to load a fully pickled model — confirm.
#model = torch.load(ROOT + "/fasterrcnn_complete_model_final.pth")
# Rebuild the optimizer from the parameters of the model object currently in use.
# NOTE(review): lr is 0.005 here but 0.002 in the cell that creates the model — confirm which one is intended.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0010)

In [None]:
# Train for num_epochs epochs; every 10th epoch (starting with the first) the
# validation loss is computed and a checkpoint of the model is written.
model.train()
model.to(device)
num_epochs = 30
# Summed batch losses per epoch, stored as plain floats
train_losses = []
val_losses = []


def _to_device(images, targets):
    """Moves the image tensors and the label/box tensors of a batch to `device`."""
    images = [image.to(device) for image in images]
    targets = [{"labels": target["labels"].to(device), "boxes": target["boxes"].to(device)} for target in targets]
    return images, targets


for epoch in range(num_epochs):
    loss = 0
    for X, y in train_dataloader:
        # collate_fn drops None items, so a batch can end up empty
        if not X:
            continue
        X, y = _to_device(X, y)
        # forward pass: in training mode the model returns a dict of losses
        loss_dict = model(X, y)
        losses = sum(loss_dict.values())
        # backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        # detach so that the running sum does not keep the autograd graph alive
        loss += losses.detach()
    print(f"Loss after epoch {epoch+1}: {round(float(loss)/len(train_dataloader), 3)}.")
    train_losses.append(float(loss))
    if epoch % 10 == 0:
        loss = 0
        # The model stays in training mode on purpose: it only returns losses
        # in that mode. no_grad() prevents any gradient computation.
        with torch.no_grad():
            for X, y in val_dataloader:
                if not X:
                    continue
                X, y = _to_device(X, y)
                # forward pass
                loss_dict = model(X, y)
                losses = sum(loss_dict.values())
                loss += float(losses)
        print(f"Loss (val) after epoch {epoch+1}: {round(float(loss)/len(val_dataloader), 3)}.")
        val_losses.append(loss)
        torch.save(model, ROOT + f"/fasterrcnn_complete_model{epoch//10}.pth")

In [None]:
# Save the whole model object; this cell follows the training loop.
torch.save(model, ROOT + "/fasterrcnn_complete_model_final.pth")

## Model evaluation (work in progress)

In [None]:
# Rebuild the validation dataset for the evaluation section
val_data_dir = f"{ROOT}/widerface/WIDER_val/images/"
val_gt_file = f"{ROOT}/widerface/wider_face_split/wider_face_val_bbx_gt.txt"
val_dataset = FaceImageDataset(data_dir=val_data_dir, ground_truth_file=val_gt_file, transform=transform)

In [None]:
# Shuffled validation batches of 32 items, collated by collate_fn
val_dataloader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

In [None]:
# Collect predictions for the first five validation batches so that they can be
# inspected and plotted below.
val_loss = 0
# One tuple per image: (image, predicted boxes, ground-truth boxes, prediction scores)
image_boxes = []
model.eval()
model.to(device)
# Number of images processed
count = 0
with torch.no_grad():
    # This is the evaluation section, so the validation loader is used
    for batch, (X, y) in enumerate(val_dataloader):
        if batch >= 5:
            break
        # collate_fn drops None items, so a batch can end up empty
        if not X:
            continue
        count += len(X)
        X = [x.to(device) for x in X]
        y = [{"labels": target["labels"].to(device), "boxes": target["boxes"].to(device)} for target in y]
        # In eval mode the model returns one prediction dict per image; targets are not needed
        preds = model(X)
        for image, target, pred in zip(X, y, preds):
            image_boxes.append((image, pred["boxes"], target["boxes"], pred["scores"]))
image_boxes[0]

In [None]:
def plot_image_pred_boxes(axs, x, y, image, boxes_pred, boxes_truth):
  """Draw an image with predicted and ground-truth boxes on subplot axs[x, y].

  Predicted boxes are drawn in red and half transparent, ground-truth boxes in
  green. Every box is given by two opposite corners (x_0, y_0, x_1, y_1).

  Args:
    axs: 2D array of matplotlib axes.
    x: row index of the subplot.
    y: column index of the subplot.
    image: image data accepted by imshow.
    boxes_pred: iterable of predicted boxes.
    boxes_truth: iterable of ground-truth boxes.
  """
  axis = axs[x, y]
  axis.imshow(image)
  # Predictions are added first, ground truth afterwards
  box_groups = (
    (boxes_pred, {"edgecolor": 'r', "alpha": 0.5}),
    (boxes_truth, {"edgecolor": 'green'}),
  )
  for boxes, look in box_groups:
    for x_0, y_0, x_1, y_1 in boxes:
      corner = (min(x_0, x_1), min(y_0, y_1))
      outline = patches.Rectangle(corner, abs(x_0-x_1), abs(y_0-y_1), linewidth=2, facecolor='none', **look)
      axis.add_patch(outline)

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(8, 8))

# Show four randomly chosen samples; the grid is filled column by column
data = random.sample(image_boxes, 4)
grid_positions = [(0, 0), (1, 0), (0, 1), (1, 1)]
for (row, col), sample in zip(grid_positions, data):
  image_tensor, predicted, truth = sample[0], sample[1], sample[2]
  # imshow expects the channel axis last
  pixels = image_tensor.cpu().numpy().transpose(1, 2, 0)
  plot_image_pred_boxes(axs, row, col, pixels, predicted.cpu().numpy(), truth.cpu().numpy())

In [None]:
# Inspect one collected sample: (image, predicted boxes, ground-truth boxes, prediction scores)
image_boxes[8]