In [140]:
import os
import pathlib
import random
import xml.etree.ElementTree as ET
from pathlib import Path

import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
from natsort import natsorted
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, Subset
from torchvision import transforms

In [141]:
# The data folders sit one level above the directory the notebook runs from.
image_dir = Path(f"{os.getcwd()}/../data/images")
annotations_dir = Path(f"{os.getcwd()}/../data/annotations")
image_dir, annotations_dir

(PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/annotations'))

In [142]:
def parse_xml(xml_file):
    """Read one annotation XML file and return its labelled bounding boxes.

    Args:
        xml_file: Anything ``ET.parse`` accepts (a file name or a file object).

    Returns:
        A list with one dict per ``<object>`` element found directly under
        the root. Each dict stores the text of the object's ``<name>`` tag
        under ``'label(s)'`` and ``[xmin, ymin, xmax, ymax]`` (ints read from
        its ``<bndbox>`` tag) under ``'bbox(s)'``.
    """
    root = ET.parse(xml_file).getroot()
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    def describe(obj):
        # One face per <object>: its class label plus the box corners.
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        return {
            'label(s)': label,
            'bbox(s)': [int(bndbox.find(tag).text) for tag in corner_tags],
        }

    return [describe(obj) for obj in root.findall('object')]

## 1. Creating a `torch.utils.data` Dataset and Visualizing Images, Bounding Boxes, and Labels

In [143]:
class FaceMaskDataset(Dataset):
    """Dataset pairing face-mask images with their XML bounding-box annotations.

    ``*.png`` images and ``*.xml`` annotation files are collected from their
    directories and naturally sorted. Sample ``idx`` pairs the ``idx``-th image
    with the ``idx``-th annotation file, so both directories must contain
    matching files in matching order.

    Args:
        image_dir: Directory holding the ``*.png`` images.
        annotations_dir: Directory holding the ``*.xml`` annotation files.
        transform: Optional callable applied to the RGB PIL image. It must
            return a tensor whose last two axes are ``(H, W)`` (e.g. the
            ``(C, H, W)`` output of ``ToTensor``), because bounding boxes are
            rescaled from the transformed tensor's shape.
    """

    def __init__(self, image_dir, annotations_dir, transform= None):
        super().__init__()
        # Natural sort keeps numbered files in numeric order in both listings,
        # which is what lines images up with their annotations.
        self.image_paths = natsorted(list(pathlib.Path(image_dir).glob("*.png")))
        self.annotation_paths = natsorted(list(pathlib.Path(annotations_dir).glob("*.xml")))

        self.transform = transform
        # Class names and the integer ids used as training targets.
        self.classes = ["with_mask", "without_mask", "mask_weared_incorrect"]
        self.class_to_idx = {name: index for index, name in enumerate(self.classes)}

    def __getitem__(self, idx: int):
        """Return ``(image, annotations)`` for sample ``idx``.

        Without a transform, ``image`` is an RGB PIL image and the annotations
        are the raw ``parse_xml`` output (string labels, integer box lists in
        original pixel coordinates).

        With a transform, ``image`` is the transformed tensor. Each annotation
        then holds ``'label(s)'`` as an ``int64`` class-id tensor and
        ``'bbox(s)'`` as a ``float32`` tensor ``[xmin, ymin, xmax, ymax]``
        rescaled to the transformed image size (truncated to whole pixels).
        """
        # Close the file handle as soon as the RGB copy has been made.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        object_annotations = parse_xml(self.annotation_paths[idx])

        if not self.transform:
            return image, object_annotations

        # PIL reports (width, height) ...
        original_width, original_height = image.size
        image = self.transform(image)
        # ... while tensors are laid out (C, H, W): height comes before width.
        new_height, new_width = image.shape[-2], image.shape[-1]
        width_scale = new_width / original_width
        height_scale = new_height / original_height

        for annotation in object_annotations:
            xmin, ymin, xmax, ymax = annotation['bbox(s)']
            scaled_box = [
                int(xmin * width_scale),
                int(ymin * height_scale),
                int(xmax * width_scale),
                int(ymax * height_scale),
            ]
            label = annotation['label(s)']
            annotation['label(s)'] = torch.tensor(self.class_to_idx[label], dtype= torch.int64)
            annotation['bbox(s)'] = torch.tensor(scaled_box, dtype= torch.float32)

        return image, object_annotations

    def __len__(self) -> int:
        """Return the number of samples.

        Raises:
            ValueError: If the image and annotation counts differ, since the
                index-based pairing would then be meaningless.
        """
        num_images = len(self.image_paths)
        num_annotations = len(self.annotation_paths)
        if num_images != num_annotations:
            raise ValueError(
                f"Number of images ({num_images}) != number of annotations ({num_annotations})"
            )
        return num_images

In [144]:
# Preprocessing for every sample: convert the PIL image to a tensor first,
# then resize that tensor to a fixed 224x224.
train_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Resize(size=(224, 224))]
)

In [145]:
# Whole dataset with the tensor + resize preprocessing attached.
all_data = FaceMaskDataset(
    image_dir=image_dir,
    annotations_dir=annotations_dir,
    transform=train_transforms,
)

In [146]:
# Sanity check: look at the annotations (second element) of a single sample.
data = all_data[5]
ann_test = data[1]
ann_test


[{'label(s)': tensor(0), 'bbox(s)': tensor([66., 45., 90., 80.])},
 {'label(s)': tensor(1), 'bbox(s)': tensor([203.,  25., 224.,  55.])},
 {'label(s)': tensor(1), 'bbox(s)': tensor([107.,  89., 128., 121.])},
 {'label(s)': tensor(0), 'bbox(s)': tensor([139.,  80., 162., 117.])}]

In [157]:
def visualize_random_image_with_bbox(dataset: Dataset):
    """Display one randomly chosen sample with its boxes and class names drawn on it.

    Args:
        dataset: Dataset yielding ``(image, annotations)`` where ``image`` is a
            ``(C, H, W)`` tensor and every annotation is a dict holding tensors
            under ``'label(s)'`` (class id) and ``'bbox(s)'``
            (``[xmin, ymin, xmax, ymax]``). It must also expose ``classes``
            for mapping class ids to names.

    Raises:
        ValueError: If the dataset is empty (raised by ``random.randrange``).

    Side effects:
        Opens an OpenCV window named "Image", waits up to 6 seconds for a key
        press, then closes all OpenCV windows.
    """
    # Every sample is a candidate, including the first and the last one.
    index = random.randrange(len(dataset))
    tensor_image, image_annotations = dataset[index]

    # OpenCV wants (H, W, C). Going through Tensor.numpy() avoids the implicit
    # np.array(tensor) conversion, and the copy gives OpenCV a contiguous,
    # writable buffer that does not share memory with the tensor.
    np_image = tensor_image.permute(1, 2, 0).detach().cpu().numpy().copy()
    opencv_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)

    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1
    font_thickness = 2

    for ann in image_annotations:
        label_txt = dataset.classes[ann['label(s)'].item()]
        xmin, ymin, xmax, ymax = (int(value) for value in ann['bbox(s)'].tolist())

        cv2.rectangle(img= opencv_image,
                      pt1= (xmin, ymin),
                      pt2= (xmax, ymax),
                      color= (0, 0, 255),
                      shift= 0)

        # Keep the label inside the image when the box touches the top or
        # left edge: the text origin is its bottom-left corner.
        (_, text_height), _ = cv2.getTextSize(label_txt, font_face, font_scale, font_thickness)
        text_origin = (max(xmin - 10, 0), max(ymin - 5, text_height))
        cv2.putText(img= opencv_image,
                    text= label_txt,
                    org= text_origin,
                    fontFace= font_face,
                    fontScale= font_scale,
                    color= (0, 255, 0),
                    thickness= font_thickness)

    cv2.imshow("Image", opencv_image)
    cv2.waitKey(6000)
    cv2.destroyAllWindows()

# Show one random annotated sample from the full dataset.
visualize_random_image_with_bbox(dataset=all_data)
        

  np_image = np.array(tensor_image).copy()


In [158]:
dataset = FaceMaskDataset(image_dir= image_dir, annotations_dir= annotations_dir, transform= train_transforms)

# Split sample *indices* rather than the dataset itself. Passing the dataset
# to train_test_split makes scikit-learn index every sample up front, which
# loads and transforms all images into plain in-memory lists. Splitting the
# index list with the same seed selects the same samples in the same order,
# and Subset keeps them lazily loaded through the dataset.
train_indices, test_indices = train_test_split(
    list(range(len(dataset))), train_size= 0.80, random_state= 42
)
train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)
len(train_dataset), len(test_dataset)

(682, 171)

In [159]:
def custom_collate_fn(batch):
    """Regroup a batch of samples field by field without stacking anything.

    Args:
        batch: Iterable of per-sample tuples, e.g. ``(image, annotations)``.

    Returns:
        A tuple containing one inner tuple per sample field, e.g.
        ``((image_0, image_1, ...), (annotations_0, annotations_1, ...))``.
        An empty batch yields ``()``.
    """
    return (*zip(*batch),)

In [160]:
# Identical loader settings for both splits; only the training loader
# reshuffles its samples, the test loader keeps a fixed order.
train_dataloader, test_dataloader = (
    DataLoader(
        dataset=split,
        batch_size=2,
        shuffle=reshuffle,
        num_workers=0,
        collate_fn=custom_collate_fn,
    )
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

# Pull one training batch and inspect the first image's shape together with
# the second sample's annotations.
images, annotations = next(iter(train_dataloader))
images[0].shape, annotations[1]

(torch.Size([3, 224, 224]),
 [{'label(s)': tensor(0), 'bbox(s)': tensor([ 72.,  42., 147.,  76.])},
  {'label(s)': tensor(1), 'bbox(s)': tensor([186.,  33., 224.,  65.])},
  {'label(s)': tensor(1), 'bbox(s)': tensor([47., 53., 76., 68.])},
  {'label(s)': tensor(0), 'bbox(s)': tensor([ 9., 39., 32., 51.])}])