In [4]:
import xml.etree.ElementTree as ET
import pathlib
from pathlib import Path
from PIL import Image
import os
from torch.utils.data import Dataset
from matplotlib import pyplot as plt
from torchvision import transforms
from natsort import natsorted
import cv2
import numpy as np
import random

In [5]:
image_dir = Path(os.getcwd() + "/../data/images")
annotations_dir = Path(os.getcwd()+"/../data/annotations")
image_dir, annotations_dir

(PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/annotations'))

In [6]:
natsorted(pathlib.Path(image_dir).glob("*.png"))


[PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss0.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss1.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss2.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss3.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss4.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss5.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss6.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss7.png'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images/maksssksksss8.png'),
 PosixPath('/workspace/projects/vision/face-mask-detect

In [7]:
file_name_test = annotations_dir / "maksssksksss0.xml"
tree = ET.parse(file_name_test)
root = tree.getroot()
objects = root.findall("object")
objects[0].find('bndbox').find('xmin').text

'79'

In [8]:
def parse_xml(xml_file):
# Get xml tree root
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get all "ojbect" tags in the file
    objects = root.findall('object')
    # Get annotations which contain all labels and boundboxes
    object_annotations = []
    for obj in objects:
        # Get bound box coords and labels for each face in the image
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        object_annotations.append({
            'label(s)': label,
            'bbox(s)': [xmin, ymin, xmax, ymax]
        })
    return object_annotations

## 1. Creating a `torch.utils.data` Dataset and Visualizing Images, bboxes and labels

In [9]:
class FaceMaskDataset(Dataset):
    def __init__(self, image_dir, annotations_dir, transform= None):
        super().__init__()
        # Getting a sorted list of all image and annotations file names
        self.image_paths = natsorted(list(pathlib.Path(image_dir).glob("*.png")))
        self.annotation_paths = natsorted(list(pathlib.Path(annotations_dir).glob("*.xml")))

        # Getting transforms if found
        self.transform = transform
        # class_to_idx will be used when training a model
        self.class_to_idx = {"with_mask": 0, "without_mask": 1, "mask_weared_incorrect": 2}

    # Overriding the __getitem__() function to return a PIL image and its associated annotations
    def __getitem__(self, idx: int):
        image_path = self.image_paths[idx]
        image = Image.open(image_path)
        object_annotations = parse_xml(self.annotation_paths[idx])

        return image, object_annotations


    def __len__(self) -> int:
        if len(self.image_paths) == len(self.annotation_paths): 
            return len(self.image_paths)
        else:
            print("Error num of images != num of annotations \n")
            return -1

In [10]:
images_dir = Path(os.getcwd() + "/../data/images")
annotations_dir = Path(os.getcwd()+"/../data/annotations")
image_dir, annotations_dir

(PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/images'),
 PosixPath('/workspace/projects/vision/face-mask-detection/notebooks/../data/annotations'))

In [11]:
all_data = FaceMaskDataset(image_dir, annotations_dir)
all_data.__getitem__(0)[0], all_data.__getitem__(0)[1]

(<PIL.PngImagePlugin.PngImageFile image mode=RGB size=512x366>,
 [{'label(s)': 'without_mask', 'bbox(s)': [79, 105, 109, 142]},
  {'label(s)': 'with_mask', 'bbox(s)': [185, 100, 226, 144]},
  {'label(s)': 'without_mask', 'bbox(s)': [325, 90, 360, 141]}])

In [12]:
np_image = np.array(all_data.__getitem__(0)[0])
opencv_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
opencv_image.shape

(366, 512, 3)

In [13]:
annotation_ex = all_data.__getitem__(0)[1]
bbox = annotation_ex[0]['bbox(s)']
xmin, ymin, xmax, ymax = bbox
cv2.rectangle(opencv_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

array([[[116, 111, 113],
        [113, 108, 110],
        [101,  96,  98],
        ...,
        [102,  59,  40],
        [157,  98,  72],
        [186, 116,  86]],

       [[106, 100, 105],
        [100,  97,  99],
        [ 89,  86,  88],
        ...,
        [ 98,  58,  39],
        [129,  73,  48],
        [173, 109,  79]],

       [[ 93,  92,  96],
        [ 88,  87,  91],
        [ 80,  79,  83],
        ...,
        [ 90,  57,  38],
        [105,  57,  31],
        [157, 100,  71]],

       ...,

       [[ 88,  83,  85],
        [ 90,  85,  87],
        [ 92,  89,  91],
        ...,
        [ 32,  27,  28],
        [ 32,  27,  28],
        [ 33,  28,  29]],

       [[ 89,  84,  86],
        [ 92,  87,  89],
        [ 95,  90,  92],
        ...,
        [ 33,  28,  29],
        [ 33,  28,  29],
        [ 34,  29,  30]],

       [[ 96,  91,  93],
        [ 99,  94,  96],
        [103,  98, 100],
        ...,
        [ 34,  29,  30],
        [ 34,  29,  30],
        [ 35,  30,  31]]

In [14]:
## Test Code
#annotation_ex = all_data.__getitem__(0)[1]
#for ann in annotation_ex:
#    label = ann["label(s)"]
#    bbox = ann["bbox(s)"]
#
#    xmin, ymin, xmax, ymax = bbox
#    cv2.rectangle(img= opencv_image,
#                  pt1= (xmin, ymin),
#                  pt2= (xmax, ymax),
#                  color= (0, 255, 0),
#                  shift= 0)
#    cv2.putText(img= opencv_image,
#               text= label,
#                org= (xmin - 40, ymin - 10),
#                fontFace= cv2.FONT_HERSHEY_SIMPLEX,
#                fontScale= 0.5,
#                color= (0, 0, 255),
#                thickness= 0)

#cv2.imshow("Image", opencv_image)
#cv2.waitKey(3000)
#cv2.destroyAllWindows()

In [15]:
def visualize_random_image_with_bbox(dataset: Dataset):
    index = random.randrange(1, dataset.__len__() - 1)
    data = dataset.__getitem__(index)
    np_image = np.array(data[0])
    opencv_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
    image_annotations = data[1]

    for ann in image_annotations:
        label = ann['label(s)']
        bbox = ann['bbox(s)']
        
        xmin, ymin, xmax, ymax = bbox
        cv2.rectangle(img= opencv_image,
                      pt1= (xmin,ymin),
                      pt2= (xmax,ymax),
                      color= (0, 0, 255),
                      shift= 0)
        cv2.putText(img= opencv_image,
                    text= label,
                    org= (xmin - 40, ymin - 10),
                    fontFace= cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale= 0.4,
                    color= (0, 255, 0),
                    thickness= 1)
    
    cv2.imshow("Image", opencv_image)
    cv2.waitKey(5000)
    cv2.destroyAllWindows()

visualize_random_image_with_bbox(all_data)
        