In [None]:
import os
from google.colab import drive 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#for xml
import xml.etree.ElementTree as ET

#PyTorch
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Image processing
from PIL import Image, ImageDraw, ExifTags, ImageColor, ImageFont

plt.ion()   # interactive mode

In [None]:
# Mount Google Drive at /content/gdrive so the files stored there can be read
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Locations of the XML annotations and of the images.
# NOTE(review): both paths are relative, so they resolve against the current
# working directory; they only reach the Drive mounted at /content/gdrive when
# the working directory is /content — confirm.
annotation_path = 'gdrive/My Drive/Colab Notebooks/Image/Dataset/annotations'
images_path = 'gdrive/My Drive/Colab Notebooks/Image/Dataset/images'

In [None]:
def encoded_labels(labels_list):
    """Translate class-name strings into integer class codes.

    Args:
        labels_list: iterable of label names.

    Returns:
        list of int, in input order: 1 for "with_mask", 2 for
        "mask_weared_incorrect", 3 for "without_mask" and 0 (background)
        for every other value.
    """
    return [
        1 if name == "with_mask"
        else 2 if name == "mask_weared_incorrect"
        else 3 if name == "without_mask"
        else 0  # background
        for name in labels_list
    ]

In [None]:
def decode_labels(labels_list):
    """Translate integer class codes back into class-name strings.

    Args:
        labels_list: iterable of class codes.

    Returns:
        list of str, in input order: "with_mask" for 1,
        "mask_weared_incorrect" for 2, "without_mask" for 3 and
        'background' for every other value.
    """
    code_names = (
        (1, "with_mask"),
        (2, "mask_weared_incorrect"),
        (3, "without_mask"),
    )

    def name_of(code):
        # Compare with == (not a dict lookup) so any value that compares
        # equal to a known code is matched, whatever its hash is.
        for known_code, name in code_names:
            if code == known_code:
                return name
        return 'background'

    return [name_of(code) for code in labels_list]

In [None]:
class Data(Dataset):
    """Detection dataset pairing every image file with the boxes of its XML annotation.

    Each file name found in ``images_path`` is matched with
    ``<annotation_path>/<file stem>.xml``. The parsed information is stored in
    ``self.data``, a DataFrame with one row per image and the columns
    ``Filename``, ``BoundingBoxes`` (list of ``[xmin, ymin, xmax, ymax]``),
    ``Labels`` (integer codes produced by ``encoded_labels``), ``Area``
    (one value per box) and ``Number_of_Objects``.

    Args:
        annotation_path: directory containing the ``.xml`` annotation files.
        images_path: directory containing the image files.
        transform: optional callable invoked as
            ``transform(image=<numpy array>, bboxes=<list of boxes>)`` that
            returns a mapping with ``'image'`` and ``'bboxes'`` entries.
            NOTE(review): this keyword style looks like the Albumentations
            convention — confirm which library is intended.
        mode: ``'train'``, ``'validation'`` or ``'test'`` keeps only the row
            range listed in ``SPLIT_RANGES``; any other value (including
            ``None``) keeps every row.

    Raises:
        OSError / xml.etree.ElementTree.ParseError: when an image has no
            readable, well-formed annotation file.
    """

    # Column order is part of the layout of ``self.data`` (rows are read positionally elsewhere).
    COLUMNS = ['Filename', 'BoundingBoxes', 'Labels', 'Area', 'Number_of_Objects']

    # (start, stop) row ranges of each split within the sorted file listing.
    # NOTE(review): 853 is presumably the total number of images — confirm.
    SPLIT_RANGES = {
        'train': (0, 680),
        'validation': (680, 700),
        'test': (700, 853),
    }

    # Constructor
    def __init__(self, annotation_path, images_path, transform=None, mode=None):
        # Image directories
        self.annotation_path = annotation_path
        self.images_path = images_path
        # The transform is going to be used on the image and its boxes
        self.transform = transform

        # Collect one record per image and build the DataFrame once
        # (DataFrame.append no longer exists in pandas 2.x and was quadratic).
        records = []
        # os.listdir gives no ordering guarantee; sorting keeps the splits reproducible.
        for filename in sorted(os.listdir(images_path)):
            # Annotation file shares the image's stem, whatever the extension length
            stem = os.path.splitext(filename)[0]
            ann_file_path = os.path.join(annotation_path, stem + '.xml')
            annotation = self.read_XML(ann_file_path)[0]

            boxes = [[obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
                     for obj in annotation['objects']]
            areas = [(xmax - xmin) * (ymax - ymin) for xmin, ymin, xmax, ymax in boxes]

            records.append({
                # Store the plain file name (not the enumerate tuple) so it can be joined to a path
                'Filename': filename,
                'BoundingBoxes': boxes,
                'Labels': encoded_labels(annotation['labels']),
                'Area': areas,
                'Number_of_Objects': len(boxes),
            })

        data = pd.DataFrame(records, columns=self.COLUMNS)

        # Keep only the rows of the requested split; unknown modes keep everything
        if mode in self.SPLIT_RANGES:
            start, stop = self.SPLIT_RANGES[mode]
            data = data.iloc[start:stop]
        self.data = data.reset_index(drop=True)

        # Number of images in this split (computed after slicing so __len__ is correct)
        self.len = len(self.data)

    # Get the length
    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.data)

    # Getter
    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: positional row index into ``self.data``.

        Returns:
            ``(image, target)`` where ``image`` is the tensor produced by
            ``torchvision.transforms.ToTensor`` from the RGB image and
            ``target`` is a dict with ``boxes`` (float32, ``[N, 4]``),
            ``labels`` (int64), ``image_id``, ``area`` and ``iscrowd``.
        """
        row = self.data.iloc[idx]
        # Image file path
        img_name = os.path.join(self.images_path, row['Filename'])
        # Open image file
        image = Image.open(img_name).convert('RGB')
        # Bounding boxes stay a plain list until after the optional transform
        boxes = row['BoundingBoxes']
        labels = torch.as_tensor(row['Labels'], dtype=torch.int64)
        area = torch.tensor(row['Area'])

        # If any, apply transformations to image and bounding boxes
        if self.transform:
            transformed = self.transform(image=np.array(image), bboxes=boxes)
            # Convert numpy array back to a PIL Image
            image = Image.fromarray(transformed['image'])
            boxes = transformed['bboxes']
            # NOTE(review): labels, area and iscrowd still come from the stored
            # annotation; if the transform drops or reshapes boxes they may no
            # longer agree with `boxes` — confirm against the transform used.

        # Same dtype with and without a transform
        bbox = torch.as_tensor(boxes, dtype=torch.float32)
        if bbox.numel() == 0:
            # An image without objects still needs an [0, 4] box tensor
            bbox = bbox.reshape(0, 4)

        # suppose all instances are not crowd
        num_objs = int(row['Number_of_Objects'])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        # Transform img to tensor
        image = torchvision.transforms.ToTensor()(image)
        # Build target dict
        target = {
            "boxes": bbox,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": area,
            "iscrowd": iscrowd,
        }

        return image, target

    # XML reader -> returns dictionary with image bounding boxes sizes
    def read_XML(self, annotation_path):
        """Parse one annotation file.

        Args:
            annotation_path: path of the XML file to read.

        Returns:
            A one-element list holding a dict with ``'file'`` (the path),
            ``'labels'`` (text of every ``<object>/<name>``) and ``'objects'``
            (one ``{'xmin', 'ymin', 'xmax', 'ymax'}`` dict of ints per
            ``<object>``, in document order).
        """
        labels = []
        boxes = []

        root = ET.parse(annotation_path).getroot()
        # An image can contain several objects
        for obj in root.findall('object'):
            # label: without_mask / with_mask / mask_weared_incorrect
            labels.append(obj.find('name').text)

            # bbox corner coordinates
            bndbox = obj.find('bndbox')
            boxes.append({key: int(bndbox.find(key).text)
                          for key in ('xmin', 'ymin', 'xmax', 'ymax')})

        return [{'file': annotation_path, 'labels': labels, 'objects': boxes}]

In [None]:
# Instantiate the dataset in 'train' mode; no transform is passed, so the
# constructor default transform=None applies.
dataset_train = Data(annotation_path,images_path, mode = 'train')