In [37]:
!pip install xmltodict pytorch-lightning albumentations

Collecting albumentations
  Downloading albumentations-1.3.1-py3-none-any.whl (125 kB)
     -------------------------------------- 125.7/125.7 KB 2.5 MB/s eta 0:00:00
Collecting qudida>=0.0.4
  Downloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Collecting scikit-image>=0.16.1
  Downloading scikit_image-0.22.0-cp39-cp39-win_amd64.whl (24.5 MB)
     --------------------------------------- 24.5/24.5 MB 10.6 MB/s eta 0:00:00
Collecting opencv-python-headless>=4.1.1
  Downloading opencv_python_headless-4.8.1.78-cp37-abi3-win_amd64.whl (38.0 MB)
     --------------------------------------- 38.0/38.0 MB 10.4 MB/s eta 0:00:00
Collecting lazy_loader>=0.3
  Downloading lazy_loader-0.3-py3-none-any.whl (9.1 kB)
Collecting tifffile>=2022.8.12
  Downloading tifffile-2023.9.26-py3-none-any.whl (222 kB)
     ------------------------------------- 222.9/222.9 KB 14.2 MB/s eta 0:00:00
Collecting imageio>=2.27
  Downloading imageio-2.31.5-py3-none-any.whl (313 kB)
     -----------------------------------

You should consider upgrading via the 'C:\Users\mrwer\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import pytorch_lightning as pl
import tqdm
import xmltodict
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
from typing import List
from PIL import Image
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

In [71]:
# Maps the class name stored in each annotated object's 'name' field to an integer label id.
class2tag = {"helmet": 0}


class HelmDataset(Dataset):
    """Detection dataset pairing every ``.jpg`` in a directory with a same-named ``.xml`` file.

    Each item is ``(image, (boxes, class_labels))``. ``boxes`` holds one
    ``[x_center, y_center, width, height]`` row per annotated object, normalised by the
    image width/height, and ``class_labels`` holds the matching ids from ``class2tag``.
    """

    def __init__(self, data_dir, transforms=None):
        """
        :param data_dir: directory containing the ``.jpg`` images and their ``.xml`` annotations.
        :param transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., class_labels=...)`` and expected to return a
            mapping with the keys ``"image"``, ``"bboxes"`` and ``"class_labels"``.
        """
        self.data_dir = data_dir
        self.transforms = transforms
        # os.listdir yields entries in arbitrary order; sorting makes an index refer to the
        # same image on every run and every machine.
        self.image_list = sorted(f for f in os.listdir(data_dir) if f.endswith('.jpg'))

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotation and return ``(image, (boxes, class_labels))``.

        The image is converted to a float32 tensor without rescaling its values; ``boxes``
        is a float32 tensor of shape ``(num_objects, 4)`` and ``class_labels`` an int tensor
        of shape ``(num_objects,)``.
        """
        img_name = os.path.join(self.data_dir, self.image_list[idx])
        xml_name = os.path.splitext(img_name)[0] + '.xml'

        image = Image.open(img_name).convert("RGB")
        im_w, im_h = image.size
        image = np.array(image)

        boxes, class_labels = self.__get_boxes_from_xml(xml_name)
        boxes = self.__convert_to_yolo_box_params(boxes, im_w, im_h)

        if self.transforms:
            transformed = self.transforms(image=image, bboxes=boxes, class_labels=class_labels)
            image = transformed["image"]
            boxes = transformed["bboxes"]
            class_labels = transformed["class_labels"]

        # as_tensor accepts both numpy arrays and tensors, so a transform that already
        # returns a tensor image does not trigger a copy-construct warning.
        image = torch.as_tensor(image, dtype=torch.float32)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        if boxes.numel() == 0:
            # An empty list would otherwise become a 1-D tensor of shape (0,); keep the
            # (num_objects, 4) layout for images without any box.
            boxes = boxes.reshape(0, 4)
        class_labels = torch.as_tensor(class_labels, dtype=torch.int)

        target_tensor = (boxes, class_labels)
        return image, target_tensor

    def __len__(self):
        """Number of ``.jpg`` images found in ``data_dir``."""
        return len(self.image_list)

    def __get_boxes_from_xml(self, xml_filename: str):
        """Read the annotation file and return ``(boxes, class_labels)``.

        ``boxes`` is a list of ``[xmin, ymin, xmax, ymax]`` integer lists and
        ``class_labels`` the matching ids looked up in ``class2tag``. An annotation
        without any ``object`` entry yields two empty lists.

        :raises KeyError: if an object's name is not a key of ``class2tag``.
        """
        with open(xml_filename) as fd:
            doc = xmltodict.parse(fd.read())

        # 'annotation' may have no children at all, and 'object' is absent when the
        # image has no annotated object.
        annotation = doc['annotation'] or {}
        objects = annotation.get('object', [])
        # A single object is parsed as one mapping rather than a list of mappings.
        if not isinstance(objects, list):
            objects = [objects]

        boxes = []
        class_labels = []
        for obj in objects:
            bndbox = obj['bndbox']
            boxes.append([int(bndbox[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')])
            class_labels.append(class2tag[obj['name']])

        return boxes, class_labels

    def __convert_to_yolo_box_params(self, box_coordinates: List[List[int]], im_w, im_h):
        """Convert ``[xmin, ymin, xmax, ymax]`` pixel boxes to normalised YOLO boxes.

        :param box_coordinates: list of ``[xmin, ymin, xmax, ymax]`` boxes.
        :param im_w: image width used to normalise x values.
        :param im_h: image height used to normalise y values.
        :return: list of ``[x_center, y_center, width, height]`` lists.
        """
        new_box_coordinates = []
        for x_min, y_min, x_max, y_max in box_coordinates:
            x_center = (x_min + x_max) / 2 / im_w
            y_center = (y_min + y_max) / 2 / im_h
            width = (x_max - x_min) / im_w
            height = (y_max - y_min) / im_h
            new_box_coordinates.append([x_center, y_center, width, height])

        return new_box_coordinates
        

In [85]:
# Training pipeline: random horizontal flip and brightness/contrast jitter. Boxes are
# passed in YOLO format and the per-box labels travel in the "class_labels" field.
train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
)

# Evaluation pipeline: no random augmentation, so validation metrics are computed on the
# unmodified images and are repeatable between runs. The identity transform keeps the
# same call signature and bbox handling as the training pipeline.
test_transform = A.Compose(
    [A.NoOp()],
    bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
)

In [87]:
# Training split gets the training transform; the split under ./data/test is used for
# validation and gets the test transform.
train_dataset = HelmDataset(
    data_dir="./data/train",
    transforms=train_transform,
)
val_dataset = HelmDataset(
    data_dir="./data/test",
    transforms=test_transform,
)

In [92]:
# Batches of 4; only the training loader is shuffled.
# NOTE(review): no collate_fn is given, so the default collation is used. It presumably
# needs every image in a batch to have the same size and the same number of boxes —
# confirm against the data, or supply a detection-style collate_fn.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
)

In [93]:
def intersection_over_union(predicted_bbox, gt_bbox) -> float:
    """
    Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    :param: predicted_bbox - [x_min, y_min, x_max, y_max]
    :param: gt_bbox - [x_min, y_min, x_max, y_max]
    :return: IoU as a plain Python float; 0.0 when the boxes do not overlap or when
        the union has no area (e.g. both boxes are degenerate).
    """
    # Convert once to Python floats so the result is a real float regardless of whether
    # the inputs are lists, numpy arrays or tensors.
    p_x0, p_y0, p_x1, p_y1 = (float(predicted_bbox[i]) for i in range(4))
    g_x0, g_y0, g_x1, g_y1 = (float(gt_bbox[i]) for i in range(4))

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    inter_w = max(min(p_x1, g_x1) - max(p_x0, g_x0), 0.0)
    inter_h = max(min(p_y1, g_y1) - max(p_y0, g_y0), 0.0)
    intersection_area = inter_w * inter_h

    area_dt = (p_x1 - p_x0) * (p_y1 - p_y0)
    area_gt = (g_x1 - g_x0) * (g_y1 - g_y0)
    union_area = area_dt + area_gt - intersection_area

    # Guard against dividing by zero when neither box covers any area.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area