In [1]:
import xml.etree.ElementTree as ET
import xml
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import IPython
import os
import json
import random
import numpy as np
import requests
from io import BytesIO
import base64
from math import trunc
from PIL import Image as PILImage
from PIL import ImageDraw as PILImageDraw
from pathlib import Path
from tqdm import tqdm
import shutil

In [2]:
def image_element_to_image_data(e: xml.etree.ElementTree.Element) -> dict:
    """Convert one ``<image>`` XML element into a plain dictionary.

    Every ``<box>`` child becomes a dict holding the integer ``top``,
    ``left``, ``width`` and ``height`` attributes plus a ``parts`` mapping
    with the integer ``x``/``y`` of the six named ``<part>`` children
    (htop, lear, rear, nose, leye, reye).

    The returned ``filepath`` is the element's ``file`` attribute joined onto
    the notebook-level ``base_path``, which must be defined before this
    function is called.

    Raises:
        IndexError: if a box has no ``<part>`` with one of the six names.
    """
    part_names = ('htop', 'lear', 'rear', 'nose', 'leye', 'reye')

    def first_part(box_element, part_name):
        # First <part> child whose name attribute matches; [0] fails if none.
        matches = [p for p in box_element.findall('part') if p.get('name') == part_name]
        return matches[0]

    def point(part_element) -> dict:
        return {'x': int(part_element.get('x')), 'y': int(part_element.get('y'))}

    bboxes = []
    for box_element in e.findall('box'):
        # Locate all six parts first so a missing part fails before any
        # attribute is converted.
        located = {name: first_part(box_element, name) for name in part_names}
        bbox = {key: int(box_element.get(key)) for key in ("top", "left", "width", "height")}
        bbox['parts'] = {name: point(located[name]) for name in part_names}
        bboxes.append(bbox)

    return {
        "filepath": base_path / e.get("file"),
        "bboxes": bboxes,
    }

def parse_xml(filepath: Path) -> dict:
    """Parse an annotation XML file into a dictionary of image records.

    Args:
        filepath: path of the XML file to read.

    Returns:
        ``{"images": [...]}`` with one entry per ``<image>`` element found
        under the root's ``<images>`` node, each produced by
        ``image_element_to_image_data``.

    Raises:
        ValueError: if the root element has no ``<images>`` child.
    """
    # Parse the file the caller asked for; the previous version ignored the
    # argument and always read the notebook-level `label_path`.
    root = ET.parse(filepath).getroot()

    images_node = root.find('images')
    if images_node is None:
        raise ValueError(f"No <images> element found in {filepath}")

    return {
        "images": [image_element_to_image_data(e) for e in images_node.findall('image')],
    }

def load_xml(filepath: Path) -> dict:
    """Parse an annotation XML file and attach each image's pixel size.

    Args:
        filepath: path of the XML file to read.

    Returns:
        The dictionary produced by ``parse_xml``, where every entry of
        ``"images"`` additionally carries
        ``"size": {"width": ..., "height": ...}`` read from the image file.
    """
    # Use the argument; the previous version always loaded the notebook-level
    # `label_path` regardless of what was passed in.
    xml_data = parse_xml(filepath)

    # Add image size to image_data
    for image_data in xml_data['images']:
        # Close each file as soon as its size is known so handles do not
        # accumulate while iterating over the whole dataset.
        with Image.open(image_data['filepath']) as image:
            width, height = image.size
        image_data['size'] = {'width': width, 'height': height}

    return xml_data

In [3]:
def display_bbox(bbox, ax) -> "plt.Axes":
    """Draw one annotated box and its landmarks on ``ax``.

    Args:
        bbox: dict with ``left``, ``top``, ``width``, ``height`` and a
            ``parts`` mapping of landmark name to ``{"x": ..., "y": ...}``.
        ax: matplotlib axes to draw on.

    Returns:
        The same ``ax`` that was passed in. The annotation previously claimed
        ``None`` although the axes object was returned.
    """
    # (landmark name, marker, color), listed in drawing order:
    # ears, head top, nose, eyes.
    landmark_styles = (
        ("lear", 'x', "yellow"),
        ("rear", 'x', "yellow"),
        ("htop", 'v', "white"),
        ("nose", 'o', "green"),
        ("leye", 'x', "blue"),
        ("reye", 'x', "blue"),
    )

    parts = bbox['parts']
    for name, marker, color in landmark_styles:
        ax.plot(parts[name]["x"], parts[name]["y"], marker=marker, color=color)

    # Red, unfilled rectangle outlining the box.
    rect = patches.Rectangle(
        (bbox['left'], bbox['top']),
        bbox['width'],
        bbox['height'],
        linewidth=2,
        edgecolor='r',
        facecolor='none'
    )

    ax.add_patch(rect)
    return ax
    

def display_image_data(image_data):
    """Show an image with all of its annotated boxes drawn on top.

    Args:
        image_data: dict with a ``filepath`` to the image and a ``bboxes``
            list whose items are accepted by ``display_bbox``.
    """
    # The context manager closes the image file once the figure has been
    # shown; previously the handle was left open.
    with Image.open(image_data['filepath']) as img:
        fig, ax = plt.subplots(1)
        ax.imshow(img)

        for bbox in image_data['bboxes']:
            display_bbox(bbox, ax)

        plt.show()

In [4]:
# Folder containing the raw BearID data, relative to this notebook.
base_path = Path('../../data/01_raw/BearID/')
# Annotation XML file that is passed to load_xml below.
label_path = base_path.joinpath('images_train_without_bc.xml')

In [5]:
# Parse the annotation file and attach each image's pixel size.
xml_data = load_xml(label_path)

In [6]:
# Inspect the first parsed image record.
xml_data['images'][0]

{'filepath': PosixPath('../../data/01_raw/BearID/images/brooksFalls/je_201708/bf_755/755-scare-d-bear-near-the-north-side-of-brooks-falls_9237775339_o.jpg'),
 'bboxes': [{'top': 1059,
   'left': 1172,
   'width': 200,
   'height': 200,
   'parts': {'htop': {'x': 1283, 'y': 1068},
    'lear': {'x': 1335, 'y': 1080},
    'rear': {'x': 1228, 'y': 1084},
    'nose': {'x': 1279, 'y': 1197},
    'leye': {'x': 1324, 'y': 1132},
    'reye': {'x': 1246, 'y': 1133}}}],
 'size': {'width': 2000, 'height': 1500}}

In [7]:
def yolov8_bbox(bbox, size) -> dict:
    """Express a pixel box as a center point and extent relative to the image.

    Args:
        bbox: dict with pixel ``left``, ``top``, ``width`` and ``height``.
        size: dict with the image ``width`` and ``height`` in pixels.

    Returns:
        ``{'center_x', 'center_y', 'w', 'h'}`` where each value is the pixel
        quantity divided by the matching image dimension.

    Raises:
        AssertionError: if any of the four values falls outside ``[0, 1]``.
    """
    normalized = {
        'center_x': (bbox['left'] + bbox['width'] / 2.) / size['width'],
        'center_y': (bbox['top'] + bbox['height'] / 2.) / size['height'],
        'w': bbox['width'] / size['width'],
        'h': bbox['height'] / size['height'],
    }

    # Sanity check: every normalized value must be a fraction of the image.
    for value in normalized.values():
        assert 0. <= value <= 1.

    return normalized


def to_yolov8_txt_format(bbox, size) -> str:
    """Format one box as a label line: ``class center_x center_y w h``.

    The class index is always 0 because bear faces are the only class. The
    four numbers are the values returned by ``yolov8_bbox`` for ``bbox`` and
    ``size``, separated by single spaces.
    """
    class_num = 0  # We only detect bear faces
    box = yolov8_bbox(bbox, size)
    cx, cy = box['center_x'], box['center_y']
    w, h = box['w'], box['h']
    return f"{class_num} {cx} {cy} {w} {h}"

In [8]:
def build_yolov8_txt_format(xml_data, output_dir: Path):
    """Write the dataset as an ``images/`` folder plus a ``labels/`` folder.

    For every entry of ``xml_data['images']`` the image file is copied into
    ``output_dir / 'images'`` and a ``<stem>.txt`` file is written into
    ``output_dir / 'labels'`` containing one ``to_yolov8_txt_format`` line per
    box. An image without boxes gets an empty label file.

    NOTE(review): files are copied flat using only the file name, so two
    source images with the same name in different folders would overwrite
    each other. Confirm names are unique across the dataset.
    """
    images_dir = output_dir / 'images'
    labels_dir = output_dir / 'labels'

    # Creating the directories
    for directory in (output_dir, images_dir, labels_dir):
        os.makedirs(directory, exist_ok=True)

    for record in tqdm(xml_data['images']):
        source = record['filepath']
        boxes = record['bboxes']
        dimensions = record['size']

        # Copying the images
        shutil.copy(source, images_dir / source.name)

        # Making the label files
        label_lines = [to_yolov8_txt_format(box, dimensions) for box in boxes]
        label_file = labels_dir / f"{source.stem}.txt"
        label_file.write_text("\n".join(label_lines))

In [9]:
# Destination folder; build_yolov8_txt_format creates images/ and labels/ inside it.
output_dir = Path('../../data/04_feature/bearfacedetection/v0/')
build_yolov8_txt_format(xml_data, output_dir=output_dir)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 2673/2673 [00:01<00:00, 1622.66it/s]
