In [None]:
import json
import os

import cv2
import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
from lxml import etree
from tqdm import tqdm

In [None]:
# Fetch the hazmat-signs dataset; later cells build file paths on top of `path`.
path = kagglehub.dataset_download(
    "stanislavlevendeev/haz-mat-signs"
)

In [None]:
# Location of the XML annotation file inside the downloaded dataset.
annotations_path = f"{path}/annotations.xml"

In [None]:
# Parse the annotation XML; `root` is the element the image entries are read from.
tree = etree.parse(annotations_path)
root = tree.getroot()

# Quick check of which element sits at the top of the document.
print("Root element:", root.tag)


In [None]:
# Flatten the XML into one record per bounding box. Images without any <box>
# child therefore contribute no records.
images_with_boxes = []

for image in root.findall('image'):
    image_name = image.get('name')
    # The id is the file's base name without its final extension. Using
    # basename/splitext (instead of splitting on '.' and taking the second
    # '/'-component) keeps names such as "a/b/152.png" or "152.v2.png" intact.
    image_id = os.path.splitext(os.path.basename(image_name))[0]

    # Fields shared by every box of this image; computed once per image.
    image_fields = {
        'image_id': image_id,
        'image_name': image_name,
        'task_id': image.get('task_id'),
        'width': image.get('width'),
        'height': image.get('height'),
    }

    for box in image.findall('box'):
        # Per-box <attribute name="...">text</attribute> children as a dict.
        attributes = {
            attribute.get('name'): attribute.text
            for attribute in box.findall('attribute')
        }
        image_info = {
            **image_fields,
            'box_label': box.get('label'),
            'box_source': box.get('source'),
            'box_xtl': box.get('xtl'),
            'box_ytl': box.get('ytl'),
            'box_xbr': box.get('xbr'),
            'box_ybr': box.get('ybr'),
            # Absent attributes are recorded as None.
            'issue': attributes.get('issue'),
            'code': attributes.get('code'),
        }
        images_with_boxes.append(image_info)

In [None]:
# One row per bounding box, columns taken from the record keys.
df_images_with_boxes = pd.DataFrame(data=images_with_boxes)

In [None]:
# Preview the first rows of the box table.
df_images_with_boxes.head(n=5)

In [None]:
# Number of distinct image ids that have at least one box (shown as a 1-tuple).
df_images_with_boxes.loc[:, 'image_id'].unique().shape

In [None]:
def create_coco_annotations(df_images_with_boxes, output_path):
    """Export the bounding-box table as a COCO-style detection JSON file.

    Parameters
    ----------
    df_images_with_boxes : pandas.DataFrame
        One row per box with the columns ``image_name``, ``width``,
        ``height``, ``box_label``, ``box_xtl``, ``box_ytl``, ``box_xbr`` and
        ``box_ybr``. Numeric columns may hold strings; they are converted to
        numbers here so the JSON contains numeric values.
    output_path : str
        Destination JSON file. Missing parent directories are created.

    Returns
    -------
    None
        The result is written to ``output_path``.

    Notes
    -----
    Image, annotation and category ids are 1-based and follow the order of
    first appearance in the frame. Each annotation's ``category_id`` is the id
    of its own ``box_label``.
    """
    # Map every label to its category id (order of first appearance).
    category_ids = {
        label: idx
        for idx, label in enumerate(df_images_with_boxes['box_label'].unique(), start=1)
    }
    categories = [
        {'id': idx, 'name': label, 'supercategory': 'none'}
        for label, idx in category_ids.items()
    ]

    images = []
    annotations = []
    # groupby(sort=False) keeps first-appearance order and hands us each
    # image's rows directly instead of re-filtering the frame per image.
    grouped = df_images_with_boxes.groupby('image_name', sort=False)
    for image_id, (image_name, boxes) in enumerate(tqdm(grouped), start=1):
        first = boxes.iloc[0]
        images.append({
            'id': image_id,
            'file_name': image_name,
            'width': int(float(first['width'])),
            'height': int(float(first['height'])),
        })
        box_columns = zip(
            boxes['box_label'],
            boxes['box_xtl'],
            boxes['box_ytl'],
            boxes['box_xbr'],
            boxes['box_ybr'],
        )
        for label, xtl, ytl, xbr, ybr in box_columns:
            x, y = float(xtl), float(ytl)
            box_width = float(xbr) - x
            box_height = float(ybr) - y
            annotations.append({
                'id': len(annotations) + 1,
                'image_id': image_id,
                'category_id': category_ids[label],
                # COCO boxes are [x, y, width, height] from the top-left corner.
                'bbox': [x, y, box_width, box_height],
                'area': box_width * box_height,
                'iscrowd': 0,
            })

    coco_data = {
        'images': images,
        'annotations': annotations,
        'categories': categories,
    }

    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, 'w') as f:
        json.dump(coco_data, f)

In [None]:
# Write the COCO-format export of the box table.
create_coco_annotations(
    df_images_with_boxes=df_images_with_boxes,
    output_path='data/hazmat_coco.json',
)

In [None]:
def create_yolo_annotations(df_images_with_boxes, output_path, class_ids=None):
    """Write YOLO label files (labels only, no images) for the box table.

    One ``<image_id>.txt`` file is written per distinct ``image_name``. Each
    line is ``<class> <x_center> <y_center> <width> <height>`` with the box
    geometry divided by the image width/height.

    Parameters
    ----------
    df_images_with_boxes : pandas.DataFrame
        One row per box with the columns ``image_name``, ``image_id``,
        ``width``, ``height``, ``box_label``, ``box_xtl``, ``box_ytl``,
        ``box_xbr`` and ``box_ybr`` (numeric values may be strings).
    output_path : str
        Directory receiving the label files; created if it does not exist.
    class_ids : mapping, optional
        Maps ``box_label`` values to YOLO class indices. When omitted every
        box is written with class ``0``. A label missing from the mapping
        raises ``KeyError``.

    Notes
    -----
    Files are named after ``image_id``, so two different ``image_name`` values
    sharing the same ``image_id`` write to the same file and the later one
    overwrites the earlier.
    """
    os.makedirs(output_path, exist_ok=True)

    # groupby(sort=False) keeps first-appearance order and hands us each
    # image's rows directly instead of re-filtering the frame per image.
    grouped = df_images_with_boxes.groupby('image_name', sort=False)
    for _image_name, boxes in tqdm(grouped):
        first = boxes.iloc[0]
        image_id = first['image_id']
        image_width = int(first['width'])
        image_height = int(first['height'])

        label_path = os.path.join(output_path, image_id + '.txt')
        with open(label_path, 'w') as f:
            box_columns = zip(
                boxes['box_label'],
                boxes['box_xtl'],
                boxes['box_ytl'],
                boxes['box_xbr'],
                boxes['box_ybr'],
            )
            for label, xtl, ytl, xbr, ybr in box_columns:
                xtl, ytl, xbr, ybr = float(xtl), float(ytl), float(xbr), float(ybr)
                class_id = 0 if class_ids is None else class_ids[label]
                # Box centre and size in pixels, then scaled by the image size.
                x_center = (xtl + xbr) / 2 / image_width
                y_center = (ytl + ybr) / 2 / image_height
                width = (xbr - xtl) / image_width
                height = (ybr - ytl) / image_height
                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

In [None]:
# Write the YOLO label files for the box table.
create_yolo_annotations(
    df_images_with_boxes=df_images_with_boxes,
    output_path='data/yolo_labels',
)

In [None]:
def draw_rectangles(image_path, annotation_path, issue = None):
    """Display an image with the boxes from a YOLO label file drawn on it.

    Parameters
    ----------
    image_path : str
        Image file to load with OpenCV.
    annotation_path : str
        YOLO label file whose lines are
        ``<class> <x_center> <y_center> <width> <height>`` with geometry
        relative to the image size. Blank lines are ignored.
    issue : optional
        When not ``None`` and not ``""``, its string form is drawn at the
        top-left corner of every box.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_height, img_width = image.shape[:2]

    show_issue = issue is not None and issue != ""

    with open(annotation_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            # The class index is not needed for drawing.
            _, x_center, y_center, width, height = map(float, fields)
            # Convert the relative centre/size box to pixel corner coordinates.
            x_min = int((x_center - width / 2) * img_width)
            y_min = int((y_center - height / 2) * img_height)
            x_max = int((x_center + width / 2) * img_width)
            y_max = int((y_center + height / 2) * img_height)
            if show_issue:
                cv2.putText(image, str(issue), (x_min, y_min), cv2.FONT_HERSHEY_SIMPLEX, 1,  (0, 255, 50), 2)
            # Every box uses the same green; colour tuples are in BGR order.
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    # OpenCV stores pixels as BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()
# Visual check: overlay the generated label file for image 152 on the image.
draw_rectangles(
    image_path=path + '/images/152.png',
    annotation_path='./data/yolo_labels/152.txt',
)