# Creating YOLO data annotations for ultralytics


## Introduction
The data annotations for ultralytics require the following structure:
```
data/
    - images/
        - train/
            - image1.jpg
            - image2.jpg
            - ...
        - val/
            - image1.jpg
            - image2.jpg
            - ...
        - test/
            - image1.jpg
            - image2.jpg
            - ...
    - labels/
        - train/
            - image1.txt
            - image2.txt
            - ...
        - val/
            - image1.txt
            - image2.txt
            - ...
        - test/
            - image1.txt
            - image2.txt
            - ...
dataset.yaml
```
Additionally, the dataset.yaml file should have the following content:
```yaml
path: ../images
train: ../images/train
val: ../images/val
test: ../images/test

nc: 1
names: ['haz_sign']
```


## Algorithm
1. Load the label dataset
2. Filter sources by locally available videos
3. Create the dataset structure
4. Loop through the videos and, for every frame that contains bounding boxes, create the image and its label file
5. Save the images and labels in the dataset structure


In [None]:
import kagglehub
import pandas as pd
import os
import cv2
import random
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Dataset splits as (split name, fraction of all annotations) pairs.
distribution = [
    ("train", 0.8),
    ("test", 0.1),
    ("val", 0.1),
]

In [None]:
# Download the label dataset via kagglehub; `path` is the local directory it returns.
path = kagglehub.dataset_download("stanislavlevendeev/hazmat-detection")
# The video directory comes from the PATH_TO_DATA environment variable
# (a missing variable raises KeyError here).
video_directory = os.environ["PATH_TO_DATA"]
print(f"Path to dataset files: {path}")
print(f"Path to video files: {video_directory}")

First, we look at which videos are labeled by grouping the labels by source. We will then create the annotations for these videos in the format required by ultralytics.

In [None]:
# Read the label table and collect the distinct video sources it refers to.
labels_csv = f"{path}/labels_dataframe.csv"
df = pd.read_csv(labels_csv)
videos = df["Source"].unique()
videos

In [None]:
# Keep one row per (job, source video, relative frame) combination
task_key_columns = ["Job Id", "Source", "Relative Frame"]
unique_tasks = df.drop_duplicates(subset=task_key_columns)
unique_tasks.count()

Next, we restrict the labeled videos to those that are available locally.

In [None]:
# Local .mp4 files that also appear as a source in the label table.
available_videos = [
    file_name
    for file_name in os.listdir(video_directory)
    if file_name.endswith(".mp4") and file_name in videos
]
available_videos

In [None]:
# Number of label rows (non-null "Absolute Frame" entries) for the locally available videos.
is_available = df["Source"].isin(available_videos)
total_frames = df.loc[is_available, "Absolute Frame"].count()
total_frames

In [None]:
def createYOLODataAnnotation(
    path=None,
    label_name=None,
    classId=None,
    img_width=0,
    img_height=0,
    rows=None
):
    """Write the YOLO label file for a single frame.

    Every row of ``rows`` becomes one line ``0 x_center y_center width height``
    with all four values normalised by the image size. The class index written
    is always ``0``; ``classId`` only decides whether boxes are written at all.

    Args:
        path: Directory the label file is written to (created if missing).
        label_name: File name of the label file inside ``path``.
        classId: Falsy (``None`` or ``0``) produces an empty label file.
        img_width: Frame width in pixels, used to normalise x values.
        img_height: Frame height in pixels, used to normalise y values.
        rows: DataFrame with the pixel columns ``XTL``, ``YTL``, ``XBR`` and
            ``YBR``. ``None`` or an empty frame produces an empty label file.

    Raises:
        ValueError: If boxes have to be written but ``img_width`` or
            ``img_height`` is not positive.
    """
    label_path = os.path.join(path, label_name)
    # Never overwrite a label file that already exists.
    if os.path.exists(label_path):
        return

    lines = []
    if classId and rows is not None and len(rows) > 0:
        # Fail loudly instead of writing inf/nan coordinates into the label.
        if img_width <= 0 or img_height <= 0:
            raise ValueError(
                f"Image size must be positive, got {img_width}x{img_height}"
            )
        box_columns = zip(rows["XTL"], rows["YTL"], rows["XBR"], rows["YBR"])
        for xtl, ytl, xbr, ybr in box_columns:
            x_center = (xtl + xbr) / 2 / img_width
            y_center = (ytl + ybr) / 2 / img_height
            width = (xbr - xtl) / img_width
            height = (ybr - ytl) / img_height
            lines.append(f"0 {x_center} {y_center} {width} {height}\n")

    os.makedirs(path, exist_ok=True)
    with open(label_path, "w") as f:
        f.write("".join(lines))

# Default output directory for extracted frames, created up front.
frames_dir = f"{path}/images/"
os.makedirs(frames_dir, exist_ok=True)

def saveFrame(video_name="", frame_num=0, frame=None, frames_dir=frames_dir):
    """Write one video frame as ``<video_name>_<frame_num, 5 digits>.jpg``.

    Args:
        video_name: Prefix of the image file name.
        frame_num: Frame index; zero-padded to five digits in the file name.
        frame: Image passed to ``cv2.imwrite``; ``None`` is ignored.
        frames_dir: Directory the image is written to.

    Nothing is written when ``frame`` is ``None`` or the target file already
    exists.
    """
    if frame is None:
        return
    frame_index = str(frame_num).zfill(5)
    frame_name = f"{video_name}_{frame_index}.jpg"
    frame_path = f"{frames_dir}/{frame_name}"
    # Check the zero-padded name that is actually written; checking the
    # unpadded name would never find previously saved frames.
    if os.path.exists(frame_path):
        return
    cv2.imwrite(frame_path, frame)
# Running tally per dataset split; every split starts at zero.
annotations_created = dict.fromkeys(("train", "test", "val"), 0)
def get_rnd_distribution():
    """Pick the dataset split for the next labelled frame.

    Every ``(name, fraction)`` entry of ``distribution`` has a quota of
    ``int(total_frames * fraction)`` annotations. A split is eligible while
    ``annotations_created[name]`` is still below its quota, and one eligible
    split is chosen uniformly at random.

    Returns:
        The chosen ``(name, fraction)`` tuple. When every quota is filled,
        the first entry of ``distribution`` is returned.
    """
    open_splits = [
        split
        for split in distribution
        # Strict comparison: a split whose quota is exactly met is full.
        if annotations_created[split[0]] < int(total_frames * split[1])
    ]
    if open_splits:
        return random.choice(open_splits)
    # Truncating the quotas to integers can make them add up to less than
    # total_frames, so this is reachable for the last few annotations.
    print(f"All split quotas are filled; assigning to '{distribution[0][0]}'")
    return distribution[0]

In [None]:
# Create <path>/yolo/{labels,images}/<split> for every configured split.
labels_dir = os.path.join(path, "yolo", "labels")
frames_dir = os.path.join(path, "yolo", "images")
for name, _fraction in distribution:
    os.makedirs(os.path.join(labels_dir, name), exist_ok=True)
    os.makedirs(os.path.join(frames_dir, name), exist_ok=True)

# Walk through every available video and export each labelled frame together
# with its label file into a randomly chosen split.
with tqdm(total=total_frames, desc="Processing") as pbar:
    for video in available_videos:
        video_path = os.path.join(video_directory, video)
        if not os.path.exists(video_path):
            print(f"File {video_path} not found")
            continue
        # Strip only the extension so that dots inside the video name survive.
        processed_source = os.path.splitext(video)[0]
        video_labels = df[df["Source"] == video]
        # Group the label rows once per video instead of filtering the
        # DataFrame again for every frame.
        labels_by_frame = {
            frame_number: frame_rows
            for frame_number, frame_rows in video_labels.groupby("Relative Frame")
        }
        # Open video file
        cap = cv2.VideoCapture(video_path)
        try:
            number_of_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_idx = 0
            # save each labelled frame and a .txt file with its labels
            while frame_idx < number_of_frames:
                ret, frame = cap.read()  # Read each frame
                if not ret:
                    break  # End of video
                label_frames = labels_by_frame.get(frame_idx)
                if label_frames is not None:
                    img_height, img_width = frame.shape[:2]
                    split_name = get_rnd_distribution()[0]
                    saveFrame(
                        processed_source,
                        frame_idx,
                        frame,
                        os.path.join(frames_dir, split_name),
                    )
                    frame_index = str(frame_idx).zfill(5)
                    label_name = f"{processed_source}_{frame_index}.txt"
                    createYOLODataAnnotation(
                        path=os.path.join(labels_dir, split_name),
                        label_name=label_name,
                        classId=1,
                        img_height=int(img_height),
                        img_width=int(img_width),
                        rows=label_frames
                    )
                    # Quotas and progress are counted in label rows, not frames.
                    annotations_created[split_name] += label_frames.shape[0]
                    pbar.update(label_frames.shape[0])
                frame_idx += 1
                pbar.set_description(f"Processing {video}, Frame {frame_idx}/{number_of_frames}")
        finally:
            # Release the capture even if processing a frame fails.
            cap.release()

In [None]:
# Display the per-split annotation counters.
annotations_created

## Validation of the data annotations
Once the data annotations have been created, they need to be checked for correctness.

In [None]:

# Function to draw rectangles on an image
def draw_rectangles(image_path, annotation_path, issue = None):
    """Display an image with the boxes of its YOLO label file drawn on top.

    Args:
        image_path: Path of the image to load.
        annotation_path: Path of the label file; each non-blank line holds
            ``class x_center y_center width height`` with normalised values.
        issue: Optional text drawn at the top-left corner of every box.
            ``None``, NaN and ``""`` are treated as "no issue".

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # Read the image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_height, img_width = image.shape[:2]

    # NaN is the only value that is not equal to itself, so `issue == issue`
    # filters out missing values without drawing the text "nan".
    show_issue = issue is not None and issue == issue and issue != ""

    # Read the annotation file
    with open(annotation_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            _class_id, x_center, y_center, width, height = map(float, fields)
            # Convert from YOLO format to bounding box coordinates
            x_min = int((x_center - width / 2) * img_width)
            y_min = int((y_center - height / 2) * img_height)
            x_max = int((x_center + width / 2) * img_width)
            y_max = int((y_center + height / 2) * img_height)
            # Display issue
            if show_issue:
                cv2.putText(image, str(issue), (x_min, y_min), cv2.FONT_HERSHEY_SIMPLEX, 1,  (0, 255, 50), 2)
            # Every rectangle is drawn in the same green; colours are in BGR order
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    # Convert BGR image to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Display the image using matplotlib
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    plt.show()


In [None]:

# Root directories of the generated YOLO images and labels
image_dir = os.path.join(path, "yolo", "images")
annotation_dir = os.path.join(path, "yolo", "labels")

# Label rows that belong to the locally available videos
images = df[df["Source"].isin(available_videos)]
print("Number of images:", len(images))

# Pick one labelled frame at random and look up its rows
random_image = random.choice(images["Absolute Frame"].values)
print("Random image:", random_image)
frame_info = images[images["Absolute Frame"] == random_image]

# Image and label share the stem <video name>_<relative frame, 5 digits>
source_name = frame_info["Source"].values[0].replace('.mp4','')
frame_number = str(frame_info["Relative Frame"].values[0]).zfill(5)
image_name = source_name + "_" + frame_number + ".jpg"

# Try the splits in order and stop at the first one that contains the image;
# if none does, `dist` and `image_path` are left pointing at 'val'
for dist in ('train', 'test', 'val'):
    image_path = os.path.join(image_dir, dist, image_name)
    if os.path.isfile(image_path):
        break

if not os.path.isfile(image_path):
    print("No image found:" + image_path)
else:
    print("Random image:", image_path)
    # Corresponding annotation file
    annotation_filename = source_name + "_" + frame_number + ".txt"
    annotation_path = os.path.join(annotation_dir, dist, annotation_filename)

    # Draw rectangles on the random image
    draw_rectangles(image_path, annotation_path, frame_info["Issue"].values[0])
