# Annotator

### important:
Requires a Python environment with Python >= 3.10!

The purpose of this script is to annotate images of drones for finetuning yolo.

This involves the following steps:
- read raw images
- detect drones
- extract the most probable bounding box (if any)
- write the input image to output/images and the labels in yolo format in output/labels

In [None]:
# dependencies

!pip install autodistill autodistill-yolov5 autodistill-grounding-dino supervision opencv-python

: 

In [None]:
# imports

import os
import numpy as np
import cv2
import supervision as sv

from pathlib import Path
from autodistill_grounding_dino import GroundingDINO
from autodistill.detection import CaptionOntology

: 

In [None]:
# model hyperparameters
# Both values are handed to the GroundingDINO constructor (as box_threshold / text_threshold).

# NOTE(review): presumably the minimum box confidence for a detection to be kept - confirm against the GroundingDINO docs
BOX_THRESHOLD = 0.7
# NOTE(review): presumably the minimum text/phrase matching score for the caption - confirm against the GroundingDINO docs
TEXT_THRESHOLD = 0.5

: 

In [None]:
# init model

# Single-entry ontology: the caption "drone" is mapped to the class name "drone"
drone_ontology = CaptionOntology({"drone": "drone"})

# Detector configured with the thresholds defined in the hyperparameter cell
base_model = GroundingDINO(
    ontology=drone_ontology,
    box_threshold=BOX_THRESHOLD,
    text_threshold=TEXT_THRESHOLD,
)

: 

In [None]:
# input images

IMG_DIR = "input/images"

# File extensions (lower-case) that are treated as input images
IMG_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# os.listdir returns every directory entry, including sub-directories
# (e.g. .ipynb_checkpoints) and non-image files (e.g. .DS_Store), which would
# break the detection loop. Keep only regular files with an image extension,
# and sort them so the processing order is reproducible between runs.
imgs_paths = sorted(
    os.path.join(IMG_DIR, img)
    for img in os.listdir(IMG_DIR)
    if os.path.splitext(img)[1].lower() in IMG_EXTENSIONS
    and os.path.isfile(os.path.join(IMG_DIR, img))
)
imgs_paths

: 

In [None]:

# Output locations, all rooted under output/
output_root = Path('output')
output_orig_image_dir = output_root / 'images'
output_annotated_image_dir = output_root / 'annotated_images'
output_label_dir = output_root / 'labels'

# Make sure every output directory exists (no error if it already does)
for out_dir in (output_orig_image_dir, output_annotated_image_dir, output_label_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

: 

In [None]:
# Process each image
#
# For every input image: run the detector, keep only the most confident box,
# then write the image, an annotated preview and a one-line YOLO label file.


def _yolo_label_line(x1, y1, x2, y2, img_width, img_height, class_id=0):
    """Return one YOLO label line for a pixel-space box, or None if the box is empty.

    The corners are clipped to the image bounds first, so the normalized
    center/size values always lie in [0, 1].

    Args:
        x1, y1, x2, y2: Box corners in pixels (top-left, bottom-right).
        img_width, img_height: Image size in pixels.
        class_id: Integer class index written as the first field.

    Returns:
        "class_id x_center y_center width height\n" with normalized values,
        or None when the clipped box has no area.
    """
    x1, x2 = (min(max(float(v), 0.0), float(img_width)) for v in (x1, x2))
    y1, y2 = (min(max(float(v), 0.0), float(img_height)) for v in (y1, y2))

    width = x2 - x1
    height = y2 - y1
    if width <= 0 or height <= 0:
        return None

    x_center = (x1 + x2) / 2 / img_width
    y_center = (y1 + y2) / 2 / img_height
    return (
        f"{class_id} {x_center:.6f} {y_center:.6f} "
        f"{width / img_width:.6f} {height / img_height:.6f}\n"
    )


# Created once and reused for every image instead of once per iteration
box_annotator = sv.BoxAnnotator()

for img in imgs_paths:
    img_path = Path(img)  # Convert img to Path object

    # cv2.imread returns None for files it cannot decode; skip those before
    # spending time on the model.
    image = cv2.imread(str(img_path))
    if image is None:
        print(f"Skipping {img_path}: not a readable image")
        continue

    predictions = base_model.predict(str(img_path))
    if len(predictions.xyxy) == 0:  # No drone detected -> nothing to write
        continue

    # Keep only the prediction with the highest confidence
    highest_confidence_index = int(np.argmax(predictions.confidence))
    x1, y1, x2, y2 = predictions.xyxy[highest_confidence_index]
    confidence = predictions.confidence[highest_confidence_index]

    # Build the label first so nothing is written for an unusable box
    img_height, img_width = image.shape[:2]
    label_line = _yolo_label_line(x1, y1, x2, y2, img_width, img_height, class_id=0)
    if label_line is None:
        print(f"Skipping {img_path}: detected box lies outside the image")
        continue

    # Image and label must share the same file stem, otherwise a YOLO data
    # loader cannot pair output/images/<stem>.png with output/labels/<stem>.txt.
    sample_stem = f"{img_path.stem}_orig"

    # Save the original image
    output_orig_image_path = output_orig_image_dir / f"{sample_stem}.png"
    cv2.imwrite(str(output_orig_image_path), image)

    # Create a Detections object with only the highest confidence box
    detections = sv.Detections(
        xyxy=np.array([[x1, y1, x2, y2]], dtype=float),  # Bounding box coordinates as floats
        confidence=np.array([confidence], dtype=float),  # Confidence score as float
        class_id=np.array([0], dtype=int),  # Assuming class_id for drone is 0
    )

    # Annotate a copy so the array holding the training image stays untouched
    labels = [f"drone {confidence:0.2f}"]
    annotated_image = box_annotator.annotate(
        scene=image.copy(), detections=detections, labels=labels
    )

    # Save the annotated image
    output_annotated_image_path = output_annotated_image_dir / f"{img_path.stem}_annotated.png"
    cv2.imwrite(str(output_annotated_image_path), annotated_image)

    # Save the label in YOLO format: class_id x_center y_center width height
    output_label_path = output_label_dir / f"{sample_stem}.txt"
    with open(output_label_path, 'w') as f:
        f.write(label_line)

: 