# Using a Roboflow model for data labelling

In [1]:
# Install the Roboflow `inference` package and `supervision`, both imported by this notebook.
# NOTE(review): versions are unpinned - consider pinning them so reruns are reproducible.
!pip install -q inference supervision

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/105.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.7/105.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.4/99.4 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l

In [51]:
import concurrent.futures
import csv
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from getpass import getpass
from pathlib import Path

import cv2
import numpy as np
import requests
import supervision as sv
from google.colab import drive
from inference import get_model

### Step 1: Configuration

In [3]:
# Mount Google Drive at /content/drive; the metadata file and output
# directories configured in this notebook live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Read the Roboflow API key from the environment (prompting interactively when
# it is not set) so the secret is never stored in the notebook itself.
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked/rotated in the Roboflow dashboard.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")
MODEL_ID = "project-hornet-detection-bbpor/4"
# Passed to model.infer() as the `confidence` argument.
CONFIDENCE = 0.5

# Tab-separated metadata file; its `identifier` column holds the image URLs.
METADATA_FILE = "/content/drive/MyDrive/hornet_project/data/metadata/hornets_metadata.csv"
OUTPUT_DIR = Path("/content/drive/MyDrive/hornet_project/annotated/")

# Original images and their YOLO label files are written side by side here.
IMAGES_DIR = OUTPUT_DIR / "images"
LABELS_DIR = OUTPUT_DIR / "labels"

# Safe to re-run: existing directories are left untouched.
IMAGES_DIR.mkdir(parents=True, exist_ok=True)
LABELS_DIR.mkdir(parents=True, exist_ok=True)

### Step 2: Initialize Roboflow model

In [5]:
# Load the Roboflow model identified by MODEL_ID, passing the API key.
# The resulting `model` object is what annotate_yolo() calls for batch inference.
model = get_model(MODEL_ID, ROBOFLOW_API_KEY)
print("Model loaded!")



Model loaded!


### Step 3: Define functions

In [55]:
# -------------------------
# READ METADATA
# -------------------------

def load_metadata(csv_file):
    """Return the non-empty ``identifier`` values from a tab-separated file.

    In this notebook the identifiers are the image URLs that are later
    handed to ``download_image``.

    Args:
        csv_file: Path to a UTF-8, tab-delimited file with a header row.

    Returns:
        list[str]: Stripped ``identifier`` values in file order. Rows whose
        identifier is missing, ``None`` (short row) or blank are skipped.
        An empty list is returned when the column does not exist.
    """
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to a reader.
    with open(csv_file, "r", encoding="utf-8", newline="") as f:
        identifiers = []
        for row in csv.DictReader(f, delimiter="\t"):
            # DictReader pads rows that are shorter than the header with
            # None, so `.get(key, "")` alone would still hand None to strip().
            identifier = (row.get("identifier") or "").strip()
            if identifier:
                identifiers.append(identifier)
        return identifiers


# -------------------------
# CONVERT TO YOLO FORMAT
# -------------------------

def sv_to_yolo_fast(det, w, h):
    """Format a set of detections as YOLO label lines.

    Args:
        det: Detections object supporting ``len()`` and exposing ``xyxy``
            (an N x 4 array of corner boxes, read here as
            x_min, y_min, x_max, y_max) and ``class_id`` (one id per box).
        w: Value the x centre and box width are divided by
            (the image width at the call site).
        h: Value the y centre and box height are divided by
            (the image height at the call site).

    Returns:
        list[str]: One ``"<class> <cx> <cy> <w> <h>"`` string per detection,
        with the four numbers printed to six decimals. Empty list when
        there are no detections.
    """
    if len(det) == 0:
        return []

    # astype() returns a copy, so the detections' own array is never modified.
    corners = det.xyxy.astype(np.float32)
    class_ids = det.class_id

    top_left = corners[:, 0:2]
    sizes = corners[:, 2:4] - top_left
    centers = top_left + sizes / 2

    # Normalise x values by w and y values by h.
    centers[:, 0] /= w
    centers[:, 1] /= h
    sizes[:, 0] /= w
    sizes[:, 1] /= h

    lines = []
    for center, size, class_id in zip(centers, sizes, class_ids):
        cx, cy = center
        box_w, box_h = size
        lines.append(f"{class_id} {cx:.6f} {cy:.6f} {box_w:.6f} {box_h:.6f}")
    return lines


# -------------------------
# IMAGE DOWNLOADER (PARALLEL)
# -------------------------

def download_image(url):
    """Fetch ``url`` and decode the response body as a colour image.

    The request uses a 5 second timeout and non-success HTTP statuses are
    treated as failures.

    Args:
        url: Address of the image to download.

    Returns:
        The value returned by ``cv2.imdecode`` for the downloaded bytes, or
        ``None`` when any step raises. Callers in this notebook treat
        ``None`` as "skip this image".
    """
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        encoded = np.frombuffer(response.content, np.uint8)
        return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    except Exception:
        # Deliberately best-effort: one bad URL must not abort the batch.
        return None



# -------------------------
#  PROCESS N IMAGES
# -------------------------

def annotate_yolo(
    n: int,
    start_index=0,
    batch_size=16,
    start_file_id=1,
    save_annotated=False
):
    """Download images, auto-label them with the loaded model and save the results.

    URLs are read from ``METADATA_FILE`` starting at ``start_index`` and
    processed in batches: each batch is downloaded in parallel, passed to
    ``model.infer`` and, for every image, the original is written to
    ``IMAGES_DIR/<id>.jpg`` and its YOLO labels to ``LABELS_DIR/<id>.txt``.
    Images that fail to download, batches whose inference raises, and images
    that cannot be written to disk are skipped without consuming a file ID.

    Within a batch, images are processed in metadata order, so file IDs
    follow the order of the metadata rows.

    Args:
        n: Number of images to save.
        start_index: Metadata row index at which to start.
        batch_size: Number of URLs downloaded and inferred per batch;
            must be at least 1.
        start_file_id: Number used for the first saved file; IDs are
            zero-padded to five digits.
        save_annotated: When true, also write an image with boxes and labels
            drawn to ``OUTPUT_DIR/annotated_<id>.jpg``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive batch size never advances through the metadata, so
        # the main loop below would spin forever.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    all_urls = load_metadata(METADATA_FILE)
    metadata_len = len(all_urls)

    bbox_annotator = sv.BoxAnnotator()
    label_annotator = sv.LabelAnnotator()

    success = 0
    meta_idx = start_index

    print(f"Target: {n} images (starting at index {start_index})")
    print(f"Output files will begin at ID: {start_file_id}")

    # One thread pool for the whole run instead of a new pool per batch.
    with ThreadPoolExecutor(max_workers=32) as exe:
        while success < n and meta_idx < metadata_len:

            # ---- Prepare batch ----
            batch_end = min(meta_idx + batch_size, metadata_len)
            batch_indices = range(meta_idx, batch_end)
            meta_idx = batch_end

            # ---- Parallel download ----
            # Executor.map yields results in submission order, which keeps
            # each image paired with its metadata index.
            fetched = exe.map(download_image, [all_urls[i] for i in batch_indices])
            downloaded = [
                (src_idx, img)
                for src_idx, img in zip(batch_indices, fetched)
                if img is not None
            ]

            if not downloaded:
                continue

            # ---- Batch inference ----
            try:
                batch_results = model.infer(
                    [img for _, img in downloaded], confidence=CONFIDENCE
                )
            except Exception as e:
                print(f"⚠️ Batch inference error: {e}")
                continue

            # ---- Process batch ----
            for (src_idx, img), res in zip(downloaded, batch_results):

                if success >= n:
                    # Target reached mid-batch: rewind so the reported resume
                    # index does not skip images that were never saved.
                    meta_idx = src_idx
                    break

                det = sv.Detections.from_inference(res)
                h, w = img.shape[:2]

                yolo = sv_to_yolo_fast(det, w, h)

                # File IDs continue from start_file_id, one per saved image.
                file_num = start_file_id + success
                file_id = f"{file_num:05d}"

                # Save original; cv2.imwrite reports failure via its return
                # value, so check it rather than counting a missing file.
                image_path = IMAGES_DIR / f"{file_id}.jpg"
                if not cv2.imwrite(str(image_path), img):
                    print(f"⚠️ Could not write {image_path}, skipping image")
                    continue

                # Save yolo labels
                with open(LABELS_DIR / f"{file_id}.txt", "w") as f:
                    f.write("\n".join(yolo))

                # Optional annotated image (written after the original, so
                # drawing on `img` cannot affect the saved original).
                if save_annotated:
                    ann = bbox_annotator.annotate(scene=img, detections=det)
                    ann = label_annotator.annotate(scene=ann, detections=det)
                    cv2.imwrite(str(OUTPUT_DIR / f"annotated_{file_id}.jpg"), ann)

                success += 1

            print(f"✓ {success}/{n} done")

    # ---- Finished ----
    last_used = meta_idx - 1
    print("\nDone!")
    print(f"Successfully annotated {success}/{n}")
    print(f"Last metadata index used: {last_used}")
    if success > 0:
        print(f"Final output file ID: {start_file_id + success - 1}")
    else:
        print("Final output file ID: none (no files were written)")

    if success < n:
        print("⚠️ Metadata exhausted before completing target.")


### Step 4: Annotate

The first 72 images contain frequent duplicates, so they are skipped altogether.

If an image cannot be downloaded, it is skipped. This is why each run of 1000 saved images spans more than 1000 metadata indexes.




#### Used indexes:

First batch image indexes: 72 - 1159

Second batch image indexes: 1160 - 2215

Third batch image indexes: 2216 - 3223

In [53]:
# First upload --
# annotate_yolo(n = 1000, start_index = 72, start_file_id = 1, save_annotated=True)

Target: 1000 images (starting at index 72)
✓ 16/1000 done
✓ 32/1000 done
✓ 35/1000 done
✓ 38/1000 done
✓ 53/1000 done
✓ 68/1000 done
✓ 84/1000 done
✓ 100/1000 done
✓ 116/1000 done
✓ 132/1000 done
✓ 148/1000 done
✓ 164/1000 done
✓ 180/1000 done
✓ 196/1000 done
✓ 212/1000 done
✓ 228/1000 done
✓ 244/1000 done
✓ 260/1000 done
✓ 276/1000 done
✓ 292/1000 done
✓ 308/1000 done
✓ 324/1000 done
✓ 340/1000 done
✓ 356/1000 done
✓ 372/1000 done
✓ 388/1000 done
✓ 404/1000 done
✓ 420/1000 done
✓ 436/1000 done
✓ 452/1000 done
✓ 468/1000 done
✓ 484/1000 done
✓ 500/1000 done
✓ 516/1000 done
✓ 532/1000 done
✓ 548/1000 done
✓ 564/1000 done
✓ 580/1000 done
✓ 596/1000 done
✓ 612/1000 done
✓ 628/1000 done
✓ 644/1000 done
✓ 660/1000 done
✓ 676/1000 done
✓ 692/1000 done
✓ 708/1000 done
✓ 724/1000 done
✓ 740/1000 done
✓ 756/1000 done
✓ 772/1000 done
✓ 787/1000 done
✓ 803/1000 done
✓ 819/1000 done
✓ 835/1000 done
✓ 851/1000 done
✓ 867/1000 done
✓ 883/1000 done
✓ 899/1000 done
✓ 915/1000 done
✓ 931/1000 done
✓ 94

In [57]:
# Second upload --
# annotate_yolo(n = 1000, start_index = 1160, start_file_id = 1001, save_annotated=True)

Target: 1000 images (starting at index 1160)
Output files will begin at ID: 1001
✓ 16/1000 done
✓ 32/1000 done
✓ 48/1000 done
✓ 64/1000 done
✓ 80/1000 done
✓ 96/1000 done
✓ 112/1000 done
✓ 128/1000 done
✓ 144/1000 done
✓ 160/1000 done
✓ 176/1000 done
✓ 192/1000 done
✓ 208/1000 done
✓ 224/1000 done
✓ 240/1000 done
✓ 256/1000 done
✓ 272/1000 done
✓ 288/1000 done
✓ 304/1000 done
✓ 320/1000 done
✓ 336/1000 done
✓ 352/1000 done
✓ 368/1000 done
✓ 384/1000 done
✓ 400/1000 done
✓ 416/1000 done
✓ 432/1000 done
✓ 448/1000 done
✓ 464/1000 done
✓ 480/1000 done
✓ 496/1000 done
✓ 512/1000 done
✓ 528/1000 done
✓ 544/1000 done
✓ 560/1000 done
✓ 576/1000 done
✓ 592/1000 done
✓ 608/1000 done
✓ 624/1000 done
✓ 640/1000 done
✓ 656/1000 done
✓ 672/1000 done
✓ 688/1000 done
✓ 704/1000 done
✓ 720/1000 done
✓ 736/1000 done
✓ 752/1000 done
✓ 768/1000 done
✓ 784/1000 done
✓ 800/1000 done
✓ 816/1000 done
✓ 832/1000 done
✓ 848/1000 done
✓ 864/1000 done
✓ 880/1000 done
✓ 894/1000 done
✓ 901/1000 done
✓ 917/1000 do

In [58]:
# Third upload --
# annotate_yolo(n = 1000, start_index = 2216, start_file_id = 2001, save_annotated=True)

Target: 1000 images (starting at index 2216)
Output files will begin at ID: 2001
✓ 16/1000 done
✓ 32/1000 done
✓ 48/1000 done
✓ 64/1000 done
✓ 80/1000 done
✓ 96/1000 done
✓ 112/1000 done
✓ 128/1000 done
✓ 144/1000 done
✓ 160/1000 done
✓ 176/1000 done
✓ 192/1000 done
✓ 208/1000 done
✓ 224/1000 done
✓ 240/1000 done
✓ 256/1000 done
✓ 272/1000 done
✓ 288/1000 done
✓ 304/1000 done
✓ 320/1000 done
✓ 336/1000 done
✓ 352/1000 done
✓ 368/1000 done
✓ 384/1000 done
✓ 400/1000 done
✓ 416/1000 done
✓ 432/1000 done
✓ 448/1000 done
✓ 464/1000 done
✓ 480/1000 done
✓ 496/1000 done
✓ 512/1000 done
✓ 528/1000 done
✓ 544/1000 done
✓ 560/1000 done
✓ 576/1000 done
✓ 592/1000 done
✓ 608/1000 done
✓ 620/1000 done
✓ 634/1000 done
✓ 650/1000 done
✓ 666/1000 done
✓ 682/1000 done
✓ 698/1000 done
✓ 714/1000 done
✓ 730/1000 done
✓ 746/1000 done
✓ 762/1000 done
✓ 778/1000 done
✓ 794/1000 done
✓ 810/1000 done
✓ 826/1000 done
✓ 842/1000 done
✓ 858/1000 done
✓ 874/1000 done
✓ 890/1000 done
✓ 906/1000 done
✓ 922/1000 do