In [None]:
!pip install -q roboflow supervision

In [None]:
!pip install -q ultralytics>=8.3.0 roboflow wandb huggingface_hub pyyaml pillow

In [None]:
from google.colab import userdata
# Read the three service credentials from the Colab secrets store in one pass.
WANDB_API_KEY, HF_TOKEN, ROBOFLOW_API_KEY = [
    userdata.get(secret_name) for secret_name in ('wandb', 'HF_TOKEN', 'roboflow')
]

In [None]:
#@title 🔧 Download, Sample & Merge Datasets

from roboflow import Roboflow
from pathlib import Path
from google.colab import userdata
import shutil
import random
import yaml

# ============================================
# CONFIG - Your datasets
# ============================================
# Maps a short dataset name to (workspace, project, version); the download
# loop below unpacks each tuple in exactly that order.
DATASETS = {
    "bird": ("edthwarsaw", "bird-hkwll-nvqfd", 1),
    "quest-office": ("edthwarsaw", "quest-office-hntgz-90xb7", 2),
    "people-detection": ("edthwarsaw", "people-detection-o4rdr-otqtt", 1),
    "drone-detect": ("edthwarsaw", "drone-detect-suvzw-x4zqp", 1),
    "person": ("edthwarsaw", "person-hgivm-wrlti", 1),
    "shahed136": ("edthwarsaw", "shahed136-detect-emoo1", 3),
}

# How many images to sample from each (set to None for all).
# Keys must match DATASETS; a name missing here is treated like None because
# the merge step looks it up with SAMPLES.get(name).
SAMPLES = {
    "bird": 3000,
    "quest-office": None,  # All (author's note: 933 images — TODO confirm; only the train split is sampled)
    "people-detection": 6000,
    "drone-detect": 5000,
    "person": None,  # All (author's note: 568 images — TODO confirm; only the train split is sampled)
    "shahed136": 4000,
}

# ============================================
# DOWNLOAD
# ============================================
# Authenticate against Roboflow with the key kept in the Colab secrets store.
ROBOFLOW_API_KEY = userdata.get('roboflow')
rf = Roboflow(api_key=ROBOFLOW_API_KEY)

# Fetch each configured dataset in "yolov8" export format and remember the
# local folder it was extracted to, keyed by the short dataset name.
downloaded = {}
for name in DATASETS:
    workspace, project, version = DATASETS[name]
    print(f"Downloading {name}...")
    proj = rf.workspace(workspace).project(project)
    ds = proj.version(version).download("yolov8")
    downloaded.update({name: ds.location})
    print(f"  ‚úÖ {ds.location}")

print("\n‚úÖ All datasets downloaded!")

# ============================================
# SAMPLE & MERGE
# ============================================
def sample_dataset(src_path, n_samples=None):
    """Pick up to ``n_samples`` labelled images from a dataset's train split.

    Only images that have a matching ``<stem>.txt`` label file are eligible,
    and the eligibility filter is applied *before* sampling so the function
    returns exactly ``n_samples`` pairs whenever that many labelled images
    exist (sampling first and filtering afterwards returned fewer).

    Args:
        src_path: Root folder of the dataset; ``train/images`` and
            ``train/labels`` are expected directly below it.
        n_samples: Maximum number of pairs to return. ``None`` returns every
            labelled image.

    Returns:
        A list of ``(image_path, label_path)`` tuples; empty when the train
        split is missing.
    """
    images_dir = Path(src_path) / "train" / "images"
    labels_dir = Path(src_path) / "train" / "labels"

    if not images_dir.is_dir():
        print(f"  ‚ö†Ô∏è train split not found")
        return []

    # Compare suffixes case-insensitively so ".JPG" / ".jpeg" are not skipped.
    image_suffixes = {".jpg", ".jpeg", ".png"}

    # Sorted traversal makes the candidate list independent of filesystem
    # order, so a seeded random.sample() is reproducible.
    pairs = []
    for img in sorted(images_dir.iterdir()):
        if img.suffix.lower() not in image_suffixes:
            continue
        label = labels_dir / f"{img.stem}.txt"
        if label.exists():
            pairs.append((img, label))

    if n_samples is None or len(pairs) <= n_samples:
        return pairs
    return random.sample(pairs, n_samples)

# Create merged directory
# Fixed seed so the random subsets drawn below are the same on every run.
SEED = 42
random.seed(SEED)

merged_path = Path("merged_dataset")
# Discard the output of any earlier run: leftover images, or label files that
# a later cell already rewrote in place, must not leak into the fresh merge.
if merged_path.exists():
    shutil.rmtree(merged_path)
for split in ["train", "valid", "test"]:
    (merged_path / split / "images").mkdir(parents=True, exist_ok=True)
    (merged_path / split / "labels").mkdir(parents=True, exist_ok=True)


def _read_class_names(data_yaml_path):
    """Return ``{local class id: class name}`` from a dataset's data.yaml."""
    with open(data_yaml_path) as f:
        info = yaml.safe_load(f) or {}
    names = info.get("names", [])
    if isinstance(names, dict):
        return {int(k): str(v) for k, v in names.items()}
    return {i: str(v) for i, v in enumerate(names)}


def _labelled_pairs(src_path, split_name):
    """List every (image, label) pair of one split; [] if the split is absent."""
    images_dir = Path(src_path) / split_name / "images"
    labels_dir = Path(src_path) / split_name / "labels"
    if not images_dir.is_dir():
        return []
    found = []
    for img in sorted(images_dir.iterdir()):
        if img.suffix.lower() not in {".jpg", ".jpeg", ".png"}:
            continue
        label = labels_dir / f"{img.stem}.txt"
        if label.exists():
            found.append((img, label))
    return found


def _copy_label_with_global_ids(src_label, dst_label, local_to_global):
    """Copy a label file, rewriting the leading class id of every line.

    Lines whose class id is missing from ``local_to_global`` (or is not an
    integer) are left out. Returns the number of lines left out.
    """
    kept = []
    dropped = 0
    with open(src_label) as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            try:
                global_id = local_to_global[int(parts[0])]
            except (KeyError, ValueError):
                dropped += 1
                continue
            kept.append(" ".join([str(global_id)] + parts[1:]))
    with open(dst_label, "w") as f:
        f.write("\n".join(kept))
    return dropped


# Pass 1: collect every dataset's class list so the merged (global) class ids
# are known before any label file is written.
dataset_classes = {}
all_classes = set()
for name, src_path in downloaded.items():
    dataset_classes[name] = _read_class_names(Path(src_path) / "data.yaml")
    all_classes.update(dataset_classes[name].values())

final_classes = sorted(all_classes)
class_to_id = {c: i for i, c in enumerate(final_classes)}

# Pass 2: copy images and rewrite labels. Each source dataset numbers its
# classes from 0 independently, so the ids are translated to the merged
# numbering here; a plain file copy would make e.g. "bird" and "drone" share
# id 0.
img_counter = 0
split_counts = {"train": 0, "valid": 0, "test": 0}
dropped_annotations = 0

for name, src_path in downloaded.items():
    n_samples = SAMPLES.get(name)
    print(f"\nüì¶ {name}: sampling {'all' if n_samples is None else n_samples}...")

    local_names = dataset_classes[name]
    classes = [local_names[i] for i in sorted(local_names)]
    print(f"   Classes: {classes}")
    local_to_global = {i: class_to_id[c] for i, c in local_names.items()}

    # Train is sub-sampled; the datasets' own valid/test splits (when present)
    # are taken whole so validation uses images the model never trains on.
    pairs_by_split = {
        "train": sample_dataset(src_path, n_samples),
        "valid": _labelled_pairs(src_path, "valid"),
        "test": _labelled_pairs(src_path, "test"),
    }
    print(f"   Sampled: {len(pairs_by_split['train'])} images")
    print(f"   Held out: {len(pairs_by_split['valid'])} valid, {len(pairs_by_split['test'])} test")

    # Copy
    for split_name, pairs in pairs_by_split.items():
        for img_path, label_path in pairs:
            new_name = f"img_{img_counter:06d}"
            shutil.copy(img_path, merged_path / split_name / "images" / f"{new_name}{img_path.suffix}")
            dropped_annotations += _copy_label_with_global_ids(
                label_path,
                merged_path / split_name / "labels" / f"{new_name}.txt",
                local_to_global,
            )
            img_counter += 1
            split_counts[split_name] += 1

print(f"\n{'='*50}")
print(f"‚úÖ Merged {img_counter} images")
print(f"   Per split: {split_counts}")
if dropped_annotations:
    print(f"   WARNING: skipped {dropped_annotations} annotations with an unknown class id")
print(f"üìã Classes: {sorted(all_classes)}")

# ============================================
# CREATE data.yaml
# ============================================
# Point val/test at a split that actually contains images; an empty folder
# would leave the trainer with nothing to validate on.
val_split = "valid" if split_counts["valid"] else "train"
test_split = "test" if split_counts["test"] else val_split
if val_split == "train":
    print("   WARNING: no validation images found; val falls back to the training images")

data_yaml_content = f"path: {merged_path.absolute()}\n"
data_yaml_content += "train: train/images\n"
data_yaml_content += f"val: {val_split}/images\n"
data_yaml_content += f"test: {test_split}/images\n\n"
data_yaml_content += "names:\n"
for i, c in enumerate(final_classes):
    data_yaml_content += f"  {i}: {c}\n"

with open(merged_path / "data.yaml", "w") as f:
    f.write(data_yaml_content)

print(f"\nüìÑ data.yaml created with {len(final_classes)} classes:")
for i, c in enumerate(final_classes):
    print(f"   {i}: {c}")

print(f"\nüìÅ Dataset ready: {merged_path.absolute()}")


Downloading bird...
loading Roboflow workspace...
loading Roboflow project...
  ‚úÖ /content/bird-1
Downloading quest-office...
loading Roboflow workspace...
loading Roboflow project...
  ‚úÖ /content/Quest-Office-2
Downloading people-detection...
loading Roboflow workspace...
loading Roboflow project...
  ‚úÖ /content/People-Detection-1
Downloading drone-detect...
loading Roboflow workspace...
loading Roboflow project...
  ‚úÖ /content/Drone-Detect-1
Downloading person...
loading Roboflow workspace...
loading Roboflow project...
  ‚úÖ /content/person-1
Downloading shahed136...
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in shahed136-detect-3 to yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 207071/207071 [00:11<00:00, 17316.11it/s]





Extracting Dataset Version Zip to shahed136-detect-3 in yolov8:: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 16246/16246 [00:01<00:00, 10023.03it/s]


  ‚úÖ /content/shahed136-detect-3

‚úÖ All datasets downloaded!

üì¶ bird: sampling 3000...
   Classes: ['bird']
   Sampled: 2766 images

üì¶ quest-office: sampling all...
   Classes: ['Chair', 'Laptop', 'Monitor-TV', 'Person']
   Sampled: 746 images

üì¶ people-detection: sampling 6000...
   Classes: ['Bicycle', 'Car', 'Monitor-TV', 'bird', 'bus', 'motorbike', 'person']
   Sampled: 5070 images

üì¶ drone-detect: sampling 5000...
   Classes: ['drone']
   Sampled: 4469 images

üì¶ person: sampling all...
   Classes: ['Person']
   Sampled: 458 images

üì¶ shahed136: sampling 4000...
   Classes: ['shahed']
   Sampled: 4000 images

‚úÖ Merged 17509 images
üìã Classes: ['Bicycle', 'Car', 'Chair', 'Laptop', 'Monitor-TV', 'Person', 'bird', 'bus', 'drone', 'motorbike', 'person', 'shahed']

üìÑ data.yaml created with 12 classes:
   0: Bicycle
   1: Car
   2: Chair
   3: Laptop
   4: Monitor-TV
   5: Person
   6: bird
   7: bus
   8: drone
   9: motorbike
   10: person
   11: shahed

üì

In [None]:
#@title 🔧 Fix Class Names (run once, after merging and before training)
from pathlib import Path
import yaml

merged_path = Path("merged_dataset")

# Class list the merge step wrote to data.yaml; REMAP keys are positions in
# this list, so the remap is only valid while data.yaml still matches it.
OLD_CLASSES = ['Bicycle', 'Car', 'Chair', 'Laptop', 'Monitor-TV', 'Person', 'bird', 'bus', 'drone', 'motorbike', 'person', 'shahed']

# Map old class ID -> new class name
REMAP = {
    0: "bicycle",    # Bicycle
    1: "car",        # Car
    2: "chair",      # Chair
    3: "laptop",     # Laptop
    4: "monitor",    # Monitor-TV
    5: "person",     # Person (merge with person)
    6: "bird",       # bird
    7: "bus",        # bus
    8: "drone",      # drone
    9: "motorbike",  # motorbike
    10: "person",    # person (keep)
    11: "shahed",    # shahed
}

# Final unified classes
FINAL_CLASSES = ["shahed", "drone", "person", "bird", "car", "bicycle", "motorbike", "bus", "chair", "laptop", "monitor"]
NEW_CLASS_TO_ID = {c: i for i, c in enumerate(FINAL_CLASSES)}

# Read the class list currently recorded for the dataset. It tells us whether
# the labels still use the old numbering, already use the new one (cell was
# run before), or belong to a class list this REMAP was not written for.
yaml_path = merged_path / "data.yaml"
current_classes = None
if yaml_path.exists():
    with open(yaml_path) as f:
        names = (yaml.safe_load(f) or {}).get("names", [])
    if isinstance(names, dict):
        current_classes = [names[k] for k in sorted(names)]
    else:
        current_classes = list(names)

already_remapped = current_classes == FINAL_CLASSES
if current_classes is not None and not already_remapped and current_classes != OLD_CLASSES:
    raise ValueError(
        "data.yaml lists classes that differ from OLD_CLASSES; update OLD_CLASSES/REMAP "
        f"before remapping. Found: {current_classes}"
    )

# Remap all label files
labels_dir = merged_path / "train" / "labels"
remapped = 0
dropped = 0

if already_remapped:
    # The rewrite below is in place; applying it to ids that are already in
    # the new numbering would scramble them, so a second run is a no-op.
    print("Labels already use FINAL_CLASSES; skipping remap.")
else:
    # Every split is rewritten so train/valid/test share one numbering.
    for split_name in ("train", "valid", "test"):
        for label_file in (merged_path / split_name / "labels").glob("*.txt"):
            new_lines = []
            with open(label_file, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if not parts:
                        continue
                    new_class_name = None
                    if len(parts) >= 5:
                        try:
                            new_class_name = REMAP.get(int(parts[0]))
                        except ValueError:
                            new_class_name = None
                    if new_class_name in NEW_CLASS_TO_ID:
                        parts[0] = str(NEW_CLASS_TO_ID[new_class_name])
                        new_lines.append(" ".join(parts))
                        remapped += 1
                    else:
                        # Malformed line or an id REMAP does not cover.
                        dropped += 1

            with open(label_file, 'w') as f:
                f.write("\n".join(new_lines))

# Validate on held-out images when the dataset has any; otherwise fall back
# to the training images so training can still start.
val_split = "valid" if any((merged_path / "valid" / "images").glob("*")) else "train"
test_split = "test" if any((merged_path / "test" / "images").glob("*")) else val_split
if val_split == "train":
    print("WARNING: no validation images found; validating on the training set "
          "(reported metrics will be optimistic).")

# Update data.yaml
data_yaml = f"""path: {merged_path.absolute()}
train: train/images
val: {val_split}/images
test: {test_split}/images

names:
"""
for i, c in enumerate(FINAL_CLASSES):
    data_yaml += f"  {i}: {c}\n"

with open(merged_path / "data.yaml", 'w') as f:
    f.write(data_yaml)

print(f"‚úÖ Remapped {remapped} annotations")
if dropped:
    print(f"WARNING: dropped {dropped} annotations (malformed line or unmapped class id)")
print(f"\nüìã Final classes ({len(FINAL_CLASSES)}):")
for i, c in enumerate(FINAL_CLASSES):
    print(f"   {i}: {c}")

‚úÖ Remapped 51805 annotations

üìã Final classes (11):
   0: shahed
   1: drone
   2: person
   3: bird
   4: car
   5: bicycle
   6: motorbike
   7: bus
   8: chair
   9: laptop
   10: monitor


In [None]:
#@title 🚀 Train YOLO12 Multi-Class
from ultralytics import YOLO
from ultralytics.utils import SETTINGS
from pathlib import Path
from datetime import datetime
import yaml
import wandb
from huggingface_hub import HfApi, upload_file
from google.colab import userdata

# ============================================
# CONFIG
# ============================================
# Model and optimisation budget.
MODEL = "yolo12m.pt"
EPOCHS = 100
BATCH_SIZE = 16
IMG_SIZE = 640

# Where metrics (W&B) and weights (Hugging Face) are published.
WANDB_ENTITY = "Imperial-College-London-SPQR"
WANDB_PROJECT = "European-Defense-Hackathon-Warsaw"
HF_REPO = "shng2025/EDTH-Warsaw-shahed136-detector"

DATASET_PATH = Path("merged_dataset")
DATA_YAML = str(DATASET_PATH.joinpath("data.yaml"))

# Load class names; data.yaml may store them as a list or as an {id: name}
# mapping, and the mapping form is flattened to a list ordered by id.
with open(DATA_YAML) as f:
    data_info = yaml.safe_load(f)
CLASS_NAMES = data_info["names"]
CLASS_NAMES = (
    [CLASS_NAMES[class_id] for class_id in sorted(CLASS_NAMES.keys())]
    if isinstance(CLASS_NAMES, dict)
    else CLASS_NAMES
)

print(f"üìã Classes: {CLASS_NAMES}")
print(f"üìÅ Dataset: {DATASET_PATH}")

# ============================================
# SETUP LOGGING
# ============================================
# Switch off the "wandb" entry in the Ultralytics settings; this notebook
# logs to W&B itself through the epoch callback.
SETTINGS["wandb"] = False

# One timestamped name identifies the run on disk, in W&B and as the HF branch.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_name = f"multiclass_{timestamp}"
print(f"\nüèÉ Run: {run_name}")

# W&B
wandb.login(key=userdata.get('wandb'))
wandb_config = {
    "model": MODEL,
    "epochs": EPOCHS,
    "batch": BATCH_SIZE,
    "img_size": IMG_SIZE,
    "classes": CLASS_NAMES,
    "num_classes": len(CLASS_NAMES),
}
wandb_run = wandb.init(
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    name=run_name,
    tags=["yolo12", "multi-class", "drone-detection", "defense"],
    config=wandb_config,
)
print(f"[W&B] {wandb_run.url}")

# HuggingFace: each run uploads to its own branch of the model repo.
# Branch creation is best-effort; a failure is reported but does not stop the cell.
api = HfApi()
try:
    api.create_branch(repo_id=HF_REPO, branch=run_name, exist_ok=True)
except Exception as e:
    print(f"[HF] {e}")
else:
    print(f"[HF] https://huggingface.co/{HF_REPO}/tree/{run_name}")

# ============================================
# CALLBACKS
# ============================================
# Highest mAP50 seen so far (a list so the callback can mutate it).
best_map = [0.0]
# Modification time of the best.pt most recently pushed to the Hub; lets the
# callback upload exactly when the trainer has rewritten the file.
_uploaded_best_mtime = [None]


def _upload_to_hub(local_path, path_in_repo, commit_message, label):
    """Upload one file to this run's branch on the Hub.

    Failures are printed instead of raised so that a Hub/network problem
    never interrupts training.

    Returns:
        True when the upload succeeded, False otherwise.
    """
    try:
        upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=path_in_repo,
            repo_id=HF_REPO,
            revision=run_name,
            commit_message=commit_message,
        )
    except Exception as e:
        print(f"[HF] ‚ùå {e}")
        return False
    print(f"[HF] ‚úÖ {label}")
    return True


def on_epoch_end(trainer):
    """Per-epoch hook: log metrics to W&B and push checkpoints to the Hub.

    Args:
        trainer: The Ultralytics trainer passed to callbacks; this function
            reads ``epoch``, ``metrics``, ``tloss``, ``save_dir`` and calls
            ``label_loss_items``.
    """
    epoch = trainer.epoch
    metrics = trainer.metrics or {}

    # Training losses are not part of trainer.metrics (that dict holds the
    # validation results); they come from the running loss tensor instead.
    train_losses = {}
    if getattr(trainer, "tloss", None) is not None:
        train_losses = trainer.label_loss_items(trainer.tloss, prefix="train")

    # Log metrics
    wandb.log({
        "train/box_loss": train_losses.get("train/box_loss", 0),
        "train/cls_loss": train_losses.get("train/cls_loss", 0),
        "train/dfl_loss": train_losses.get("train/dfl_loss", 0),
        "metrics/mAP50": metrics.get("metrics/mAP50(B)", 0),
        "metrics/mAP50-95": metrics.get("metrics/mAP50-95(B)", 0),
        "metrics/precision": metrics.get("metrics/precision(B)", 0),
        "metrics/recall": metrics.get("metrics/recall(B)", 0),
        "epoch": epoch,
    })

    # Track the best mAP50 for reporting.
    current_map = metrics.get("metrics/mAP50(B)", 0)
    if current_map > best_map[0]:
        best_map[0] = current_map

    save_dir = trainer.save_dir
    last_pt = Path(save_dir) / "weights" / "last.pt"
    best_pt = Path(save_dir) / "weights" / "best.pt"

    # Checkpoint every 5 epochs
    if epoch > 0 and epoch % 5 == 0 and last_pt.exists():
        _upload_to_hub(
            last_pt,
            f"checkpoints/epoch_{epoch:04d}.pt",
            f"Epoch {epoch}",
            f"epoch_{epoch:04d}.pt",
        )

    # Best model: upload whenever best.pt on disk has changed since the last
    # successful upload. The trainer decides when to rewrite best.pt using
    # its own fitness score, which need not improve in step with mAP50, so
    # the file's mtime is the reliable trigger.
    if best_pt.exists():
        best_mtime = best_pt.stat().st_mtime
        if best_mtime != _uploaded_best_mtime[0]:
            uploaded = _upload_to_hub(
                best_pt,
                "best.pt",
                f"Best model (epoch {epoch}, mAP50={current_map:.4f})",
                f"best.pt (mAP50={current_map:.4f})",
            )
            if uploaded:
                _uploaded_best_mtime[0] = best_mtime

# ============================================
# TRAIN
# ============================================
model = YOLO(MODEL)
# Hook the callback to "on_fit_epoch_end": that event fires after the epoch's
# validation and checkpoint save, so the callback sees this epoch's metrics
# and weights. With "on_train_epoch_end" it ran before validation and
# reported/uploaded the previous epoch's results.
model.add_callback("on_fit_epoch_end", on_epoch_end)

print(f"\n{'='*60}")
print(f"üöÄ TRAINING: {len(CLASS_NAMES)} classes, {EPOCHS} epochs")
print(f"{'='*60}\n")

try:
    results = model.train(
        data=DATA_YAML,
        epochs=EPOCHS,
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        patience=20,

        optimizer="AdamW",
        lr0=0.001,
        lrf=0.01,
        weight_decay=0.0005,

        augment=True,
        mosaic=1.0,
        mixup=0.1,

        project="runs/detect",
        name=run_name,
        exist_ok=True,
        save_period=5,

        device=0,
        workers=4,
        amp=True,

        plots=True,
        save=True,
        val=True,
        verbose=True,
    )
finally:
    # Close the W&B run even when training is interrupted or fails, so the
    # run is not left open.
    wandb.finish()

print(f"\n{'='*60}")
print("‚úÖ TRAINING COMPLETE!")
print(f"{'='*60}")
print(f"üìÅ Results: runs/detect/{run_name}")
print(f"üèÜ Best: runs/detect/{run_name}/weights/best.pt")
print(f"üìä W&B: https://wandb.ai/{WANDB_ENTITY}/{WANDB_PROJECT}")
print(f"ü§ó HF: https://huggingface.co/{HF_REPO}/tree/{run_name}")

Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
üìã Classes: ['shahed', 'drone', 'person', 'bird', 'car', 'bicycle', 'motorbike', 'bus', 'chair', 'laptop', 'monitor']
üìÅ Dataset: merged_dataset

üèÉ Run: multiclass_20251206_171517


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshng2025[0m ([33mImperial-College-London-SPQR[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[W&B] https://wandb.ai/Imperial-College-London-SPQR/European-Defense-Hackathon-Warsaw/runs/2k5ks7fi
[HF] https://huggingface.co/shng2025/EDTH-Warsaw-shahed136-detector/tree/multiclass_20251206_171517
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12m.pt to 'yolo12m.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 39.0MB 21.8MB/s 1.8s

üöÄ TRAINING: 11 classes, 100 epochs

Ultralytics 8.3.235 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA L4, 22693MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=merged_dataset/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fracti

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...06_171517/weights/best.pt:   0%|          |  576kB /  122MB            

[HF] ‚úÖ best.pt (mAP50=0.2266)
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 548/548 3.3it/s 2:46
                   all      17509      51805      0.733       0.25      0.273      0.144

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      3/100      11.5G      1.637      1.812       1.65         27        640: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1095/1095 1.9it/s 9:50


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...06_171517/weights/best.pt:   0%|          | 73.1kB /  122MB            

[HF] ‚úÖ best.pt (mAP50=0.2733)
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 548/548 3.3it/s 2:44
                   all      17509      51805      0.499      0.324      0.332      0.195

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      4/100      11.2G       1.57      1.685      1.609         31        640: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1095/1095 1.9it/s 9:48


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...06_171517/weights/best.pt:   0%|          | 73.1kB /  122MB            

[HF] ‚úÖ best.pt (mAP50=0.3318)
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 548/548 3.3it/s 2:44
                   all      17509      51805      0.618      0.328      0.376      0.221

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      5/100      11.2G      1.519      1.567       1.57         80        640: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 108/1095 1.8it/s 58.5s<8:55

In [None]:
#@title 📊 Validation & Per-Class Results
# Evaluate the trained model on the dataset described by data.yaml.
val_metrics = model.val(data=DATA_YAML)

print(f"\nüìä Overall:")
print(f"   mAP50:     {val_metrics.box.map50:.4f}")
print(f"   mAP50-95:  {val_metrics.box.map:.4f}")
print(f"   Precision: {val_metrics.box.mp:.4f}")
print(f"   Recall:    {val_metrics.box.mr:.4f}")

# box.ap50 holds one entry per class that occurred during validation, in the
# order given by box.ap_class_index; it is not indexed by class id. Pair the
# two explicitly so a class without validation instances cannot shift the
# names onto the wrong rows.
ap50_by_class = {
    int(class_id): ap
    for class_id, ap in zip(val_metrics.box.ap_class_index, val_metrics.box.ap50)
}

print(f"\nüìã Per-Class AP50:")
for class_id, class_name in enumerate(CLASS_NAMES):
    if class_id in ap50_by_class:
        print(f"   {class_name:12s}: {ap50_by_class[class_id]:.4f}")
    else:
        print(f"   {class_name:12s}: n/a (no validation instances)")

In [None]:
#@title 📥 Download Best Model
from google.colab import files
# Send this run's best checkpoint to the browser as a file download.
best_weights_path = f"runs/detect/{run_name}/weights/best.pt"
files.download(best_weights_path)