In [None]:
# Install requirements
!apt-get update -qq
!apt-get install -y unrar

!pip install -U ultralytics comet_ml rarfile opencv-python tqdm pillow

In [None]:
# Comet configuration.
# This cell used to be wrapped in a string literal, which left
# COMET_PROJECT_NAME undefined for the training cell further down.
import os

from google.colab import userdata

# Prompt if the Comet API key is missing.
try:
    # Must be an item assignment: `os.environ.comet_key = ...` only sets a
    # Python attribute and never exports anything to the process environment.
    os.environ["COMET_API_KEY"] = userdata.get("COMET_API_KEY")
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    print("You must set the comet api key as a secret and enable notebook access")


COMET_PROJECT_NAME = "ferret-yolo"
COMET_AUTO_LOG_GRAPH = "true"
COMET_AUTO_LOG_PARAMETERS = "true"
COMET_AUTO_LOG_METRICS = "true"

# Plain Python variables are invisible to the Comet SDK; these names are
# presumably meant as Comet environment settings, so export them as well.
os.environ["COMET_PROJECT_NAME"] = COMET_PROJECT_NAME
os.environ["COMET_AUTO_LOG_GRAPH"] = COMET_AUTO_LOG_GRAPH
os.environ["COMET_AUTO_LOG_PARAMETERS"] = COMET_AUTO_LOG_PARAMETERS
os.environ["COMET_AUTO_LOG_METRICS"] = COMET_AUTO_LOG_METRICS

In [None]:
from pathlib import Path
from google.colab import files

# Locate the dataset archive and the YOLO weights, asking for an upload when
# either one is not already present in /content.
content = Path("/content")
archive_file = None
model_path = None

# Sorted so the choice is reproducible when several candidates exist
# (iterdir() yields entries in arbitrary order).
for candidate in sorted(content.iterdir()):
    if not candidate.is_file():
        continue
    suffix = candidate.suffix.lower()  # accept .ZIP / .RAR / .PT as well
    if suffix in (".zip", ".rar") and archive_file is None:
        archive_file = candidate
    if suffix == ".pt" and model_path is None:
        model_path = candidate

if archive_file is None or model_path is None:
    print("Upload dataset archive (.zip/.rar) and YOLO model (.pt)")
    uploaded = files.upload()
    for uploaded_name in uploaded:
        # Uploaded names are relative to the current working directory;
        # resolve them so later cells do not depend on the cwd staying put.
        uploaded_path = Path(uploaded_name).resolve()
        suffix = uploaded_path.suffix.lower()
        # Freshly uploaded files take precedence over ones found on disk.
        if suffix in (".zip", ".rar"):
            archive_file = uploaded_path
        if suffix == ".pt":
            model_path = uploaded_path

assert archive_file and model_path, "Dataset archive or model missing, upload and rerun this cell"

print("Using:", archive_file, model_path)


In [None]:
import zipfile, rarfile, shutil

# Unpack the dataset archive into /content/raw_dataset (skipped when a
# previous run already extracted it).
dataset_root = Path("/content/raw_dataset")

# Treat an empty directory like a missing one, so a previously aborted run
# does not make this cell silently skip extraction.
needs_extract = not dataset_root.exists() or not any(dataset_root.iterdir())

if needs_extract:
    dataset_root.mkdir(parents=True, exist_ok=True)
    is_zip = archive_file.suffix.lower() == ".zip"
    open_archive = zipfile.ZipFile if is_zip else rarfile.RarFile
    try:
        with open_archive(archive_file) as archive:
            archive.extractall(dataset_root)
    except BaseException:
        # Remove the partial extraction (also on a manual interrupt) so the
        # next run starts clean instead of reusing half a dataset.
        shutil.rmtree(dataset_root, ignore_errors=True)
        raise

print("Unpacked dataset")

In [None]:
import os, yaml
from collections import Counter

# Seek dataset: the first directory that contains images/, labels/ and
# classes.txt is used as the dataset base.
base = None
# `filenames` (not `files`) so the google.colab `files` module imported by the
# upload cell is not shadowed in the shared notebook namespace.
for root, dirs, filenames in os.walk(dataset_root):
    if {"images", "labels", "classes.txt"} <= set(filenames + dirs):
        base = Path(root)
        break

if base is None:
    raise FileNotFoundError(
        f"No folder containing images/, labels/ and classes.txt found under {dataset_root}"
    )

images_dir = base / "images"
labels_dir = base / "labels"
negatives_dir = base / "negatives"

# Set root: always rebuild the YOLO layout from scratch.
yolo_root = Path("/content/yolo_dataset")
if yolo_root.exists():
    shutil.rmtree(yolo_root)

for p in ["images/train","images/val","labels/train","labels/val"]:
    (yolo_root / p).mkdir(parents=True)

# Train/val split (80/20, by sorted file name — deterministic, not shuffled).
# Only images that have a label file are considered, so images that would be
# dropped anyway cannot skew the ratio or leave the val split empty.
imgs = sorted(
    img for img in images_dir.glob("*")
    if img.is_file() and (labels_dir / f"{img.stem}.txt").exists()
)
split = int(len(imgs)*0.8)

def copy(imgs, i_dst, l_dst):
    """Copy labeled images to `i_dst` and their rewritten labels to `l_dst`.

    Images without a matching `<stem>.txt` in `labels_dir` are skipped.
    Every label row has its class id replaced by 0 (single-class dataset);
    the remaining fields are written back unchanged.
    """
    for img in imgs:
        lbl = labels_dir / f"{img.stem}.txt"
        if not lbl.exists():
            continue
        shutil.copy(img, i_dst/img.name)
        # collapsing class 0
        with open(lbl) as src, open(l_dst/lbl.name, "w") as dst:
            for line in src:
                fields = line.split()
                if not fields:
                    # A blank line would otherwise turn into a bogus "0 " row.
                    continue
                dst.write(" ".join(["0", *fields[1:]]) + "\n")

copy(imgs[:split], yolo_root/"images/train", yolo_root/"labels/train")
copy(imgs[split:], yolo_root/"images/val", yolo_root/"labels/val")

# Add negatives: background images get an empty label file.
# NOTE(review): negatives go to the train split only, so the val split holds
# background samples only if a source label file is itself empty — confirm
# this is intended, since the threshold-tuning cell relies on empty val labels.
if negatives_dir.exists():
    for img in negatives_dir.glob("*"):
        if not img.is_file():
            continue
        shutil.copy(img, yolo_root/"images/train"/img.name)
        (yolo_root/"labels/train"/f"{img.stem}.txt").touch()

# data.yaml
yaml_path = yolo_root/"data.yaml"
dataset_config = {
    "path": str(yolo_root),
    "train": "images/train",
    "val": "images/val",
    "nc": 1,
    "names": ["ferret"]
}
# Context manager so the file is flushed and closed before training reads it.
with open(yaml_path, "w") as yaml_file:
    yaml.dump(dataset_config, yaml_file)

print("Ferret Dataclass set.")


In [None]:
from ultralytics import YOLO
import comet_ml
# Stage 1 training: start from the uploaded weights, or resume from an
# earlier run's checkpoint when one exists.

# Fallback so this cell also works when the Comet configuration cell did not
# define the project name (otherwise the lines below raise NameError).
try:
    COMET_PROJECT_NAME
except NameError:
    COMET_PROJECT_NAME = "ferret-yolo"

# NOTE(review): this path is hard-coded; with project=COMET_PROJECT_NAME and
# name="ferret" the trainer may write its run elsewhere — confirm it matches
# the directory actually produced, otherwise the resume branch never triggers.
run_dir = Path("/content/runs/detect/ferret2")
# NOTE(review): Ultralytics documents resuming from weights/last.pt; verify
# that best.pt is really the checkpoint wanted here.
weights = run_dir/"weights/best.pt"

model = YOLO(str(weights if weights.exists() else model_path))
comet_ml.login(project_name=COMET_PROJECT_NAME)
model.train(
    project=COMET_PROJECT_NAME,
    save_period=1,  # keep a checkpoint for every epoch
    save_json=True,
    data=str(yaml_path),
    epochs=50,
    imgsz=640,
    batch=16,
    single_cls=True,
    name="ferret",
    resume=weights.exists()  # only resume when a previous checkpoint was found
)

In [None]:
from sklearn.metrics import precision_recall_curve
import numpy as np

# Pick the image-level confidence threshold that maximizes F1 on the val split.
# An image counts as positive when its label file is non-empty; its score is
# the highest box confidence the model predicts for it (0 when no box).
MIN_CONF = 0.01  # lowest confidence requested from the predictor

y_true, y_scores = [], []

for img in (yolo_root/"images/val").glob("*"):
    lbl = yolo_root/"labels/val"/f"{img.stem}.txt"
    # A missing label file is treated like an empty one (no object).
    has_obj = lbl.exists() and lbl.stat().st_size > 0
    preds = model.predict(img, conf=MIN_CONF, verbose=False)[0]
    max_conf = max([b.conf.item() for b in preds.boxes], default=0)
    y_true.append(int(has_obj))
    y_scores.append(max_conf)

if not y_true:
    raise ValueError("Validation split is empty; cannot tune the confidence threshold")
if len(set(y_true)) < 2:
    # With a single class precision or recall is constant, so the F1 optimum
    # collapses to an extreme threshold and carries no information.
    print("Warning: validation labels contain a single class; the threshold below is not meaningful")

precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
# precision/recall have one extra trailing point (precision=1, recall=0) that
# has no threshold; drop it so the F1 array lines up with `thresholds`.
precision, recall = precision[:-1], recall[:-1]
f1 = 2*(precision*recall)/(precision+recall+1e-6)
best_idx = int(np.argmax(f1))
# Plain float (not a numpy scalar) for downstream config; never below MIN_CONF
# because no prediction with a lower confidence was ever produced.
best_conf = max(float(thresholds[best_idx]), MIN_CONF)

print("Optimal confidence threshold:", best_conf)


In [None]:
# Stage 2: fine-tune the already trained model for 20 more epochs, using the
# tuned confidence threshold for validation.
# `resume=True` is intentionally not passed: resuming is for interrupted runs
# and restores the checkpoint's original arguments, so on a finished run it
# either fails ("nothing to resume") or ignores the epochs/conf given here.
model.train(
    data=str(yaml_path),
    epochs=20,
    imgsz=640,
    batch=16,
    single_cls=True,
    conf=float(best_conf),  # plain float, in case best_conf is a numpy scalar
)