In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# =========================
# Mastomys Natalensis YOLO
# =========================

# 0) Setup
# ----------
!pip -q install ultralytics==8.2.80 pycocotools==2.0.7 opencv-python>=4.10.0.84

import sys, json, shutil, glob, random, math, time
from pathlib import Path
from typing import List, Dict
import cv2
from ultralytics import YOLO

# ---- Config (EDIT IF NEEDED) ----
USE_COCO_TO_YOLO_CONVERTER = False  # True if your dataset is in COCO JSON format
DATASET_DIRNAME = "remote-sensing"  # <-- your dataset under /kaggle/input/
MODEL_VARIANT = "yolov8s.pt"        # yolov8n/s/m/l/x available
PROJECT_NAME = "mastomys-yolo"
RUN_NAME = "exp-mastomys-v1"
SEED = 123
random.seed(SEED); np.random.seed(SEED)

KAGGLE_INPUT = Path('/kaggle/input')
KAGGLE_WORK  = Path('/kaggle/working')

root = KAGGLE_INPUT / DATASET_DIRNAME
if not root.exists():
    avail = [p.name for p in KAGGLE_INPUT.iterdir() if p.is_dir()]
    raise AssertionError(f"Dataset folder not found: {root}. Available datasets: {avail}")

print("Dataset root:", root)

# 1) Detect dataset layout
# -------------------------
yaml_candidates = list(root.rglob('data.yaml'))
data_yaml = yaml_candidates[0] if yaml_candidates else None
has_yolo_dirs = (root/'images').exists() and (root/'labels').exists()
has_coco      = (root/'annotations').exists()

def ensure_yolo_yaml():
    """Create a minimal data.yaml if we have YOLO folders but no YAML."""
    global data_yaml
    tr_img = root/'images'/'train'
    va_img = root/'images'/'val'
    tr_lbl = root/'labels'/'train'
    va_lbl = root/'labels'/'val'
    if not (tr_img.exists() and va_img.exists() and tr_lbl.exists() and va_lbl.exists()):
        raise AssertionError(
            "YOLO layout detected but missing split folders. Expected:\n"
            " images/train, images/val, labels/train, labels/val"
        )
    if not list(tr_lbl.glob('*.txt')):
        raise AssertionError("No YOLO label .txt files found in labels/train. You need annotations to train.")
    import yaml
    data_yaml = root/'data.yaml'
    yml = {
        'path': str(root.resolve()),
        'train': 'images/train',
        'val'  : 'images/val',
        'test' : 'images/test' if (root/'images'/'test').exists() else None,
        'names': ['mastomys'],
        'nc'   : 1
    }
    yml = {k:v for k,v in yml.items() if v is not None}
    with open(data_yaml, 'w') as f:
        yaml.safe_dump(yml, f, sort_keys=False)
    print("Created data.yaml at:", data_yaml)

if data_yaml:
    print("Found YOLO data.yaml:", data_yaml)
elif has_yolo_dirs:
    try:
        ensure_yolo_yaml()
    except AssertionError as e:
        print("⚠️", e)
elif has_coco:
    USE_COCO_TO_YOLO_CONVERTER = True
    print("Detected COCO-style annotations/. Will convert to YOLO...")
else:
    print("No YOLO data.yaml, no YOLO images/labels folders, and no COCO annotations/ found.")
    print("→ If you only have raw images, the notebook will run INFERENCE-ONLY with a pre-trained YOLO.")

# 2) (Optional) Convert COCO -> YOLO
# -----------------------------------
if USE_COCO_TO_YOLO_CONVERTER:
    import yaml
    from collections import defaultdict

    coco_dir = root/'annotations'
    img_root = root/'images'
    save_root = root

    def load_json(p):
        if not p.exists():
            raise FileNotFoundError(f"Missing COCO file: {p}")
        with open(p, 'r') as f:
            return json.load(f)

    coco_train = load_json(coco_dir/'instances_train.json')
    coco_val   = load_json(coco_dir/'instances_val.json')

    def coco_split_to_yolo(coco_obj, split):
        imgs = {im['id']: im for im in coco_obj['images']}
        cats = {c['id']: c for c in coco_obj['categories']}
        id2idx = {cid:i for i,(cid,_) in enumerate(sorted(cats.items()))}
        names = [cats[cid]['name'] for cid in sorted(cats)]
        out_img_dir = save_root/'images'/split
        out_lbl_dir = save_root/'labels'/split
        out_img_dir.mkdir(parents=True, exist_ok=True)
        out_lbl_dir.mkdir(parents=True, exist_ok=True)

        per_img = {}
        for ann in coco_obj['annotations']:
            per_img.setdefault(ann['image_id'], []).append(ann)

        for img_id, im in imgs.items():
            src = img_root/split/im['file_name']
            if not src.exists():
                src = img_root/im['file_name']
            if src.exists():
                shutil.copy(src, out_img_dir/src.name)
            else:
                print('Missing image', src)
            lbl_path = out_lbl_dir/(Path(im['file_name']).stem + '.txt')
            W, H = im['width'], im['height']
            lines = []
            for ann in per_img.get(img_id, []):
                if 'bbox' not in ann:
                    continue
                x,y,w,h = ann['bbox']
                cx = (x + w/2)/W
                cy = (y + h/2)/H
                nw = w/W
                nh = h/H
                cls = id2idx[ann['category_id']]
                lines.append(f"{cls} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}")
            with open(lbl_path,'w') as f:
                f.write('\n'.join(lines))
        return names

    names_train = coco_split_to_yolo(coco_train, 'train')
    names_val   = coco_split_to_yolo(coco_val,   'val')
    names = names_train if names_train==names_val else sorted(set(names_train+names_val))

    import yaml
    data_yaml = root/'data.yaml'
    ydata = {
        'path': str(root.resolve()),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test' if (root/'images'/'test').exists() else None,
        'names': names,
        'nc': len(names)
    }
    ydata = {k:v for k,v in ydata.items() if v is not None}
    with open(data_yaml,'w') as f:
        yaml.safe_dump(ydata, f, sort_keys=False)
    print("Wrote YOLO data.yaml from COCO:", data_yaml)

# Decide: TRAIN vs INFERENCE-ONLY
RUN_TRAINING = data_yaml is not None

# 3) Train (when labels are available)
# ------------------------------------
if RUN_TRAINING:
    from ultralytics.yolo.utils.torch_utils import select_device
    device = select_device('')
    print("Device:", device)

    # Hyperparams tuned for small wildlife datasets (tiny object friendly)
    EPOCHS   = 100
    BATCH    = 16
    IMG_SZ   = 640
    PATIENCE = 20

    overrides = {
        'imgsz': IMG_SZ,
        'epochs': EPOCHS,
        'batch': BATCH,
        'workers': 8,
        'cos_lr': True,
        'patience': PATIENCE,
        'mixup': 0.1,
        'mosaic': 1.0,
        'degrees': 5.0,
        'scale': 0.4,
        'shear': 1.0,
        'flipud': 0.0,
        'fliplr': 0.5,
        'hsv_h': 0.015,
        'hsv_s': 0.7,
        'hsv_v': 0.4,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 1.5,
        'optimizer': 'auto',
        'lr0': 0.01,
        'lrf': 0.01,
    }

    model = YOLO(MODEL_VARIANT)
    print("Starting train …")
    train_result = model.train(
        data=str(data_yaml),
        project=PROJECT_NAME,
        name=RUN_NAME,
        **overrides
    )
    print("Train complete.")
    print(train_result)

    # 4) Validate & visualize a few predictions
    # ------------------------------------------
    val_metrics = model.val()
    print("Validation metrics:", val_metrics)

    val_dir = Path(str(root/'images'/'val'))
    sample_imgs = sorted(val_dir.glob('*.jpg'))[:12] if val_dir.exists() else []
    pred_root = KAGGLE_WORK/'pred_samples'
    pred_root.mkdir(exist_ok=True)

    if sample_imgs:
        _ = model.predict(
            source=[str(p) for p in sample_imgs],
            save=True,
            project=str(pred_root),
            name='preds',
            imgsz=IMG_SZ,
            conf=0.25,
            iou=0.5
        )
        print("Saved sample predictions under:", pred_root)
    else:
        print("No sample val images found to visualize.")

    # 5) Export for deployment
    # -------------------------
    export_dir = KAGGLE_WORK/'exports'
    export_dir.mkdir(exist_ok=True)

    for fmt in ['onnx', 'torchscript', 'openvino']:
        try:
            exported = model.export(format=fmt, dynamic=True, imgsz=IMG_SZ)
            shutil.move(exported, export_dir/Path(exported).name)
            print("Exported:", fmt, "->", export_dir/Path(exported).name)
        except Exception as e:
            print(f"Export {fmt} skipped: {e}")

else:
    # Inference-only demo (no labels found)
    print("⚠️ No labels or data.yaml detected. Running inference-only on available images with a pre-trained model.")
    model = YOLO(MODEL_VARIANT)

    # Collect some images recursively (jpg/png/jpeg) under the dataset
    exts = ('.jpg', '.jpeg', '.png')
    imgs = [str(p) for p in root.rglob('*') if p.suffix.lower() in exts]
    imgs = imgs[:48]  # limit for demo
    if not imgs:
        raise RuntimeError("No images found to run inference on.")

    pred_root = KAGGLE_WORK/'pred_samples'
    pred_root.mkdir(exist_ok=True)

    _ = model.predict(
        source=imgs,
        save=True,
        project=str(pred_root),
        name='preds_nolabels',
        imgsz=640,
        conf=0.25,
        iou=0.5
    )
    print("Saved inference-only predictions under:", pred_root)

# 6) Minimal inference helper (for your application)
# ---------------------------------------------------
class MastomysDetector:
    def __init__(self, weights_path: str = MODEL_VARIANT, conf: float = 0.25, iou: float = 0.5, imgsz: int = 640, device: str = None):
        self.model = YOLO(weights_path)
        self.conf = conf
        self.iou = iou
        self.imgsz = imgsz
        self.device = device or ''

    def predict(self, image_bgr: np.ndarray) -> List[Dict]:
        """Return [{'bbox_xyxy':[x1,y1,x2,y2], 'conf':float, 'cls':int}, ...]"""
        temp_path = KAGGLE_WORK/'_tmp_infer.jpg'
        cv2.imwrite(str(temp_path), image_bgr)
        res = self.model.predict(
            source=str(temp_path),
            imgsz=self.imgsz,
            conf=self.conf,
            iou=self.iou,
            verbose=False,
            device=self.device
        )
        dets = []
        for r in res:
            if r.boxes is None:
                continue
            for b in r.boxes:
                xyxy = b.xyxy.cpu().numpy().reshape(-1).tolist()
                conf = float(b.conf.cpu().numpy().reshape(-1)[0])
                cls  = int(b.cls.cpu().numpy().reshape(-1)[0])
                dets.append({'bbox_xyxy': xyxy, 'conf': conf, 'cls': cls})
        return dets

print("✅ Notebook finished. Check /kaggle/working/pred_samples and /kaggle/working/exports (if training ran).")


/kaggle/input/lassa-fever/_pro_med_202494.csv
/kaggle/input/lassa-fever/_who_dons_202494.csv
/kaggle/input/lassa-fever/_reliefweb_disasters_202494.csv
/kaggle/input/lassa-fever/_event_news_eios_202494_processed.csv
/kaggle/input/lassa-fever/_event_news_eios_202494.csv
/kaggle/input/lassa-fever/_reliefweb_updates_202494.csv
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10037.jpg
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10039.jpg
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10022.jpg
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10006.jpg
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10036.jpg
/kaggle/input/remote-sensing/Photos_of_Natal_Multimammate_Mouse_(Mastomys_natalensis)_·_iNaturalist/10027.jpg
/kaggle/input/r

100%|██████████| 21.5M/21.5M [00:00<00:00, 138MB/s] 



0: 640x640 1 cat, 9.0ms
1: 640x640 1 bird, 9.0ms
2: 640x640 1 dog, 1 surfboard, 9.0ms
3: 640x640 1 person, 9.0ms
4: 640x640 1 bird, 9.0ms
5: 640x640 1 cat, 1 bear, 9.0ms
6: 640x640 1 person, 1 cat, 9.0ms
7: 640x640 1 bear, 9.0ms
8: 640x640 1 bird, 9.0ms
9: 640x640 1 bear, 9.0ms
10: 640x640 1 cat, 1 dog, 9.0ms
11: 640x640 1 bird, 9.0ms
12: 640x640 1 bird, 1 bear, 9.0ms
13: 640x640 1 person, 1 cat, 9.0ms
14: 640x640 (no detections), 9.0ms
15: 640x640 1 cat, 9.0ms
16: 640x640 1 cat, 9.0ms
17: 640x640 1 cat, 9.0ms
18: 640x640 1 bird, 9.0ms
19: 640x640 1 cat, 9.0ms
20: 640x640 1 cat, 9.0ms
21: 640x640 1 bird, 9.0ms
22: 640x640 1 person, 1 dog, 9.0ms
23: 640x640 1 cat, 9.0ms
24: 640x640 1 cat, 9.0ms
25: 640x640 1 bear, 9.0ms
26: 640x640 1 bear, 9.0ms
27: 640x640 1 cat, 1 bed, 9.0ms
28: 640x640 1 cat, 1 carrot, 9.0ms
29: 640x640 1 donut, 9.0ms
30: 640x640 (no detections), 9.0ms
31: 640x640 1 donut, 9.0ms
32: 640x640 (no detections), 9.0ms
33: 640x640 2 persons, 1 cat, 1 dog, 9.0ms
34: 640x64

In [2]:
!pip -q install ultralytics==8.2.80 opencv-python==4.10.0.84 pyyaml==6.0.2
import random, shutil
from pathlib import Path

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m763.0/763.0 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 4.1.1 requires pyarrow>=21.0.0, but you have pyarrow 19.0.1 which is incompatible.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
dopamine-rl 4.1.2 requires gymnasium>=1.0.0, but you have gymnasium 0.29.0 which is incompatible.
tokenizers 0.21.2 requires huggingface-hub<1.0,>=0.16.4, but you have huggingface-hub 1.0.0rc2 which is incompatible.
gradio 5.38.1 requires pydantic<2.12,>=2.0, but you have pydantic 2.12.0a1 which is incompatible.
pydrive2 1.21.3 requires cryptography<44, but you have cryptography 46.

In [3]:
# Minimal, conflict-free installs
!pip -q install --no-deps ultralytics==8.2.80
# Only if you plan to convert COCO -> YOLO:
# !pip -q install --no-deps pycocotools==2.0.7

# (Optional) for CPU ONNX inference/export preview:
# !pip -q install --no-deps onnxruntime==1.18.1

# Quick sanity check
import ultralytics, cv2, sys
print("Ultralytics:", ultralytics.__version__)
print("OpenCV:", cv2.__version__)
print("Python:", sys.version)


Ultralytics: 8.2.80
OpenCV: 4.11.0
Python: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]


In [4]:
from ultralytics import YOLO
from pathlib import Path
import shutil

data_yaml = Path("/kaggle/working/rs_yolo/data.yaml")
assert data_yaml.exists(), "data.yaml missing — run the bootstrap cell first."

model = YOLO("yolov8s.pt")
results = model.train(
    data=str(data_yaml),
    imgsz=640,
    epochs=50,
    batch=16,      # drop to 8 if you hit CUDA OOM
    patience=15,
    cos_lr=True,
    project="mastomys-yolo",
    name="rs_bootstrap_v1"
)


AssertionError: data.yaml missing — run the bootstrap cell first.

In [None]:
# ========= Mastomys Natalensis YOLO (end-to-end, single cell) =========
# Uses your dataset at /kaggle/input/remote-sensing
# 1) Installs  2) Bootstrap YOLO dataset with pseudo-labels  3) Train  4) Validate/Pred  5) Export

# --------------- 1) Install minimal deps (no dependency churn) ---------------
!pip -q install --no-deps ultralytics==8.2.80 pyyaml==6.0.2

# --------------- 2) Imports & config ---------------
import os, shutil, random, json
from pathlib import Path
from typing import List, Dict
import yaml
import cv2
from ultralytics import YOLO

random.seed(42)

# Source dataset (your images only)
SRC_DATA = Path("https://kaggle.com/datasets/05169154a3a6d2c02d6dda0cd3fc378cde000db0572ff106a4380d9407a76f93")
assert SRC_DATA.exists(), f"Source dataset not found: {SRC_DATA}"

# Working YOLO dataset we will create
YOLO_WORK = Path("/kaggle/working/rs_yolo")
IMGS_EXTS = {".jpg", ".jpeg", ".png"}
MAX_IMAGES = None  # set to an int to limit for quick tests (e.g., 300); None uses all

# Training/export config
MODEL_VARIANT = "yolov8s.pt"    # change to yolov8m.pt later for stronger model
IMG_SIZE     = 640
EPOCHS       = 50               # bump to 100+ after you confirm the flow
BATCH        = 16               # drop to 8 if OOM
PROJECT_NAME = "mastomys-yolo"
RUN_NAME     = "rs_bootstrap_v1"

# --------------- 3) Build images list from your dataset ---------------
all_imgs = [p for p in SRC_DATA.rglob("*") if p.suffix.lower() in IMGS_EXTS]
assert all_imgs, f"No images (*.jpg/*.jpeg/*.png) found under {SRC_DATA}"

if MAX_IMAGES is not None:
    all_imgs = all_imgs[:MAX_IMAGES]

print(f"[INFO] Found {len(all_imgs)} images under {SRC_DATA}")

# --------------- 4) Create train/val split (80/20) and copy to working dir ---------------
random.shuffle(all_imgs)
cut = int(0.8 * len(all_imgs))
splits = {"train": all_imgs[:cut], "val": all_imgs[cut:]}

# Make YOLO folders
for sp in ["train", "val"]:
    (YOLO_WORK/"images"/sp).mkdir(parents=True, exist_ok=True)
    (YOLO_WORK/"labels"/sp).mkdir(parents=True, exist_ok=True)

# Copy images
for sp, paths in splits.items():
    for p in paths:
        dst = YOLO_WORK/"images"/sp/p.name
        if not dst.exists():
            shutil.copy(p, dst)
print(f"[INFO] Copied images to {YOLO_WORK/'images'}")

# --------------- 5) Pseudo-label with a pretrained YOLO (bootstrap) ---------------
# NOTE: This only proposes boxes (class 0 = 'mastomys'). You should later correct labels.
bootstrap_model = YOLO(MODEL_VARIANT)

def write_yolo_txt(txt_path: Path, boxes, W: int, H: int):
    lines = []
    for (x1, y1, x2, y2, conf, cls_id) in boxes:
        cx = ((x1 + x2) / 2.0) / W
        cy = ((y1 + y2) / 2.0) / H
        w  = (x2 - x1) / W
        h  = (y2 - y1) / H
        # Treat everything as class 0 to bootstrap 'mastomys'
        lines.append(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")
    with open(txt_path, "w") as f:
        f.write("\n".join(lines))

def bootstrap_split(split_name: str, conf=0.35, iou=0.5):
    img_dir = YOLO_WORK/"images"/split_name
    lbl_dir = YOLO_WORK/"labels"/split_name
    imgs = sorted([p for p in img_dir.iterdir() if p.suffix.lower() in IMGS_EXTS])
    for img_path in imgs:
        im = cv2.imread(str(img_path))
        if im is None:
            print(f"[WARN] Could not read: {img_path}")
            continue
        H, W = im.shape[:2]
        res = bootstrap_model.predict(
            source=str(img_path),
            imgsz=IMG_SIZE, conf=conf, iou=iou, verbose=False
        )
        boxes = []
        for r in res:
            if r.boxes is None: 
                continue
            for b in r.boxes:
                x1, y1, x2, y2 = b.xyxy.cpu().numpy().reshape(-1).tolist()
                c  = float(b.conf.cpu().numpy().reshape(-1)[0])
                cls= int(b.cls.cpu().numpy().reshape(-1)[0])
                boxes.append((x1, y1, x2, y2, c, cls))
        write_yolo_txt(lbl_dir/(img_path.stem + ".txt"), boxes, W, H)

print("[INFO] Bootstrapping pseudo-labels (this may take a while)...")
bootstrap_split("train", conf=0.35, iou=0.5)
bootstrap_split("val",   conf=0.35, iou=0.5)
print("[INFO] Pseudo-labels written to", YOLO_WORK/"labels")

# --------------- 6) Write data.yaml ---------------
data_yaml = YOLO_WORK/"data.yaml"
with open(data_yaml, "w") as f:
    yaml.safe_dump({
        "path": str(YOLO_WORK.resolve()),
        "train": "images/train",
        "val":   "images/val",
        "names": ["mastomys"],
        "nc": 1
    }, f, sort_keys=False)
assert data_yaml.exists(), "data.yaml failed to write"
print("[INFO] Created", data_yaml)

# --------------- 7) Train on the bootstrapped dataset ---------------
print("[INFO] Starting training...")
train_model = YOLO(MODEL_VARIANT)
results = train_model.train(
    data=str(data_yaml),
    imgsz=IMG_SIZE,
    epochs=EPOCHS,
    batch=BATCH,
    patience=15,
    cos_lr=True,
    project=PROJECT_NAME,
    name=RUN_NAME,
    workers=8
)
print("[INFO] Training done.")

# --------------- 8) Validate & save a few prediction images ---------------
print("[INFO] Validating...")
val_metrics = train_model.val()
print("[INFO] Val metrics:", val_metrics)

pred_out = Path("/kaggle/working/pred_samples"); pred_out.mkdir(exist_ok=True)
_ = train_model.predict(
    source=str(YOLO_WORK/"images"/"val"),
    save=True, project=str(pred_out), name="val_samples",
    imgsz=IMG_SIZE, conf=0.25, iou=0.5
)
print("[INFO] Saved prediction samples to:", pred_out)

# --------------- 9) Export weights for your app ---------------
print("[INFO] Exporting weights...")
export_dir = Path("/kaggle/working/exports"); export_dir.mkdir(exist_ok=True)
for fmt in ["onnx", "torchscript", "openvino"]:
    try:
        out = train_model.export(format=fmt, dynamic=True, imgsz=IMG_SIZE)
        shutil.move(out, export_dir/Path(out).name)
        print(f"[INFO] Exported: {fmt}")
    except Exception as e:
        print(f"[WARN] Export {fmt} skipped: {e}")

print("\n✅ DONE.")
print(" - YOLO dataset:", YOLO_WORK)
print(" - Training run:", f"{PROJECT_NAME}/{RUN_NAME}")
print(" - Predictions :", pred_out)
print(" - Exports     :", export_dir)


In [None]:
# ========= Mastomys Natalensis YOLO (auto-detect dataset under /kaggle/input) =========
!pip -q install --no-deps ultralytics==8.2.80 pyyaml==6.0.2

import os, shutil, random
from pathlib import Path
from typing import List, Dict
import yaml, cv2
from ultralytics import YOLO

# ---------------- Config (edit if you want) ----------------
PREFERRED_DATASET_NAME = "remote-sensing"  # try this first if present
IMG_EXTS = {".jpg", ".jpeg", ".png"}
MODEL_VARIANT = "yolov8s.pt"
IMG_SIZE     = 640
EPOCHS       = 50
BATCH        = 16
PROJECT_NAME = "mastomys-yolo"
RUN_NAME     = "rs_bootstrap_v1"
random.seed(42)

# ---------------- Find dataset mounted under /kaggle/input ----------------
INPUT_ROOT = Path("/kaggle/input")
assert INPUT_ROOT.exists(), "Kaggle input mount missing."

def count_imgs(d: Path) -> int:
    return sum(1 for _ in d.rglob("*") if _.suffix.lower() in IMG_EXTS)

candidates = [p for p in INPUT_ROOT.iterdir() if p.is_dir()]
assert candidates, "No datasets attached. Click 'Add data' in the right sidebar."

# Prefer a directory matching PREFERRED_DATASET_NAME; else pick the one with most images
src = None
for c in candidates:
    if c.name.lower() == PREFERRED_DATASET_NAME.lower():
        src = c; break
if src is None:
    # pick dataset with most images
    img_counts = [(c, count_imgs(c)) for c in candidates]
    img_counts.sort(key=lambda x: x[1], reverse=True)
    src, nimgs = img_counts[0]
    if nimgs == 0:
        raise AssertionError(f"No images (*.jpg/*.jpeg/*.png) found in any dataset under {INPUT_ROOT}. Attached: {[c.name for c in candidates]}")
print(f"[INFO] Using dataset: {src}")

# ---------------- Collect images ----------------
all_imgs = [p for p in src.rglob("*") if p.suffix.lower() in IMG_EXTS]
assert all_imgs, f"No images found under {src}"
print(f"[INFO] Found {len(all_imgs)} images")

# ---------------- Create working YOLO dataset ----------------
YOLO_WORK = Path("/kaggle/working/rs_yolo")
for sp in ["train","val"]:
    (YOLO_WORK/"images"/sp).mkdir(parents=True, exist_ok=True)
    (YOLO_WORK/"labels"/sp).mkdir(parents=True, exist_ok=True)

random.shuffle(all_imgs)
cut = int(0.8*len(all_imgs))
splits = {"train": all_imgs[:cut], "val": all_imgs[cut:]}

for sp, paths in splits.items():
    for p in paths:
        dst = YOLO_WORK/"images"/sp/p.name
        if not dst.exists():
            shutil.copy(p, dst)
print(f"[INFO] Copied images to {YOLO_WORK/'images'}")

# ---------------- Bootstrap pseudo-labels with pretrained YOLO ----------------
bootstrap_model = YOLO(MODEL_VARIANT)

def write_yolo_txt(txt_path: Path, boxes, W: int, H: int):
    lines = []
    for (x1,y1,x2,y2,conf,cls_id) in boxes:
        cx = ((x1+x2)/2)/W; cy = ((y1+y2)/2)/H; w = (x2-x1)/W; h = (y2-y1)/H
        lines.append(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")  # single class 0
    with open(txt_path, "w") as f: f.write("\n".join(lines))

def bootstrap_split(split_name: str, conf=0.35, iou=0.5):
    img_dir = YOLO_WORK/"images"/split_name
    lbl_dir = YOLO_WORK/"labels"/split_name
    imgs = sorted([p for p in img_dir.iterdir() if p.suffix.lower() in IMG_EXTS])
    for img_path in imgs:
        im = cv2.imread(str(img_path))
        if im is None: 
            print(f"[WARN] unreadable: {img_path}"); 
            continue
        H, W = im.shape[:2]
        res = bootstrap_model.predict(source=str(img_path), imgsz=IMG_SIZE, conf=conf, iou=iou, verbose=False)
        boxes = []
        for r in res:
            if r.boxes is None: continue
            for b in r.boxes:
                x1,y1,x2,y2 = b.xyxy.cpu().numpy().reshape(-1).tolist()
                c  = float(b.conf.cpu().numpy().reshape(-1)[0])
                cls= int(b.cls.cpu().numpy().reshape(-1)[0])
                boxes.append((x1,y1,x2,y2,c,cls))
        write_yolo_txt(lbl_dir/(img_path.stem + ".txt"), boxes, W, H)

print("[INFO] Bootstrapping pseudo-labels…")
bootstrap_split("train", conf=0.35, iou=0.5)
bootstrap_split("val",   conf=0.35, iou=0.5)
print("[INFO] Pseudo-labels written to", YOLO_WORK/"labels")

# ---------------- Write data.yaml ----------------
data_yaml = YOLO_WORK/"data.yaml"
with open(data_yaml,"w") as f:
    yaml.safe_dump({
        "path": str(YOLO_WORK.resolve()),
        "train": "images/train",
        "val"  : "images/val",
        "names": ["mastomys"],
        "nc"   : 1
    }, f, sort_keys=False)
print("[INFO] Created", data_yaml)

# ---------------- Train ----------------
print("[INFO] Training…")
model = YOLO(MODEL_VARIANT)
res = model.train(
    data=str(data_yaml),
    imgsz=IMG_SIZE,
    epochs=EPOCHS,
    batch=BATCH,
    patience=15,
    cos_lr=True,
    project=PROJECT_NAME,
    name=RUN_NAME,
    workers=8
)
print("[INFO] Training done.")

# ---------------- Validate & save prediction images ----------------
from pathlib import Path as _P
pred_out = _P("/kaggle/working/pred_samples"); pred_out.mkdir(exist_ok=True)
_ = model.predict(source=str(YOLO_WORK/"images"/"val"), save=True, project=str(pred_out), name="val_samples",
                  imgsz=IMG_SIZE, conf=0.25, iou=0.5)
print("[INFO] Saved predictions to:", pred_out)

# ---------------- Export weights ----------------
export_dir = _P("/kaggle/working/exports"); export_dir.mkdir(exist_ok=True)
for fmt in ["onnx","torchscript","openvino"]:
    try:
        out = model.export(format=fmt, dynamic=True, imgsz=IMG_SIZE)
        shutil.move(out, export_dir/_P(out).name)
        print(f"[INFO] Exported: {fmt}")
    except Exception as e:
        print(f"[WARN] Export {fmt} skipped: {e}")

print("\n✅ DONE")
print(" - Dataset used :", src)
print(" - YOLO dataset :", YOLO_WORK)
print(" - Run directory:", f\"{PROJECT_NAME}/{RUN_NAME}\")
print(" - Predictions  :", pred_out)
print(" - Exports      :", export_dir)


In [None]:
from pathlib import Path
IMG_EXTS = {".jpg",".jpeg",".png"}
ROOT = Path("/datasets/mostarindustries/remote-sensing/")  # <- this must exist

print("Mounted:", ROOT.exists(), ROOT)
imgs = [p for p in ROOT.rglob("*") if p.suffix.lower() in IMG_EXTS]
print("Found images:", len(imgs))
print("Sample paths:", [str(imgs[i]) for i in range(min(5, len(imgs)))])


In [None]:
from pathlib import Path
INPUT = Path("/kaggle/input")
dirs = [p for p in INPUT.iterdir() if p.is_dir()]
print("Mounted datasets:", [d.name for d in dirs])

# pick the one that starts with "remote-sensing"
src = next((d for d in dirs if d.name.lower().startswith("remote-sensing")), None)
print("Selected:", src)


In [None]:
# ========= Mastomys Natalensis YOLO (auto-detect remote-sensing*, end-to-end) =========
!pip -q install --no-deps ultralytics==8.2.80 pyyaml==6.0.2

import os, shutil, random
from pathlib import Path
import yaml, cv2
from ultralytics import YOLO

random.seed(42)

# --- auto-pick the mounted dataset whose name begins with "remote-sensing" ---
INPUT_ROOT = Path("/kaggle/input")
candidates = [p for p in INPUT_ROOT.iterdir() if p.is_dir() and p.name.lower().startswith("remote-sensing")]
assert candidates, "Dataset not attached. Click 'Add data' and add mostarindustries/remote-sensing."
SRC = candidates[0]  # first match (handles remote-sensing, remote-sensing-1, etc.)
print("[INFO] Using dataset:", SRC)

IMG_EXTS = {".jpg",".jpeg",".png"}
YOLO_WORK = Path("/kaggle/working/rs_yolo")
MODEL_VARIANT = "yolov8s.pt"
IMG_SIZE, EPOCHS, BATCH = 640, 50, 16
PROJECT_NAME, RUN_NAME = "mastomys-yolo", "rs_bootstrap_v1"

# --- gather images recursively ---
all_imgs = [p for p in SRC.rglob("*") if p.suffix.lower() in IMG_EXTS]
assert all_imgs, f"No images found under {SRC}"
print(f"[INFO] Found {len(all_imgs)} images")

# --- make YOLO folders & 80/20 split ---
for sp in ("train","val"):
    (YOLO_WORK/"images"/sp).mkdir(parents=True, exist_ok=True)
    (YOLO_WORK/"labels"/sp).mkdir(parents=True, exist_ok=True)
random.shuffle(all_imgs)
cut = int(0.8*len(all_imgs))
splits = {"train": all_imgs[:cut], "val": all_imgs[cut:]}
for sp, paths in splits.items():
    for s in paths:
        d = YOLO_WORK/"images"/sp/s.name
        if not d.exists():
            shutil.copy(s, d)
print("[INFO] Copied to", YOLO_WORK/"images")

# --- bootstrap pseudo-labels (all detections → class 0 'mastomys') ---
mdl_boot = YOLO(MODEL_VARIANT)
def write_lbl(txt_path, boxes, W, H):
    with open(txt_path,"w") as f:
        for (x1,y1,x2,y2,conf,cls_id) in boxes:
            cx=((x1+x2)/2)/W; cy=((y1+y2)/2)/H; w=(x2-x1)/W; h=(y2-y1)/H
            f.write(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

def bootstrap(split, conf=0.30, iou=0.5):
    img_dir, lbl_dir = YOLO_WORK/"images"/split, YOLO_WORK/"labels"/split
    for img in img_dir.iterdir():
        if img.suffix.lower() not in IMG_EXTS: continue
        im = cv2.imread(str(img)); 
        if im is None: 
            print("[WARN] unreadable:", img); 
            continue
        H,W = im.shape[:2]
        res = mdl_boot.predict(str(img), imgsz=IMG_SIZE, conf=conf, iou=iou, verbose=False)
        boxes=[]
        for r in res:
            if r.boxes is None: continue
            for b in r.boxes:
                x1,y1,x2,y2 = b.xyxy.cpu().numpy().reshape(-1).tolist()
                c  = float(b.conf.cpu().numpy().reshape(-1)[0])
                cls= int(b.cls.cpu().numpy().reshape(-1)[0])
                boxes.append((x1,y1,x2,y2,c,cls))
        write_lbl(lbl_dir/(img.stem+".txt"), boxes, W, H)

print("[INFO] Bootstrapping labels…")
bootstrap("train", conf=0.30); bootstrap("val", conf=0.30)
print("[INFO] Labels written to", YOLO_WORK/"labels")

# --- data.yaml ---
data_yaml = YOLO_WORK/"data.yaml"
with open(data_yaml,"w") as f:
    yaml.safe_dump({"path": str(YOLO_WORK.resolve()), "train":"images/train", "val":"images/val",
                    "names":["mastomys"], "nc":1}, f, sort_keys=False)
print("[INFO] data.yaml:", data_yaml)

# --- train ---
print("[INFO] Training…")
model = YOLO(MODEL_VARIANT)
model.train(data=str(data_yaml), imgsz=IMG_SIZE, epochs=EPOCHS, batch=BATCH,
            patience=15, cos_lr=True, project=PROJECT_NAME, name=RUN_NAME, workers=8)
print("[INFO] Training done.")

# --- predict samples ---
pred_dir = Path("/kaggle/working/pred_samples"); pred_dir.mkdir(exist_ok=True)
model.predict(source=str(YOLO_WORK/"images"/"val"), save=True, project=str(pred_dir),
              name="val_samples", imgsz=IMG_SIZE, conf=0.25, iou=0.5)
print("[INFO] Samples →", pred_dir)

# --- export ---
export_dir = Path("/kaggle/working/exports"); export_dir.mkdir(exist_ok=True)
for fmt in ("onnx","torchscript","openvino"):
    try:
        out = model.export(format=fmt, dynamic=True, imgsz=IMG_SIZE)
        shutil.move(out, export_dir/Path(out).name)
        print("[INFO] Exported:", fmt)
    except Exception as e:
        print("[WARN] Export", fmt, "skipped:", e)

print("\n✅ DONE\nYOLO dataset:", YOLO_WORK, "\nRun:", f"{PROJECT_NAME}/{RUN_NAME}",
      "\nPredictions:", pred_dir, "\nExports:", export_dir)


In [None]:
from ultralytics import YOLO
from pathlib import Path
import shutil

data_yaml = Path("/kaggle/working/rs_yolo/data.yaml")
assert data_yaml.exists(), "data.yaml missing"

model = YOLO("yolov8s.pt")
results = model.train(
    data=str(data_yaml),
    imgsz=512,   # smaller image for speed
    epochs=10,   # quick baseline
    batch=8,     # lower RAM
    patience=5,
    cos_lr=True,
    workers=0,   # safer on Kaggle CPU
    project="mastomys-yolo",
    name="rs_quick_cpu"
)


In [None]:
pip install -U ultralytics

In [None]:
from ultralytics import YOLO
from pathlib import Path

run_dir = Path("mastomys-yolo/rs_bootstrap_v1")
best = run_dir/"weights"/"best.pt"
assert best.exists(), f"best.pt not found in {run_dir/'weights'}"

pred_dir = Path("/kaggle/working/pred_samples"); pred_dir.mkdir(exist_ok=True)
YOLO(str(best)).predict(
    source="/kaggle/working/rs_yolo/images/val",
    save=True, project=str(pred_dir), name="val_samples",
    imgsz=640, conf=0.25, iou=0.5
)
print("Predictions saved to:", pred_dir)


In [None]:
# ===== Package trained model artifacts for publishing =====
from pathlib import Path
import shutil, json, textwrap, time, yaml

RUN_DIR      = Path("mastomys-yolo/rs_bootstrap_v1")
BEST_PT      = RUN_DIR/"weights"/"best.pt"
LAST_PT      = RUN_DIR/"weights"/"last.pt"
YOLO_DATA    = Path("/kaggle/working/rs_yolo/data.yaml")
EXPORTS_DIR  = Path("/kaggle/working/exports")
RELEASE_DIR  = Path("/kaggle/working/release"); RELEASE_DIR.mkdir(exist_ok=True, parents=True)

assert BEST_PT.exists(), f"best.pt not found in {RUN_DIR/'weights'}"
assert YOLO_DATA.exists(), "data.yaml missing"

# 1) Gather files
files = []
files += [BEST_PT, LAST_PT] if LAST_PT.exists() else [BEST_PT]
if EXPORTS_DIR.exists():
    files += list(EXPORTS_DIR.glob("*"))  # onnx/torchscript/openvino if present
files.append(YOLO_DATA)

# 2) Minimal model card (README.md)
with open(YOLO_DATA) as f:
    y = yaml.safe_load(f)
names = y.get("names", ["mastomys"])
card = f"""# Mastomys Natalensis Detector (YOLOv8s)

**Task:** Object detection  
**Classes:** {names}  
**Source images:** `/kaggle/input/remote-sensing` (bootstrapped labels)  
**Training run:** `{RUN_DIR}`

## Files
- `best.pt` — Ultralytics YOLOv8 weights (recommended)
- `last.pt` — last epoch
- Exports (if present): `.onnx`, `.torchscript`, `openvino`  
- `data.yaml` — dataset spec used for training

## Usage
```python
from ultralytics import YOLO
m = YOLO('best.pt')
m.predict('some_image.jpg', imgsz=640, conf=0.25)


In [None]:
# ===== Package trained model artifacts for publishing (no Markdown fences) =====
from pathlib import Path
import shutil, yaml, textwrap

# Adjust this if you trained with a different run name
RUN_DIR = Path("mastomys-yolo/rs_bootstrap_v1")
BEST_PT = RUN_DIR / "weights" / "best.pt"
LAST_PT = RUN_DIR / "weights" / "last.pt"
YOLO_DATA = Path("/kaggle/working/rs_yolo/data.yaml")
EXPORTS_DIR = Path("/kaggle/working/exports")

RELEASE_DIR = Path("/kaggle/working/release")
PKG_DIR = RELEASE_DIR / "mastomys_yolo_v1"
RELEASE_DIR.mkdir(parents=True, exist_ok=True)
PKG_DIR.mkdir(parents=True, exist_ok=True)

assert BEST_PT.exists(), f"best.pt not found: {BEST_PT}"
assert YOLO_DATA.exists(), f"data.yaml not found: {YOLO_DATA}"

# 1) Copy core files
to_copy = [BEST_PT]
if LAST_PT.exists():
    to_copy.append(LAST_PT)
if EXPORTS_DIR.exists():
    to_copy += list(EXPORTS_DIR.glob("*"))  # onnx / torchscript / openvino if present
to_copy.append(YOLO_DATA)

for src in to_copy:
    shutil.copy(src, PKG_DIR / src.name)

# 2) Write a simple README (no backticks)
with open(YOLO_DATA) as f:
    y = yaml.safe_load(f)
names = y.get("names", ["mastomys"])
readme = textwrap.dedent(f"""
    Mastomys Natalensis Detector (YOLOv8s)

    Task: Object detection
    Classes: {names}
    Source images: /kaggle/input/remote-sensing  (pseudo-labels were bootstrapped)
    Training run: {RUN_DIR}

    Files
    - best.pt  : Ultralytics YOLOv8 weights (recommended)
    - last.pt  : last epoch (if present)
    - exports  : ONNX / TorchScript / OpenVINO (if present)
    - data.yaml: dataset spec used for training

    Usage (Python)
    from ultralytics import YOLO
    m = YOLO("best.pt")
    m.predict("some_image.jpg", imgsz=640, conf=0.25)
""").strip() + "\n"
(PKG_DIR / "README.txt").write_text(readme)

# 3) Zip the package so it shows in Output
zip_path = RELEASE_DIR / "mastomys_yolo_v1.zip"
shutil.make_archive(str(zip_path.with_suffix('')), "zip", PKG_DIR)

print("✅ Package ready")
print("Folder :", PKG_DIR)
print("Archive:", zip_path)


In [None]:
from pathlib import Path
print("best.pt exists:", Path("mastomys-yolo/rs_bootstrap_v1/weights/best.pt").exists())
print("data.yaml exists:", Path("/kaggle/working/rs_yolo/data.yaml").exists())


In [None]:
from pathlib import Path
IMG_EXTS = {".jpg",".jpeg",".png"}
INPUT = Path("/kaggle/input")
print("Mounted:", [d.name for d in INPUT.iterdir() if d.is_dir()])

SRC = next((d for d in INPUT.iterdir() if d.is_dir() and d.name.lower().startswith("remote-sensing")), None)
assert SRC is not None, "Add your dataset via the right sidebar (Add data)."
imgs = [p for p in SRC.rglob("*") if p.suffix.lower() in IMG_EXTS]
print("Using:", SRC, " — images:", len(imgs))


In [None]:
# ========= Build rs_yolo + Train (CPU quick) + Predict + Export =========
!pip -q install --no-deps ultralytics==8.2.80 pyyaml==6.0.2

import os, shutil, random
from pathlib import Path
import yaml, cv2
from ultralytics import YOLO

random.seed(42)

INPUT_ROOT = Path("/kaggle/input")
SRC = next(d for d in INPUT_ROOT.iterdir() if d.is_dir() and d.name.lower().startswith("remote-sensing"))
print("[INFO] Using dataset:", SRC)

IMG_EXTS = {".jpg",".jpeg",".png"}
all_imgs = [p for p in SRC.rglob("*") if p.suffix.lower() in IMG_EXTS]
assert all_imgs, f"No images found under {SRC}"
print(f"[INFO] Found {len(all_imgs)} images")

YOLO_WORK = Path("/kaggle/working/rs_yolo")
for sp in ("train","val"):
    (YOLO_WORK/"images"/sp).mkdir(parents=True, exist_ok=True)
    (YOLO_WORK/"labels"/sp).mkdir(parents=True, exist_ok=True)

random.shuffle(all_imgs)
cut = int(0.8*len(all_imgs))
splits = {"train": all_imgs[:cut], "val": all_imgs[cut:]}
for sp, paths in splits.items():
    for s in paths:
        d = YOLO_WORK/"images"/sp/s.name
        if not d.exists():
            shutil.copy(s, d)
print("[INFO] Copied images →", YOLO_WORK/"images")

bootstrap = YOLO("yolov8s.pt")
def write_lbl(txt_path, boxes, W, H):
    with open(txt_path,"w") as f:
        for (x1,y1,x2,y2,conf,cls_id) in boxes:
            cx=((x1+x2)/2)/W; cy=((y1+y2)/2)/H; w=(x2-x1)/W; h=(y2-y1)/H
            f.write(f"0 {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

def bootstrap_split(split, conf=0.30, iou=0.5):
    img_dir, lbl_dir = YOLO_WORK/"images"/split, YOLO_WORK/"labels"/split
    imgs = [p for p in img_dir.iterdir() if p.suffix.lower() in IMG_EXTS]
    for img in imgs:
        im = cv2.imread(str(img))
        if im is None:
            print("[WARN] unreadable:", img); 
            continue
        H,W = im.shape[:2]
        res = bootstrap.predict(str(img), imgsz=640, conf=conf, iou=iou, verbose=False)
        boxes=[]
        for r in res:
            if r.boxes is None: continue
            for b in r.boxes:
                x1,y1,x2,y2 = b.xyxy.cpu().numpy().reshape(-1).tolist()
                c  = float(b.conf.cpu().numpy().reshape(-1)[0])
                cls= int(b.cls.cpu().numpy().reshape(-1)[0])
                boxes.append((x1,y1,x2,y2,c,cls))
        write_lbl((YOLO_WORK/"labels"/split/(img.stem+".txt")), boxes, W, H)

print("[INFO] Bootstrapping labels…")
bootstrap_split("train", conf=0.30); bootstrap_split("val", conf=0.30)
print("[INFO] Labels →", YOLO_WORK/"labels")

data_yaml = YOLO_WORK/"data.yaml"
with open(data_yaml,"w") as f:
    yaml.safe_dump({"path": str(YOLO_WORK.resolve()),
                    "train":"images/train","val":"images/val",
                    "names":["mastomys"],"nc":1}, f, sort_keys=False)
print("[INFO] data.yaml:", data_yaml)

print("[INFO] Training (quick CPU baseline)…")
model = YOLO("yolov8s.pt")
model.train(data=str(data_yaml),
            imgsz=512, epochs=10, batch=8, patience=5,
            cos_lr=True, workers=0,
            project="mastomys-yolo", name="rs_bootstrap_v1")

from pathlib import Path as _P
best = _P("mastomys-yolo/rs_bootstrap_v1/weights/best.pt")
print("[INFO] best.pt exists:", best.exists(), best)

pred_dir = _P("/kaggle/working/pred_samples"); pred_dir.mkdir(exist_ok=True)
YOLO(str(best) if best.exists() else "yolov8s.pt").predict(
    source=str(YOLO_WORK/"images"/"val"),
    save=True, project=str(pred_dir), name="val_samples",
    imgsz=640, conf=0.25, iou=0.5
)
print("[INFO] Samples →", pred_dir)

export_dir = _P("/kaggle/working/exports"); export_dir.mkdir(exist_ok=True)
if best.exists():
    m = YOLO(str(best))
    for fmt in ("onnx","torchscript","openvino"):
        try:
            out = m.export(format=fmt, dynamic=True, imgsz=640)
            shutil.move(out, export_dir/_P(out).name)
            print("[INFO] Exported:", fmt)
        except Exception as e:
            print("[WARN] Export", fmt, "skipped:", e)
print("[INFO] Exports →", export_dir)


In [None]:
# ===== Package artifacts to publish as a Kaggle Dataset =====
from pathlib import Path
import shutil, yaml, textwrap

RUN_DIR = Path("mastomys-yolo/rs_bootstrap_v1")
BEST_PT = RUN_DIR/"weights"/"best.pt"
YOLO_DATA = Path("/kaggle/working/rs_yolo/data.yaml")
EXPORTS_DIR = Path("/kaggle/working/exports")

RELEASE_DIR = Path("/kaggle/working/release"); PKG_DIR = RELEASE_DIR/"mastomys_yolo_v1"
RELEASE_DIR.mkdir(parents=True, exist_ok=True); PKG_DIR.mkdir(parents=True, exist_ok=True)

assert YOLO_DATA.exists(), "data.yaml missing — run Cell A"
if BEST_PT.exists(): shutil.copy(BEST_PT, PKG_DIR/"best.pt")
if (RUN_DIR/"weights"/"last.pt").exists(): shutil.copy(RUN_DIR/"weights"/"last.pt", PKG_DIR/"last.pt")
if EXPORTS_DIR.exists():
    for f in EXPORTS_DIR.glob("*"):
        shutil.copy(f, PKG_DIR/f.name)
shutil.copy(YOLO_DATA, PKG_DIR/"data.yaml")

names = yaml.safe_load(open(YOLO_DATA))["names"]
readme = textwrap.dedent(f"""
Mastomys Natalensis Detector (YOLOv8s)
Classes: {names}
Training run: {RUN_DIR}

Files:
- best.pt (if training finished)
- last.pt (if present)
- exports: ONNX / TorchScript / OpenVINO (if present)
- data.yaml

Usage:
from ultralytics import YOLO
m = YOLO("best.pt")
m.predict("image.jpg", imgsz=640, conf=0.25)
""").strip()+"\n"
(PKG_DIR/"README.txt").write_text(readme)

zip_base = RELEASE_DIR/"mastomys_yolo_v1"
shutil.make_archive(str(zip_base), "zip", PKG_DIR)
print("✅ Ready to publish:", zip_base.with_suffix(".zip"))
