In [None]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("GPU name:", torch.cuda.get_device_name(0))

# Upgrade pip, setuptools, and wheel (to avoid installation errors)
!pip install -U ultralytics>=8.3.0 pyyaml tqdm opencv-python matplotlib torch torchvision torchaudio

# Install dependencies to handle ONNX models
!pip install onnx onnxruntime

In [1]:
# ============================================================
# YOLOv11n Two-Phase Training (General + Fine-Tune)
# Kaggle-Optimized | Resumes Automatically After Expiration
# ============================================================

!pip uninstall -y wandb

import os
import shutil
import yaml
from pathlib import Path
from ultralytics import YOLO

# ============================================================
# Base Paths
# ============================================================
# Inputs (dataset, pretrained weights) live under BASE_DIR; everything this
# notebook writes (dataset cache, run directories) goes under WORKING_DIR.
WORKING_DIR = Path("/kaggle/working")
BASE_DIR = Path("/kaggle/input")
RUNS_DIR = WORKING_DIR.joinpath("runs", "detect")  # run folders are looked up here

# Pretrained checkpoint, kept as a plain string (it is handed straight to YOLO()).
MODEL_PATH = "/kaggle/input/yolo11n/other/default/1/yolo11n.pt"
DATASET_PATH = BASE_DIR.joinpath("reefscape", "data.yaml")

# ============================================================
# Helpers
# ============================================================
def print_header(text):
    """Print `text` as a banner: a blank line, a 65-char '=' rule, the text, another rule."""
    rule = "=" * 65
    # One print call; sep="\n" yields the same three output lines as three calls.
    print("\n" + rule, f"🚀 {text}", rule, sep="\n")

def ensure_dataset_writable(input_yaml):
    """Mirror the dataset splits into WORKING_DIR and write a YAML pointing at the copies.

    If ``WORKING_DIR / "data.yaml"`` already exists it is reused as-is and
    nothing is copied. Otherwise the ``train`` and ``val`` entries of
    ``input_yaml`` are resolved relative to the YAML's own directory, copied to
    ``WORKING_DIR / "reefscape_cached" / <split>``, and a rewritten YAML (with
    ``train``, ``val`` and ``path`` updated) is saved.

    Args:
        input_yaml: Path (``str`` or ``Path``) of the source dataset YAML.

    Returns:
        The cached YAML's path as a string.

    Raises:
        KeyError: If the YAML has no ``train`` or ``val`` entry.
    """
    cached_yaml = WORKING_DIR / "data.yaml"
    if cached_yaml.exists():
        return str(cached_yaml)

    input_yaml = Path(input_yaml)  # accept plain strings as well as Path objects
    with open(input_yaml, "r") as f:
        data = yaml.safe_load(f)

    base_dir = input_yaml.parent
    out_base = WORKING_DIR / "reefscape_cached"
    out_base.mkdir(parents=True, exist_ok=True)

    for split in ["train", "val"]:
        # NOTE(review): only the directory named in the YAML is copied; if it
        # points at an `images` sub-folder, sibling folders (e.g. labels) are
        # not mirrored — confirm against the dataset layout.
        src = base_dir / data[split]
        dst = out_base / split
        if not dst.exists():
            print(f"Copying {split} -> {dst}")
            # Copy into a staging folder and rename at the end, so an
            # interrupted copy never leaves a half-filled `dst` that a later
            # run would mistake for a complete split.
            staging = out_base / f"{split}.partial"
            if staging.exists():
                shutil.rmtree(staging)
            shutil.copytree(src, staging)
            staging.rename(dst)
        data[split] = str(dst)

    data["path"] = str(out_base)
    with open(cached_yaml, "w") as f:
        yaml.safe_dump(data, f)

    print(f"✅ Cached dataset YAML saved at: {cached_yaml}")
    return str(cached_yaml)

# ============================================================
# Training Function
# ============================================================
def train_phase(name, base_model, data, epochs, batch, imgsz=640, lr0=None):
    """Run one YOLO training phase, resuming or skipping it when possible.

    The phase's run directory is ``RUNS_DIR / name``:
      * if it holds a completion marker, the phase is skipped;
      * else if it holds ``weights/last.pt``, training resumes from it;
      * otherwise a new run is started from ``base_model``.

    Args:
        name: Run name; also the sub-directory of RUNS_DIR used for the run.
        base_model: Weights passed to ``YOLO(...)`` when starting a new run.
        data: Dataset YAML path forwarded to ``model.train``.
        epochs: Number of epochs forwarded to ``model.train``.
        batch: Batch size forwarded to ``model.train``.
        imgsz: Image size forwarded to ``model.train``.
        lr0: Optional initial learning rate; forwarded only when not ``None``.

    Returns:
        None.
    """
    print_header(f"Starting Phase: {name}")

    project_dir = RUNS_DIR / name
    last_weights = project_dir / "weights" / "last.pt"
    done_marker = project_dir / ".phase_complete"

    # Resuming a run that already finished makes Ultralytics raise
    # ("nothing to resume"), so a re-executed notebook skips finished phases.
    if done_marker.exists():
        print(f"Phase '{name}' already completed, skipping.")
        return

    # If session expired, resume automatically
    if last_weights.exists():
        print(f"Resuming from previous session: {last_weights}")
        model = YOLO(str(last_weights))
        resume_flag = True
    else:
        print(f"Starting new training run: {name}")
        model = YOLO(base_model)
        resume_flag = False

    args = dict(
        data=data,
        epochs=epochs,
        batch=batch,
        imgsz=imgsz,
        device="cuda",
        name=name,
        # Ultralytics writes to <project>/<name>; the project must therefore be
        # RUNS_DIR itself so the run lands where `last_weights` is looked up.
        project=str(RUNS_DIR),
        # Reuse the same folder instead of creating "<name>2", "<name>3", ...
        # which the resume check above would never find.
        exist_ok=True,
        cache=True,
        save=True,
        resume=resume_flag,
        patience=20,
        workers=4
    )

    if lr0 is not None:
        # NOTE(review): with the default optimizer='auto', Ultralytics appears to
        # pick its own learning rate and ignore lr0 — confirm, and pass an
        # explicit optimizer if this value must be honored.
        args["lr0"] = lr0

    model.train(**args)

    # Record completion so the next notebook run skips this phase.
    done_marker.parent.mkdir(parents=True, exist_ok=True)
    done_marker.touch()
    print(f"✅ Phase '{name}' complete!")

# ============================================================
# Main Training Pipeline
# ============================================================
def main():
    """Run the full pipeline: cache the dataset, then train phase 1 and the fine-tune phase.

    The fine-tune phase starts from phase 1's ``best.pt`` when that file
    exists under ``RUNS_DIR``; otherwise it falls back to ``MODEL_PATH`` and
    prints a warning so the fallback is not silent.
    """
    print_header("Preparing Dataset and Configuration")
    cached_yaml = ensure_dataset_writable(DATASET_PATH)

    # -----------------------
    # Phase 1: General Train
    # -----------------------
    train_phase(
        name="coral_algae_phase1",
        base_model=MODEL_PATH,
        data=cached_yaml,
        epochs=150,
        batch=64,
        imgsz=640
    )

    # -----------------------
    # Phase 2: Fine-Tune
    # -----------------------
    phase1_best = RUNS_DIR / "coral_algae_phase1" / "weights" / "best.pt"
    if phase1_best.exists():
        fine_tune_base = str(phase1_best)
    else:
        # Fine-tuning from the untouched base weights discards phase 1's work,
        # so make the fallback visible instead of taking it silently.
        print(
            f"WARNING: phase 1 weights not found at {phase1_best}; "
            f"fine-tuning from base model {MODEL_PATH} instead."
        )
        fine_tune_base = MODEL_PATH

    train_phase(
        name="coral_algae_finetune",
        base_model=fine_tune_base,
        data=cached_yaml,
        epochs=50,
        batch=8,  # smaller batch for finer tuning
        imgsz=640,
        lr0=0.001
    )

    print_header("✅ All training phases complete!")

# ============================================================
# Run
# ============================================================
if __name__ == "__main__":
    main()


[0m


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

AttributeError: _ARRAY_API not found

ImportError: numpy.core.multiarray failed to import