In [1]:
# === DARKNET GPU BUILD CELL — Compile Darknet with CUDA support ===
import os
import subprocess
import shutil
from pathlib import Path
import re
import stat
import time
import sys

# Add option to force GPU build even when libcuda not found
FORCE_GPU_BUILD = os.environ.get("FORCE_GPU_BUILD", "1") == "1"  # Default to force for Kaggle

# Use the DK_ROOT from the environment detection cell
try:
    DK_ROOT = globals().get('DK_ROOT')
    if DK_ROOT is None:
        raise NameError("DK_ROOT not found")
    print(f"✅ Using detected path: {DK_ROOT}")
except NameError:
    print("⚠️  Environment detection cell not run, using fallback detection")
    if Path("/kaggle/working").exists():
        DK_ROOT = Path("/kaggle/working")
        print(f"Fallback: Using Kaggle path: {DK_ROOT}")
    else:
        DK_ROOT = Path.cwd() / "yolo-files"
        print(f"Fallback: Using local path: {DK_ROOT}")

print(f"🔧 Building Darknet with GPU support in: {DK_ROOT}")

# Enhanced GPU detection similar to your reference
def detect_gpu():
    reasons = []
    
    # 1) Honor explicit CUDA_VISIBLE_DEVICES
    env = os.environ.get("CUDA_VISIBLE_DEVICES")
    if env and env != "" and env != "-1":
        return True, f"CUDA_VISIBLE_DEVICES={env}"
    
    # 2) Device nodes present
    if os.path.exists("/dev/nvidia0"):
        reasons.append("/dev/nvidia0 present")
    if os.path.exists("/dev/nvidiactl"):
        reasons.append("/dev/nvidiactl present")
    
    # 3) Kernel driver info
    try:
        if os.path.isdir("/proc/driver/nvidia/gpus") and os.listdir("/proc/driver/nvidia/gpus"):
            gpu_dirs = os.listdir("/proc/driver/nvidia/gpus")
            reasons.append(f"/proc/driver/nvidia/gpus present ({len(gpu_dirs)} GPUs)")
    except Exception:
        pass
    
    # 4) Check for CUDA toolkit
    if os.path.exists("/usr/local/cuda"):
        reasons.append("/usr/local/cuda exists")
    if os.path.exists("/usr/local/cuda/bin/nvcc"):
        reasons.append("nvcc found")
    
    # 5) nvidia-smi check
    nsm = shutil.which("nvidia-smi")
    if nsm:
        try:
            out = subprocess.run([nsm, "-L"], text=True, capture_output=True, check=False)
            if out.returncode == 0 and "GPU" in (out.stdout or ""):
                gpu_lines = [line for line in out.stdout.splitlines() if "GPU" in line]
                reasons.append(f"nvidia-smi: {len(gpu_lines)} GPU(s)")
                if gpu_lines:
                    reasons.append(f"First GPU: {gpu_lines[0]}")
                # Auto-enable FORCE_GPU_BUILD if T4 detected
                if any("T4" in line for line in gpu_lines):
                    global FORCE_GPU_BUILD
                    FORCE_GPU_BUILD = True
                    reasons.append("T4 detected - auto-enabling FORCE_GPU_BUILD")
                return True, "; ".join(reasons)
        except Exception as e:
            reasons.append(f"nvidia-smi error: {e}")
    
    # If we have strong indicators, still consider GPU present
    if len(reasons) >= 2:
        return True, "; ".join(reasons) + " (assuming GPU present despite nvidia-smi issues)"
    
    return False, "no GPU indicators found"

# Enhanced libcuda search
def find_and_prepare_libcuda():
    print("🔍 Searching for libcuda...")
    
    # 1) Try ldconfig first
    try:
        out = subprocess.check_output("ldconfig -p | grep libcuda", shell=True, text=True, stderr=subprocess.DEVNULL)
        print(f"📋 ldconfig output:\n{out}")
        for line in out.splitlines():
            m = re.search(r'=>\s*(\S*libcuda\.so(?:\.\d+)*)', line)
            if m:
                verpath = m.group(1)
                d = os.path.dirname(verpath)
                base = os.path.basename(verpath)
                if base == "libcuda.so":
                    return {"dir": d, "use_symlink": False}
                else:
                    return {"dir": d, "verfile": base, "verpath": verpath, "use_symlink": True}
    except Exception as e:
        print(f"⚠️  ldconfig failed: {e}")

    # 2) Expanded search paths for Kaggle
    candidates = [
        "/usr/lib/x86_64-linux-gnu",
        "/lib/x86_64-linux-gnu", 
        "/usr/lib",
        "/usr/lib64",
        "/lib",
        "/usr/local/cuda/lib64",
        "/usr/local/cuda/lib64/stubs",
        "/opt/conda/lib",
        "/usr/local/lib",
        "/usr/local/nvidia/lib64",
        "/usr/local/nvidia/lib"
    ]
    
    print(f"🔍 Checking directories: {candidates}")
    for d in candidates:
        try:
            if not os.path.exists(d):
                continue
            files = os.listdir(d)
            cuda_files = [f for f in files if "libcuda" in f]
            if cuda_files:
                print(f"📁 {d} contains: {cuda_files}")
            
            if "libcuda.so" in files:
                return {"dir": d, "use_symlink": False}
            for f in files:
                if f.startswith("libcuda.so."):
                    return {"dir": d, "verfile": f, "verpath": os.path.join(d, f), "use_symlink": True}
        except Exception as e:
            print(f"⚠️  Error checking {d}: {e}")
    
    return None

# Check if darknet already exists and test for GPU support
if (DK_ROOT / "darknet").exists():
    print("✅ Darknet executable already exists")
    
    # Test if existing darknet is GPU-enabled
    try:
        test_result = subprocess.run(
            [str(DK_ROOT / "darknet")], 
            capture_output=True, 
            text=True, 
            timeout=10,
            cwd=str(DK_ROOT),
            input="\n"
        )
        
        full_output = (test_result.stdout or "") + (test_result.stderr or "")
        if "isn't used" in full_output or "GPU is not used" in full_output:
            print("⚠️  Existing darknet is CPU-only - forcing rebuild for GPU version")
            os.remove(DK_ROOT / "darknet")  # Delete CPU version
        else:
            print("✅ Existing darknet appears to have GPU support - skipping build")
            print("    (Delete darknet file manually to force rebuild)")
            # Skip the build process
            darknet_exe = DK_ROOT / "darknet"
            if darknet_exe.exists():
                print("✅ Darknet executable found")
                print("✅ Darknet is executable")
                print("\n🎯 Darknet build process completed!")
                print(f"📍 Darknet location: {DK_ROOT / 'darknet'}")
                print("🔄 You can now proceed to the next cell for dataset setup")
            # Exit early since we're keeping existing darknet
            exit_early = True
    except:
        print("⚠️  Could not test existing darknet - forcing rebuild")
        try:
            os.remove(DK_ROOT / "darknet")
        except:
            pass
        exit_early = False
else:
    exit_early = False

# Only proceed with build if we don't have a working GPU darknet
if not exit_early:
    print("🚀 Starting Darknet compilation with GPU support...")
    
    # Detect GPU
    gpu_on, gpu_probe_msg = detect_gpu()
    print(("🔋 GPU detected:" if gpu_on else "⚠️  No GPU detected:"), gpu_probe_msg)
    print(f"🔧 FORCE_GPU_BUILD = {FORCE_GPU_BUILD}")
    
    # Clone darknet source - remove existing and get fresh copy
    darknet_src = DK_ROOT / "darknet-src"
    if darknet_src.exists():
        print("🗑️  Removing existing darknet source for fresh build...")
        shutil.rmtree(darknet_src, ignore_errors=True)
    
    print("📥 Cloning Darknet repository...")
    try:
        clone_cmd = ["git", "clone", "--depth", "1", "https://github.com/AlexeyAB/darknet.git", str(darknet_src)]
        result = subprocess.run(clone_cmd, cwd=str(DK_ROOT), capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            print(f"❌ Git clone failed: {result.stderr}")
            raise Exception("Failed to clone darknet")
        print("✅ Darknet repository cloned successfully")
    except Exception as e:
        print(f"❌ Failed to clone darknet: {e}")
        raise
    
    # Change to darknet source directory
    os.chdir(str(darknet_src))
    
    # Normalize CRLF (prevents 'missing separator')
    subprocess.run("sed -i 's/\r$//' Makefile", shell=True, check=False)
    
    # Sanitize Makefile: remove stray 'rt' token if present
    makefile_path = Path("Makefile")
    mk = makefile_path.read_text()
    mk = re.sub(r"(?<=\s)rt(?=\s)", "", mk)  # drop bare 'rt'
    makefile_path.write_text(mk)
    
    # Find cuDNN
    def find_cudnn_libdir():
        candidates = [
            "/usr/local/cudnn/lib64",
            "/usr/lib/x86_64-linux-gnu",
            "/usr/local/cuda/lib64",
            "/opt/conda/lib",
            "/usr/lib64",
            "/lib/x86_64-linux-gnu"
        ]
        for d in candidates:
            if os.path.isdir(d):
                try:
                    files = os.listdir(d)
                    cudnn_files = [f for f in files if f.startswith("libcudnn")]
                    if cudnn_files:
                        print(f"ℹ️  Found cuDNN in {d}: {cudnn_files[:3]}...")
                        return d
                except Exception:
                    pass
        return None

    cudnn_dir = find_cudnn_libdir()
    use_cudnn = 1 if (gpu_on and cudnn_dir) else 0
    
    # Setup LDFLAGS
    extra_ld = [
        "-L/usr/local/cuda/lib64",
        "-L/usr/local/cuda/targets/x86_64-linux/lib",
        "-L/usr/lib/x86_64-linux-gnu",
        "-L/opt/conda/lib",
        "-L/usr/lib64",
        "-lcudart -lcublas -lcurand"
    ]
    
    # Handle libcuda
    libcuda_info = find_and_prepare_libcuda()
    if libcuda_info:
        if not libcuda_info.get("use_symlink"):
            extra_ld += [f"-L{libcuda_info['dir']}", "-Wl,-rpath," + libcuda_info['dir'], "-lcuda"]
            print(f"ℹ️  Using libcuda from {libcuda_info['dir']}")
        else:
            # Create symlink for versioned libcuda
            stubs_dir = str(DK_ROOT / "cuda_libcuda_stubs")
            os.makedirs(stubs_dir, exist_ok=True)
            verpath = libcuda_info.get("verpath") or os.path.join(libcuda_info["dir"], libcuda_info["verfile"])
            linkpath = os.path.join(stubs_dir, "libcuda.so")
            try:
                if os.path.islink(linkpath) or os.path.exists(linkpath):
                    os.remove(linkpath)
                os.symlink(verpath, linkpath)
                os.chmod(linkpath, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
                extra_ld += [f"-L{stubs_dir}", "-Wl,-rpath," + stubs_dir, "-lcuda"]
                print(f"ℹ️  Created local symlink for libcuda: {linkpath} -> {verpath}")
            except Exception as e:
                print("⚠️  Failed to create local libcuda symlink:", e)
                if libcuda_info["dir"].endswith("stubs"):
                    extra_ld += [f"-L{libcuda_info['dir']}", "-lcuda"]
                    print(f"ℹ️  Added stubs dir to LDFLAGS: {libcuda_info['dir']}")
    else:
        print("⚠️  libcuda not found in any common locations")
    
    # FORCE GPU BUILD - Don't fall back to CPU unless explicitly disabled
    if gpu_on and not libcuda_info and not FORCE_GPU_BUILD:
        print("⚠️  GPU device present but libcuda (driver) not found. Switching to CPU-only build.")
        gpu_on = False
        use_cudnn = 0
        extra_ld = ["-L/usr/lib/x86_64-linux-gnu"]
    elif gpu_on and FORCE_GPU_BUILD:
        print("🚀 FORCE_GPU_BUILD enabled - proceeding with GPU build")
        if not libcuda_info:
            print("    Adding CUDA stubs as fallback for missing libcuda...")
            extra_ld += ["-L/usr/local/cuda/lib64/stubs", "-lcuda"]
    
    # Add cuDNN if found
    if cudnn_dir:
        extra_ld += [f"-L{cudnn_dir}", "-lcudnn"]
    
    # Build flags - Use sm_75 for T4 GPUs
    arch = ' -gencode arch=compute_75,code=[sm_75,compute_75]'
    flags = f'GPU={1 if gpu_on else 0} CUDNN={use_cudnn} CUDNN_HALF={use_cudnn} OPENCV=0 ARCH="{arch}" LDFLAGS+=" {" ".join(extra_ld)} "'
    
    print(f"🔨 Building with GPU={1 if gpu_on else 0}, CUDNN={use_cudnn}")
    print(f"🔧 Compile flags: {flags}")
    
    # Build with progress
    def build_with_progress(flags):
        cmd = f"stdbuf -oL -eL make -j1 {flags}"  # Single-threaded for better error output
        print("$", cmd)
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             text=True, bufsize=1)
        objs = set()
        pat = re.compile(r"-o\s+obj/([A-Za-z0-9_./-]+\.o)\b")
        total = len(list(Path("src").glob("*.c"))) + len(list(Path("src").glob("*.cu"))) or 120
        spin = "|/-\\"; si = 0; last = time.time()
        
        def bar(extra=""):
            done = len(objs); pct = min(100, int(done/max(1, total)*100)); L = 30; fill = int(L*pct/100)
            sys.stdout.write(f"\r[{'#'*fill}{'-'*(L-fill)}] {done}/{total} ({pct:3d}%) {spin[si%4]} {extra}   ")
            sys.stdout.flush()
        
        for line in p.stdout:
            print(line, end="")
            m = pat.search(line)
            if m and m.group(1) not in objs:
                objs.add(m.group(1)); si += 1; bar(f"→ {m.group(1)}"); last = time.time()
            elif time.time() - last > 2.0:
                si += 1; bar("compiling…"); last = time.time()
        
        rc = p.wait()
        si += 1; bar("linking/finishing…"); print()
        return rc
    
    # Single build attempt - no CPU fallback if FORCE_GPU_BUILD is enabled
    print("🔨 Starting compilation...")
    rc = build_with_progress(flags)
    
    if rc != 0 or not Path("darknet").exists():
        if gpu_on and FORCE_GPU_BUILD:
            print("\n⚠️  GPU build failed but FORCE_GPU_BUILD is enabled.")
            print("    Trying with clean build and simplified flags...")
            
            # Clean and try with simpler flags
            subprocess.run("make clean", shell=True, check=False)
            
            # Simplified flags for problematic environments
            simple_flags = f'GPU=1 CUDNN={use_cudnn} OPENCV=0 ARCH="{arch}"'
            print(f"🔄 Retrying with simplified flags: {simple_flags}")
            rc = build_with_progress(simple_flags)
            
            if rc != 0 or not Path("darknet").exists():
                print("❌ GPU build failed even with simplified flags.")
                raise Exception("Forced GPU build failed - check CUDA installation")
        else:
            print("❌ Build failed.")
            raise Exception("Darknet compilation failed")
    
    # Copy executable to working directory
    src_exe = Path("darknet")
    dst_exe = DK_ROOT / "darknet"
    if src_exe.exists():
        shutil.copy2(src_exe, dst_exe)
        os.chmod(dst_exe, 0o755)
        print(f"✅ Copied darknet executable to {dst_exe}")
    else:
        raise Exception("Compilation succeeded but executable not found")
    
    # Change back to working directory
    os.chdir(str(DK_ROOT))

    # Verify darknet executable
    darknet_exe = DK_ROOT / "darknet"
    if darknet_exe.exists():
        print("✅ Darknet executable found")
        
        # Test darknet with better GPU detection
        try:
            # First test basic functionality
            test_result = subprocess.run(
                [str(darknet_exe)], 
                capture_output=True, 
                text=True, 
                timeout=30,
                cwd=str(DK_ROOT),
                input="\n"
            )
            
            if test_result.returncode == 0 or "usage:" in test_result.stdout.lower():
                print("✅ Darknet executable works correctly")
                
                full_output = (test_result.stdout or "") + (test_result.stderr or "")
                if "GPU" in full_output:
                    if "isn't used" in full_output or "GPU is not used" in full_output:
                        print("⚠️  CPU-only version built (GPU isn't used)")
                    else:
                        print("🚀 GPU support confirmed in darknet!")
                        if "CUDA" in full_output:
                            print("   🎯 CUDA GPU acceleration enabled")
                else:
                    # Check compilation flags to see if GPU was enabled
                    if gpu_on and FORCE_GPU_BUILD:
                        print("🚀 GPU version built with FORCE_GPU_BUILD enabled")
                        print("   Note: GPU status may only show during actual training")
                    else:
                        print("ℹ️  Darknet built successfully")
                        
            else:
                print("⚠️  Darknet test gave unexpected output but executable exists")
                if gpu_on and FORCE_GPU_BUILD:
                    print("🚀 GPU version built (based on compilation flags)")
                    
        except Exception as e:
            print(f"⚠️  Could not test darknet: {e}")
            if gpu_on and FORCE_GPU_BUILD:
                print("🚀 GPU version built (based on compilation settings)")
            
        # Check file permissions
        if os.access(darknet_exe, os.X_OK):
            print("✅ Darknet is executable")
        else:
            print("⚠️  Fixing darknet permissions...")
            try:
                os.chmod(darknet_exe, 0o755)
                print("✅ Fixed darknet permissions")
            except Exception as e:
                print(f"❌ Could not fix permissions: {e}")
                
    else:
        print("❌ Darknet executable not found after build!")

    print("\n🎯 Darknet build process completed!")
    print(f"📍 Darknet location: {DK_ROOT / 'darknet'}")

    # Show final build summary
    if darknet_exe.exists():
        build_type = "GPU" if (gpu_on and FORCE_GPU_BUILD) else "CPU"
        print(f"🏗️  Build type: {build_type} version")
        if gpu_on and FORCE_GPU_BUILD:
            print(f"   GPU={1 if gpu_on else 0}, CUDNN={use_cudnn}, CUDA_ARCH=sm_75")
            print("   ⚡ Ready for GPU-accelerated training!")

    print("🔄 You can now proceed to the next cell for dataset setup")

⚠️  Environment detection cell not run, using fallback detection
Fallback: Using Kaggle path: /kaggle/working
🔧 Building Darknet with GPU support in: /kaggle/working
🚀 Starting Darknet compilation with GPU support...
🔋 GPU detected: /dev/nvidia0 present; /dev/nvidiactl present; /proc/driver/nvidia/gpus present (2 GPUs); /usr/local/cuda exists; nvcc found; nvidia-smi: 2 GPU(s); First GPU: GPU 0: Tesla T4 (UUID: GPU-a2ebabb5-237f-bc70-c874-766f0a96d42d); T4 detected - auto-enabling FORCE_GPU_BUILD
🔧 FORCE_GPU_BUILD = True
📥 Cloning Darknet repository...
✅ Darknet repository cloned successfully
ℹ️  Found cuDNN in /usr/lib/x86_64-linux-gnu: ['libcudnn_engines_runtime_compiled.so.9.2.1', 'libcudnn_engines_precompiled.so.9.2.1', 'libcudnn_heuristic.so.9.2.1']...
🔍 Searching for libcuda...
📋 ldconfig output:
	libcudart.so.12 (libc6,x86-64) => /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12
	libcudart.so (libc6,x86-64) => /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so

🔍 Checki

In [3]:
# =========================
# CELL 0 — Paths & Setup
# =========================
from pathlib import Path
import shutil, re, json
from tqdm.auto import tqdm

ROOT = Path("/kaggle/working")

# ---- FIRE DATASETS (keep 4th 'smoke' dataset skipped) ----
FIRE_INDOOR_A = Path("/kaggle/input/indoor-fire-v1-rgb1-darknet")          # keep
FIRE_HOME     = Path("/kaggle/input/home-fire-dataset")                     # keep
FIRE_YOLO     = Path("/kaggle/input/fire-detection-in-yolo-format")         # keep
FIRE_SMOKE    = Path("/kaggle/input/fire-smoke-indoor-v1i-darknet")         # SKIP (smoke out-of-scope)

# ---- PERSON DATASETS (3) ----
PERSON_PEOPLE = Path("/kaggle/input/people-detection")                      # keep
PERSON_SURV   = Path("/kaggle/input/surveillance-images-for-person-detection/person-3")  # keep
PERSON_HIVIS  = Path("/kaggle/input/yolo-highvis-and-person-detection-dataset/YOLO-HiVis-Data")  # keep (person only)

# ---- PRETRAIN / YOLO PACK (unchanged) ----
YOLO_FILES    = Path("/kaggle/input/yolo-files/yolov4tiny_export")
FIRE_MODEL    = Path("/kaggle/input/fire-model-enhanced-20251005-105236")   # optional pack you had

# ---- OUTPUT MERGED OBJ ----
OBJ = ROOT / "obj"
IMG_TRAIN = OBJ / "train"; IMG_VAL = OBJ / "valid"
for d in [IMG_TRAIN, IMG_VAL]:
    d.mkdir(parents=True, exist_ok=True)

# ---- Global classes (final model) ----
# 0=fire, 1=person
NAMES = ["fire", "person"]
(OBJ / "obj.names").write_text("\n".join(NAMES))

print("[VERIFY PATHS]")
for name, p in [
    ("FIRE_INDOOR_A", FIRE_INDOOR_A),
    ("FIRE_HOME", FIRE_HOME),
    ("FIRE_YOLO", FIRE_YOLO),
    ("FIRE_SMOKE(SKIP)", FIRE_SMOKE),
    ("PERSON_PEOPLE", PERSON_PEOPLE),
    ("PERSON_SURV", PERSON_SURV),
    ("PERSON_HIVIS", PERSON_HIVIS),
    ("YOLO_FILES", YOLO_FILES),
    ("FIRE_MODEL", FIRE_MODEL),
]:
    print(f"{name:18s}:", p.exists(), str(p))

print("\n[OUTPUT]")
print("OBJ:", OBJ)
print("obj.names:", (OBJ / 'obj.names').exists(), (OBJ / 'obj.names'))


[VERIFY PATHS]
FIRE_INDOOR_A     : True /kaggle/input/indoor-fire-v1-rgb1-darknet
FIRE_HOME         : True /kaggle/input/home-fire-dataset
FIRE_YOLO         : True /kaggle/input/fire-detection-in-yolo-format
FIRE_SMOKE(SKIP)  : True /kaggle/input/fire-smoke-indoor-v1i-darknet
PERSON_PEOPLE     : True /kaggle/input/people-detection
PERSON_SURV       : True /kaggle/input/surveillance-images-for-person-detection/person-3
PERSON_HIVIS      : True /kaggle/input/yolo-highvis-and-person-detection-dataset/YOLO-HiVis-Data
YOLO_FILES        : True /kaggle/input/yolo-files/yolov4tiny_export
FIRE_MODEL        : True /kaggle/input/fire-model-enhanced-20251005-105236

[OUTPUT]
OBJ: /kaggle/working/obj
obj.names: True /kaggle/working/obj/obj.names


In [4]:
# ================================
# CELL 1 — Ingest helpers (1→1.5)
# Uses ALL fire datasets (incl. fire-smoke set; keeps only FIRE boxes)
# Prints per-class totals for train/valid at the end.
# ================================
from pathlib import Path
import shutil, random
from tqdm.auto import tqdm
from collections import defaultdict

random.seed(0)

# -------- 1.0: tiny utils --------
def _is_image(p: Path):
    return p.suffix.lower() in {".jpg", ".jpeg", ".png", ".bmp"}

def _copy(img: Path, dst_dir: Path):
    dst = dst_dir / img.name
    if not dst.exists():
        shutil.copy2(img, dst)
    return dst

def _load_lines(lbl: Path):
    if not lbl.exists(): return []
    return [ln.strip() for ln in lbl.read_text().splitlines() if ln.strip()]

def _save_lines(lbl: Path, lines):
    if lines:
        lbl.write_text("\n".join(lines) + "\n")

def _remap_filter(lines, keep_src_ids, to_class_id):
    """
    Keep boxes whose class is in keep_src_ids.
    If keep_src_ids is None -> keep ALL classes.
    Remap kept class id to to_class_id (0 or 1).
    """
    out = []
    for ln in lines:
        sp = ln.split()
        if not sp:
            continue
        try:
            cid = int(sp[0])
        except:
            continue
        if (keep_src_ids is None) or (cid in keep_src_ids):
            sp[0] = str(to_class_id)
            out.append(" ".join(sp))
    return out

# -------- 1.1: dataset recipes (match your folders) --------
# FIRE (now includes the smoke dataset; we keep only its FIRE class, assumed class 0)
RECIPES_FIRE = [
    dict(name="FIRE_INDOOR_A", root=FIRE_INDOOR_A, splits={"train":"train", "val":"valid"}, rel=""),
    dict(name="FIRE_HOME",     root=FIRE_HOME,     splits={"train":"train", "val":"val"},   rel=""),
    dict(name="FIRE_YOLO",     root=FIRE_YOLO,     splits={"train":"train", "val":"valid"}, rel=""),
    dict(name="FIRE_SMOKE",    root=FIRE_SMOKE,    splits={"train":"train", "val":"valid"}, rel=""),  # keep fire only
]

# PERSON
RECIPES_PERSON = [
    # people-detection: accept any id present and remap to person=1
    dict(name="PERSON_PEOPLE", root=PERSON_PEOPLE, splits={"train":"train", "val":"valid"}, rel=""),
    # surveillance-images-for-person-detection/person-3/train only
    dict(name="PERSON_SURV",   root=PERSON_SURV.parent, splits={"train":"person-3/train", "val":None}, rel=""),
    # YOLO-HiVis-Data has images/labels under that folder
    dict(name="PERSON_HIVIS",  root=PERSON_HIVIS,  splits={"train":"images", "val":None}, rel=""),
]

# -------- 1.2: ingest function (handles images/labels layouts) --------
def _resolve_img_lbl_dirs(root: Path, sub: str):
    base = root / sub if sub else root
    if (base / "images").exists() and (base / "labels").exists():
        return base / "images", base / "labels"
    if (root / "images").exists() and (root / "labels").exists():
        cand_img = root / f"images/{sub}" if (root / f"images/{sub}").exists() else root / "images"
        cand_lbl = root / f"labels/{sub}" if (root / f"labels/{sub}").exists() else root / "labels"
        return cand_img, cand_lbl
    return base, base  # mixed folder

def ingest_yolo_dataset(recipe, keep_src_ids, global_class_id):
    name = recipe["name"]; root = recipe["root"]; splits = recipe["splits"]; rel=recipe["rel"]
    print(f"[INGEST] {name}")
    for split_key in ["train", "val"]:
        sub = splits.get(split_key)
        if not sub:
            continue
        img_dir, labels_dir = _resolve_img_lbl_dirs(root / rel, sub)
        if not img_dir.exists():
            print(f"  -> skip {split_key}: {img_dir} not found")
            continue
        dst_dir = IMG_TRAIN if split_key == "train" else IMG_VAL
        copied, kept_boxes = 0, 0
        imgs = [p for p in img_dir.rglob("*") if _is_image(p)]
        for img in tqdm(imgs, leave=False, desc=f"{name}:{split_key}"):
            lbl = labels_dir / f"{img.stem}.txt"
            lines = _load_lines(lbl)
            kept  = _remap_filter(lines, keep_src_ids, global_class_id)
            if not kept:
                continue
            dst_img = _copy(img, dst_dir)
            _save_lines(dst_img.with_suffix(".txt"), kept)
            copied += 1
            kept_boxes += len(kept)
        print(f"  -> {split_key}: {copied} imgs, {kept_boxes} kept boxes -> {dst_dir}")

# -------- 1.3: Run ingestion (fire=0, person=1) --------
print("[RUN] Ingest fire datasets")
for R in RECIPES_FIRE:
    # For the smoke dataset, we keep only FIRE class (commonly id 0). If their fire id differs, change [0] accordingly.
    keep_ids = [0]
    ingest_yolo_dataset(R, keep_src_ids=keep_ids, global_class_id=0)

print("\n[RUN] Ingest person datasets")
ingest_yolo_dataset(RECIPES_PERSON[0], keep_src_ids=None, global_class_id=1)   # PEOPLE (unknown id → keep all, remap to 1)
ingest_yolo_dataset(RECIPES_PERSON[1], keep_src_ids=[0],   global_class_id=1)  # SURV (person==0)
ingest_yolo_dataset(RECIPES_PERSON[2], keep_src_ids=[0],   global_class_id=1)  # HIVIS (change if person id differs)

# -------- 1.4: build train/val lists + obj.data --------
def write_list_file(list_path: Path, folder: Path):
    imgs = sorted([p for p in folder.glob("*") if _is_image(p)])
    list_path.write_text("\n".join(str(p) for p in imgs) + "\n")
    return len(imgs)

train_list = OBJ / "train.txt"
val_list   = OBJ / "valid.txt"
n_train = write_list_file(train_list, IMG_TRAIN)
n_val   = write_list_file(val_list,   IMG_VAL)

(OBJ / "obj.data").write_text(
    f"classes={len(NAMES)}\n"
    f"train={train_list}\n"
    f"valid={val_list}\n"
    f"names={OBJ/'obj.names'}\n"
    f"backup={ROOT/'backup'}\n"
)

print("\n[SUMMARY 1.4]")
print("train.txt:", n_train, train_list)
print("valid.txt:", n_val,   val_list)
print("obj.data :", (OBJ/'obj.data'))

# -------- 1.5: sanity peek + per-class totals --------
def peek_labels(folder: Path, k=5):
    i = 0
    for p in sorted(folder.glob("*.txt")):
        s = p.read_text().strip()
        print(" -", p.name, "->", (s[:120] + "...") if len(s) > 120 else s)
        i += 1
        if i >= k: break

def class_stats(folder: Path):
    imgs = [p for p in folder.glob("*") if _is_image(p)]
    img_has = defaultdict(int)  # images containing class
    box_cnt = defaultdict(int)  # total boxes per class
    for img in imgs:
        lbl = img.with_suffix(".txt")
        if not lbl.exists(): 
            continue
        seen = set()
        for ln in lbl.read_text().splitlines():
            if not ln.strip(): 
                continue
            cid = int(ln.split()[0])
            box_cnt[cid] += 1
            seen.add(cid)
        for cid in seen:
            img_has[cid] += 1
    return len(imgs), img_has, box_cnt

print("\n[PEEK 1.5] sample labels (train)")
peek_labels(IMG_TRAIN)
print("\n[PEEK 1.5] sample labels (valid)")
peek_labels(IMG_VAL)

# per-class totals (separate FIRE vs PERSON)
for split_name, folder in [("TRAIN", IMG_TRAIN), ("VALID", IMG_VAL)]:
    total_imgs, img_has, box_cnt = class_stats(folder)
    fire_imgs   = img_has.get(0, 0); fire_boxes   = box_cnt.get(0, 0)
    person_imgs = img_has.get(1, 0); person_boxes = box_cnt.get(1, 0)
    print(f"\n[{split_name}] totals — all classes")
    print(f"  total images            : {total_imgs}")
    print(f"  FIRE   -> images:{fire_imgs}  boxes:{fire_boxes}")
    print(f"  PERSON -> images:{person_imgs}  boxes:{person_boxes}")


[RUN] Ingest fire datasets
[INGEST] FIRE_INDOOR_A


FIRE_INDOOR_A:train:   0%|          | 0/399 [00:00<?, ?it/s]

  -> train: 15 imgs, 15 kept boxes -> /kaggle/working/obj/train


FIRE_INDOOR_A:val:   0%|          | 0/39 [00:00<?, ?it/s]

  -> val: 0 imgs, 0 kept boxes -> /kaggle/working/obj/valid
[INGEST] FIRE_HOME


FIRE_HOME:train:   0%|          | 0/3900 [00:00<?, ?it/s]

  -> train: 2763 imgs, 2978 kept boxes -> /kaggle/working/obj/train


FIRE_HOME:val:   0%|          | 0/1300 [00:00<?, ?it/s]

  -> val: 894 imgs, 963 kept boxes -> /kaggle/working/obj/valid
[INGEST] FIRE_YOLO


FIRE_YOLO:train:   0%|          | 0/243 [00:00<?, ?it/s]

  -> train: 243 imgs, 302 kept boxes -> /kaggle/working/obj/train


FIRE_YOLO:val:   0%|          | 0/16 [00:00<?, ?it/s]

  -> val: 16 imgs, 19 kept boxes -> /kaggle/working/obj/valid
[INGEST] FIRE_SMOKE


FIRE_SMOKE:train:   0%|          | 0/5250 [00:00<?, ?it/s]

  -> train: 4885 imgs, 6535 kept boxes -> /kaggle/working/obj/train


FIRE_SMOKE:val:   0%|          | 0/375 [00:00<?, ?it/s]

  -> val: 364 imgs, 499 kept boxes -> /kaggle/working/obj/valid

[RUN] Ingest person datasets
[INGEST] PERSON_PEOPLE


PERSON_PEOPLE:train:   0%|          | 0/15210 [00:00<?, ?it/s]

  -> train: 0 imgs, 0 kept boxes -> /kaggle/working/obj/train


PERSON_PEOPLE:val:   0%|          | 0/1431 [00:00<?, ?it/s]

  -> val: 0 imgs, 0 kept boxes -> /kaggle/working/obj/valid
[INGEST] PERSON_SURV


PERSON_SURV:train:   0%|          | 0/6603 [00:00<?, ?it/s]

  -> train: 5265 imgs, 12915 kept boxes -> /kaggle/working/obj/train
[INGEST] PERSON_HIVIS


PERSON_HIVIS:train:   0%|          | 0/7937 [00:00<?, ?it/s]

  -> train: 5404 imgs, 12703 kept boxes -> /kaggle/working/obj/train

[SUMMARY 1.4]
train.txt: 18575 /kaggle/working/obj/train.txt
valid.txt: 1274 /kaggle/working/obj/valid.txt
obj.data : /kaggle/working/obj/obj.data

[PEEK 1.5] sample labels (train)
 - -2022-04-05-195924_png_jpg.rf.1682133bbd8c43d33a8849116351aa5e.txt -> 0 0.8246402877697842 0.5269784172661871 0.35071942446043164 0.34532374100719426
 - -2022-04-05-195924_png_jpg.rf.4e629e956f1534540375e8b841573cf8.txt -> 0 0.2154255319148936 0.5709219858156028 0.34574468085106386 0.3173758865248227
 - -2022-04-05-195924_png_jpg.rf.8fc625750d30ecf2c1cce60b1c9d450a.txt -> 0 0.8078125 0.41015625 0.2953125 0.2703125
 - 0004342a-2333-11eb-9974-62abc2879d65_jpg.rf.0c41dc4c3ca2c6bfd193a32baea8930c.txt -> 1 0.42734375 0.9162037037037037 0.06875 0.1675925925925926
1 0.56484375 0.9638888888888889 0.05416666666666667 0.0722222...
 - 0005fa7820c9d395.txt -> 1 0.5796484375 0.42296908333333333 0.18562499999999998 0.25499999999999995

[PEEK 1.5] sam

In [5]:
# CELL 1.6A — Oversample FIRE in train.txt (keeps files as-is, just repeats fire images)
# Target ratio ~ 1:3 (fire:person). Adjust "factor" if you want stronger oversampling.

from pathlib import Path
import math

train_list = (ROOT/"train.txt").read_text().splitlines()

# Identify which images have class 0 (fire) vs class 1 (person)
def has_class(lbl_path: Path, cls: int) -> bool:
    if not lbl_path.exists(): 
        return False
    for line in lbl_path.read_text().splitlines():
        if not line.strip(): 
            continue
        try:
            if int(line.split()[0]) == cls:
                return True
        except:
            pass
    return False

fire_imgs, person_imgs, mixed = [], [], []
for p in map(Path, train_list):
    lbl = p.with_suffix(".txt")
    f = has_class(lbl, 0)
    h = has_class(lbl, 1)
    if f and h: mixed.append(p)
    elif f:     fire_imgs.append(p)
    elif h:     person_imgs.append(p)

print(f"[BALANCE] fire-only:{len(fire_imgs)} person-only:{len(person_imgs)} mixed:{len(mixed)}")

# Compute oversampling factor to approach ~1:3 class balance
# Rough estimate using label-row counts we printed earlier
fire_rows  = 749
person_rows= 12915
target_ratio = 3.0
needed = int(math.ceil((person_rows / target_ratio) / max(1, fire_rows)))
factor = max(1, needed)
print(f"[BALANCE] Oversampling fire by factor x{factor}")

# Build new train list: repeat fire images 'factor' times; keep mixed & person once
aug_list = []
aug_list += [str(p) for p in mixed]
aug_list += [str(p) for p in person_imgs]
for _ in range(factor):
    aug_list += [str(p) for p in fire_imgs]

# De-duplicate while preserving order
seen=set(); final_list=[]
for s in aug_list:
    if s not in seen:
        seen.add(s); final_list.append(s)

# Write back
(ROOT/"train.txt").write_text("\n".join(final_list))
print(f"[BALANCE] train.txt now has {len(final_list)} image entries")


[BALANCE] fire-only:5258 person-only:5265 mixed:0
[BALANCE] Oversampling fire by factor x6
[BALANCE] train.txt now has 10523 image entries


In [5]:
# ================================
# CELL 1.6 — Balance dataset to ~7,900 imgs/class
# - Downsample TRAIN so each class (fire=0, person=1) has up to TARGET images
# - Creates a small VALID split (VAL_FRAC per class) so both classes appear in val
# - Rewrites train.txt / valid.txt and prints fresh stats
# ================================
from pathlib import Path
import random, shutil
from collections import defaultdict

random.seed(0)

TARGET = 7900          # target images per class in TRAIN
VAL_FRAC = 0.10        # fraction of kept images per class to move into VALID

def is_img(p): return p.suffix.lower() in {".jpg",".jpeg",".png",".bmp"}

def img_classes(img: Path):
    """Return set of class ids present in this image (from its .txt)."""
    lbl = img.with_suffix(".txt")
    if not lbl.exists(): return set()
    s = set()
    for ln in lbl.read_text().splitlines():
        ln = ln.strip()
        if not ln: continue
        try:
            s.add(int(ln.split()[0]))
        except: 
            pass
    return s

# --- index TRAIN images by class presence
train_imgs = [p for p in IMG_TRAIN.glob("*") if is_img(p)]
both, fire_only, person_only = [], [], []
for img in train_imgs:
    cs = img_classes(img)
    if not cs: 
        continue
    if 0 in cs and 1 in cs:
        both.append(img)
    elif 0 in cs:
        fire_only.append(img)
    elif 1 in cs:
        person_only.append(img)

random.shuffle(both); random.shuffle(fire_only); random.shuffle(person_only)

keep = set()
fire_kept = 0
person_kept = 0

# 1) keep overlap first (helps both quotas)
for img in both:
    if fire_kept >= TARGET and person_kept >= TARGET: break
    keep.add(img)
    fire_kept += 1
    person_kept += 1

# 2) fill remaining fire quota
for img in fire_only:
    if fire_kept >= TARGET: break
    keep.add(img)
    fire_kept += 1

# 3) fill remaining person quota
for img in person_only:
    if person_kept >= TARGET: break
    keep.add(img)
    person_kept += 1

print(f"[BALANCE] selected TRAIN images -> fire:{fire_kept}, person:{person_kept}, total:{len(keep)}")

# --- delete unselected TRAIN images (and their .txt)
removed = 0
for img in train_imgs:
    if img not in keep:
        lbl = img.with_suffix(".txt")
        try:
            img.unlink()
            removed += 1
        except: pass
        try:
            if lbl.exists(): lbl.unlink()
        except: pass
print(f"[BALANCE] removed {removed} surplus TRAIN images")

# --- build a balanced VALID split by moving a fraction from TRAIN
def move_to_valid(candidates, quota):
    moved = 0
    random.shuffle(candidates)
    for img in candidates:
        if moved >= quota: break
        src_img, src_lbl = img, img.with_suffix(".txt")
        dst_img, dst_lbl = IMG_VAL / img.name, (IMG_VAL / img.name).with_suffix(".txt")
        if dst_img.exists(): continue
        shutil.move(str(src_img), str(dst_img))
        if src_lbl.exists():
            shutil.move(str(src_lbl), str(dst_lbl))
        moved += 1
    return moved

# recompute after pruning
train_imgs = [p for p in IMG_TRAIN.glob("*") if is_img(p)]
fire_imgs   = [p for p in train_imgs if 0 in img_classes(p)]
person_imgs = [p for p in train_imgs if 1 in img_classes(p)]

fire_quota_val   = max(1, int(len(fire_imgs) * VAL_FRAC))
person_quota_val = max(1, int(len(person_imgs) * VAL_FRAC))

moved_fire   = move_to_valid(fire_imgs,   fire_quota_val)
moved_person = move_to_valid(person_imgs, person_quota_val)
print(f"[VAL SPLIT] moved to VALID -> fire:{moved_fire}, person:{moved_person}")

# --- rewrite train.txt / valid.txt
def write_list_file(list_path: Path, folder: Path):
    imgs = sorted([p for p in folder.glob("*") if is_img(p)])
    list_path.write_text("\n".join(str(p) for p in imgs) + "\n")
    return len(imgs)

n_train = write_list_file(OBJ/"train.txt", IMG_TRAIN)
n_val   = write_list_file(OBJ/"valid.txt", IMG_VAL)
print("[LISTS] train.txt:", n_train, "| valid.txt:", n_val)

# --- final stats per split & class
def split_stats(folder: Path):
    imgs = [p for p in folder.glob("*") if is_img(p)]
    img_has = defaultdict(int)
    box_cnt = defaultdict(int)
    for img in imgs:
        lbl = img.with_suffix(".txt")
        if not lbl.exists(): 
            continue
        seen = set()
        for ln in lbl.read_text().splitlines():
            ln = ln.strip()
            if not ln: continue
            cid = int(ln.split()[0])
            box_cnt[cid] += 1
            seen.add(cid)
        for cid in seen: img_has[cid] += 1
    return len(imgs), img_has, box_cnt

for name, folder in [("TRAIN", IMG_TRAIN), ("VALID", IMG_VAL)]:
    total, has, boxes = split_stats(folder)
    f_imgs, p_imgs = has.get(0,0), has.get(1,0)
    f_box,  p_box  = boxes.get(0,0), boxes.get(1,0)
    print(f"\n[{name}] total images: {total}")
    print(f"  FIRE   -> images:{f_imgs}  boxes:{f_box}")
    print(f"  PERSON -> images:{p_imgs}  boxes:{p_box}")


[BALANCE] selected TRAIN images -> fire:7900, person:7900, total:15800
[BALANCE] removed 2775 surplus TRAIN images
[VAL SPLIT] moved to VALID -> fire:790, person:790
[LISTS] train.txt: 14220 | valid.txt: 2854

[TRAIN] total images: 14220
  FIRE   -> images:7110  boxes:8823
  PERSON -> images:7110  boxes:16901

[VALID] total images: 2854
  FIRE   -> images:2064  boxes:2482
  PERSON -> images:790  boxes:1901


In [10]:
# ================================
# CELL 2 — Write obj.names & obj.data (robust to resets)
# ================================
from pathlib import Path

# Re-create ROOT/OBJ if missing (e.g., after a restart)
try:
    ROOT
except NameError:
    ROOT = Path("/kaggle/working")
try:
    OBJ
except NameError:
    OBJ = ROOT / "obj"

(OBJ/"train").mkdir(parents=True, exist_ok=True)
(OBJ/"valid").mkdir(parents=True, exist_ok=True)
(ROOT/"backup").mkdir(exist_ok=True)

# Ensure train/valid lists exist (prefer OBJ lists; fall back to rebuild)
train_txt = OBJ/"train.txt"
valid_txt = OBJ/"valid.txt"

def _imgs_in(folder: Path):
    exts = {".jpg",".jpeg",".png",".bmp"}
    return sorted([p for p in folder.glob("*") if p.suffix.lower() in exts])

if not train_txt.exists():
    (train_txt).write_text("\n".join(map(str, _imgs_in(OBJ/"train"))) + "\n")
if not valid_txt.exists():
    (valid_txt).write_text("\n".join(map(str, _imgs_in(OBJ/"valid"))) + "\n")

# Write names/data (write to ROOT for Darknet, but point to OBJ lists)
(ROOT/"obj.names").write_text("fire\nperson\n")
(ROOT/"obj.data").write_text(
    f"classes= 2\n"
    f"train= {train_txt}\n"
    f"valid= {valid_txt}\n"
    f"names= {ROOT/'obj.names'}\n"
    f"backup= {ROOT/'backup'}\n"
)

print("[NAMES/DATA]")
print((ROOT/"obj.names").read_text())
print((ROOT/"obj.data").read_text())


[NAMES/DATA]
fire
person

classes= 2
train= /kaggle/working/obj/train.txt
valid= /kaggle/working/obj/valid.txt
names= /kaggle/working/obj.names
backup= /kaggle/working/backup



In [11]:
# ================================
# CELL 3 — Patch cfg to 2 classes (classes=2; filters=21 only before [yolo])
# ================================
import re
from pathlib import Path

candidates = [
    FIRE_MODEL/"cfg"/"yolov4-tiny-custom.cfg",
    FIRE_MODEL/"cfg"/"yolov4-tiny.cfg",
    YOLO_FILES/"cfg"/"yolov4-tiny.cfg",
]
base = next((p for p in candidates if p and p.exists()), None)
assert base and base.exists(), f"No base cfg found. Checked: {candidates}"

lines = base.read_text().splitlines()

num_classes = 2
num_filters = (num_classes + 5) * 3  # 21

# Walk through and edit only conv-before-[yolo] & [yolo] sections
last_conv_idx = -1
for i, ln in enumerate(lines):
    if ln.strip().lower() == "[convolutional]":
        last_conv_idx = i
    if ln.strip().lower() == "[yolo]":
        # patch classes inside this [yolo] block
        j = i + 1
        while j < len(lines) and not lines[j].strip().startswith("["):
            if re.match(r"^\s*classes\s*=\s*\d+\s*$", lines[j]):
                lines[j] = f"classes={num_classes}"
            j += 1
        # patch filters of conv right before this [yolo]
        j = last_conv_idx + 1
        while j < len(lines) and not lines[j].strip().startswith("["):
            if re.match(r"^\s*filters\s*=\s*\d+\s*$", lines[j]):
                lines[j] = f"filters={num_filters}"
            j += 1

# Also normalize any stray 'classes=' and 'filters=' lines if missing in target spots
txt = "\n".join(lines)

CFG = Path("/kaggle/working/yolov4-tiny-custom.cfg")
CFG.write_text(txt)
print("[CFG] Patched ->", CFG)


[CFG] Patched -> /kaggle/working/yolov4-tiny-custom.cfg


In [12]:
# ================================
# CELL 3A — Force a sane training schedule (max_batches & steps)
# ================================
import re
from pathlib import Path

CFG = Path("/kaggle/working/yolov4-tiny-custom.cfg")
assert CFG.exists(), f"Missing cfg: {CFG}"

txt = CFG.read_text()

# Ensure classes/filters are correct (safety)
txt = re.sub(r'(?mi)^\s*classes\s*=\s*\d+\s*$', 'classes=2', txt)
txt = re.sub(r'(?mi)^\s*filters\s*=\s*\d+\s*$', 'filters=21', txt)

# Schedule tuned for your balanced set (~7.9k/img per class) → 4k iters is fine for v4-tiny
MB = 4000
s1, s2 = 3200, 3600

if re.search(r'(?mi)^\s*max_batches\s*=', txt):
    txt = re.sub(r'(?mi)^\s*max_batches\s*=\s*\d+\s*$', f'max_batches={MB}', txt)
else:
    txt += f'\nmax_batches={MB}\n'

if re.search(r'(?mi)^\s*steps\s*=\s*\d+\s*,\s*\d+\s*$', txt):
    txt = re.sub(r'(?mi)^\s*steps\s*=\s*\d+,\s*\d+\s*$', f'steps={s1},{s2}', txt)
else:
    txt += f'\nsteps={s1},{s2}\n'

if not re.search(r'(?mi)^\s*burn_in\s*=\s*\d+\s*$', txt):
    txt += 'burn_in=1000\n'

CFG.write_text(txt)
print(f"[CFG] Forced max_batches={MB}, steps={s1},{s2}, burn_in=1000")


[CFG] Forced max_batches=4000, steps=3200,3600, burn_in=1000


In [13]:
# PURPOSE: Fully rebuild a correct YOLOv4-Tiny custom cfg file.
# Ensures backbone filters are restored (32, 64, 128...) and only
# conv-before-[yolo] layers use filters=21, with classes=2.

from pathlib import Path
import re, urllib.request, shutil

ROOT = Path("/kaggle/working")

# 1️⃣ Download a clean official YOLOv4-Tiny cfg from AlexeyAB repo
clean_cfg_url = "https://github.com/AlexeyAB/darknet/raw/master/cfg/yolov4-tiny.cfg"
base_cfg = ROOT / "yolov4-tiny-base.cfg"
urllib.request.urlretrieve(clean_cfg_url, base_cfg)
print(f"[CFG] Downloaded clean base: {base_cfg}")

# 2️⃣ Read cfg and prepare edits
cfg_lines = base_cfg.read_text().splitlines()

# Define your number of classes
num_classes = 2
num_filters = (num_classes + 5) * 3  # For YOLOv4 anchor formula = 21

# 3️⃣ Find and modify only the correct layers
yolo_indices = []
last_conv_idx = -1
for i, line in enumerate(cfg_lines):
    if line.strip().lower() == "[convolutional]":
        last_conv_idx = i
    elif line.strip().lower() == "[yolo]":
        # Modify the conv before this YOLO
        for j in range(last_conv_idx + 1, len(cfg_lines)):
            if cfg_lines[j].strip().startswith("["):
                break
            if re.match(r"^\s*filters\s*=", cfg_lines[j]):
                cfg_lines[j] = f"filters={num_filters}"
                break
        # Modify the classes inside YOLO block
        for j in range(i + 1, len(cfg_lines)):
            if cfg_lines[j].strip().startswith("["):
                break
            if re.match(r"^\s*classes\s*=", cfg_lines[j]):
                cfg_lines[j] = f"classes={num_classes}"
                break
        yolo_indices.append(i)

# 4️⃣ (Optional) Adjust training schedule for small dataset
txt = "\n".join(cfg_lines)
txt = re.sub(r'(?mi)^\s*max_batches\s*=.*$', 'max_batches=6000', txt)
txt = re.sub(r'(?mi)^\s*steps\s*=.*$', 'steps=4800,5400', txt)
if not re.search(r'(?mi)^\s*burn_in\s*=', txt):
    txt += '\nburn_in=1000\n'

# 5️⃣ Save the corrected file
CFG_FIXED = ROOT / "yolov4-tiny-custom.cfg"
CFG_FIXED.write_text(txt)
print(f"[CFG] ✅ Fixed YOLOv4-Tiny config written to: {CFG_FIXED}")

# 6️⃣ Quick sanity check – print first conv layers
print("\n[CHECK] First few convolution filters:")
for line in txt.splitlines():
    if re.match(r"^\s*filters\s*=\s*\d+", line):
        print(line)
print("\n[HINT] Early layers should have filters 32, 64, 128... and only two filters=21 before YOLO.")


[CFG] Downloaded clean base: /kaggle/working/yolov4-tiny-base.cfg
[CFG] ✅ Fixed YOLOv4-Tiny config written to: /kaggle/working/yolov4-tiny-custom.cfg

[CHECK] First few convolution filters:
filters=32
filters=64
filters=64
filters=32
filters=32
filters=64
filters=128
filters=64
filters=64
filters=128
filters=256
filters=128
filters=128
filters=256
filters=512
filters=256
filters=512
filters=21
filters=128
filters=256
filters=21

[HINT] Early layers should have filters 32, 64, 128... and only two filters=21 before YOLO.


In [14]:
# ================================
# CELL 4 — Choose start weights (prefer clean conv.29; fallback to your pack)
# ================================
from pathlib import Path
import shutil

conv_local = Path("/kaggle/working/yolov4-tiny.conv.29")

# Try to source conv.29 from your YOLO_FILES pack if present
if not conv_local.exists():
    for d in [YOLO_FILES/"weights", YOLO_FILES]:
        if d.exists():
            cand = next((p for p in d.glob("yolov4-tiny.conv*.weights")), None)
            if cand:
                shutil.copy2(cand, conv_local)
                break

# If still missing, CELL 4B will download it.
if conv_local.exists():
    print("[WEIGHTS] conv.29 ready at:", conv_local)
else:
    print("[WEIGHTS] conv.29 not found locally; will download in CELL 4B.")


[WEIGHTS] conv.29 not found locally; will download in CELL 4B.


In [15]:
# ================================
# CELL 4B — Download conv.29 if missing
# ================================
import subprocess
from pathlib import Path

conv_path = Path("/kaggle/working/yolov4-tiny.conv.29")
if not conv_path.exists():
    print("[INFO] Downloading yolov4-tiny.conv.29 pretrained weights...")
    url = "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29"
    subprocess.run(["wget", "-q", url, "-O", str(conv_path)], check=True)
else:
    print("[INFO] Pretrained weights already exist:", conv_path)

print("[DONE] conv29 ready at:", conv_path)


[INFO] Downloading yolov4-tiny.conv.29 pretrained weights...
[DONE] conv29 ready at: /kaggle/working/yolov4-tiny.conv.29


In [None]:
# ================================
# CELL 5 — Fresh Training from conv.29 (clean start)
# ================================
import subprocess, shlex, sys, time, re
from pathlib import Path

ROOT = Path("/kaggle/working")
LOGS = ROOT/"logs"; LOGS.mkdir(exist_ok=True)
CFG  = ROOT/"yolov4-tiny-custom.cfg"
DATA = ROOT/"obj.data"   # from CELL 2
assert CFG.exists(),  f"Missing cfg: {CFG}"
assert DATA.exists(), f"Missing data: {DATA}"

# Ensure schedule is 4k (safety)
cfg_text = CFG.read_text()
cfg_text = re.sub(r'(?mi)^\s*max_batches\s*=\s*\d+\s*$', 'max_batches=4000', cfg_text)
cfg_text = re.sub(r'(?mi)^\s*steps\s*=\s*\d+\s*,\s*\d+\s*$', 'steps=3200,3600', cfg_text)
CFG.write_text(cfg_text)
print("[CFG] max_batches confirmed at 4000 (steps 3200,3600)")

# Find darknet executable
candidates = [
    Path("/kaggle/working/darknet/darknet"),
    Path("/kaggle/working/darknet"),
]
exe = next((p for p in candidates if p.exists()), None)
assert exe is not None, "darknet executable not found at /kaggle/working/darknet"
run_cwd = exe.parent if exe.is_dir() else ROOT
exe_cmd = "./darknet" if exe.is_dir() else str(exe)

# Start from conv.29
conv_path = Path("/kaggle/working/yolov4-tiny.conv.29")
assert conv_path.exists(), f"Missing pretrained conv weights: {conv_path}"

# Read total iters from cfg (for progress bar)
m = re.search(r'(?mi)^\s*max_batches\s*=\s*(\d+)', CFG.read_text())
total_iter_cfg = int(m.group(1)) if m else 0

cmd = f'{exe_cmd} detector train {DATA} {CFG} {conv_path} -map -dont_show -gpus 0'
print("[CMD]", cmd)
print("[CWD]", run_cwd)

# One-line live progress
def one_line_bar(cur, total, eta_sec=None):
    total = max(total, 1)
    cur = max(0, min(cur, total))
    pct = int(cur * 100 / total)
    width = 30
    fill = int(width * cur / total)
    bar = "#" * fill + "-" * (width - fill)
    eta_txt = ""
    if eta_sec is not None and eta_sec > 0:
        h = int(eta_sec // 3600)
        m = int((eta_sec % 3600) // 60)
        s = int(eta_sec % 60)
        eta_txt = f" ETA {h:02d}h{m:02d}m{s:02d}s"
    return f"[{bar}] {cur}/{total} ({pct:3d}%)" + eta_txt

iter_pair_re = re.compile(r'^\s*(\d+)\s*/\s*(\d+)\s*:', re.IGNORECASE)
iter_single_re = re.compile(r'^\s*(\d+)\s*:', re.IGNORECASE)

cur_iter = 0
total_iter = total_iter_cfg
last_update = 0.0
start_t = time.time()

with open(LOGS/"training.log", "w") as lf:
    proc = subprocess.Popen(shlex.split(cmd), cwd=str(run_cwd),
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            text=True, bufsize=1)
    try:
        for raw in proc.stdout:
            lf.write(raw)

            m = iter_pair_re.search(raw)
            if m:
                cur_iter = int(m.group(1))
                total_iter = int(m.group(2))
            else:
                m2 = iter_single_re.search(raw)
                if m2:
                    cur_iter = max(cur_iter, int(m2.group(1)))

            now = time.time()
            if now - last_update >= 1.0:
                eta_sec = None
                if cur_iter > 0 and (total_iter or total_iter_cfg):
                    tgt = total_iter if total_iter > 0 else total_iter_cfg
                    rate = cur_iter / max(now - start_t, 1e-9)
                    rem = max(tgt - cur_iter, 0) / max(rate, 1e-9)
                    eta_sec = int(rem)
                bar = one_line_bar(cur_iter, total_iter if total_iter > 0 else (total_iter_cfg or 1), eta_sec)
                sys.stdout.write("\r" + bar)
                sys.stdout.flush()
                last_update = now

        proc.wait()
    finally:
        try:
            proc.stdout.close()
        except Exception:
            pass

print("\n[DONE] Training finished (or stopped). Full log:", LOGS/"training.log")


[CFG] max_batches confirmed at 4000 (steps 3200,3600)
[CMD] /kaggle/working/darknet detector train /kaggle/working/obj.data /kaggle/working/yolov4-tiny-custom.cfg /kaggle/working/yolov4-tiny.conv.29 -map -dont_show -gpus 0
[CWD] /kaggle/working
[------------------------------] 86/4000 (  2%) ETA 01h46m34s

In [17]:
# CELL 6 — Export pack (weights + cfg + names + README zipped)

from pathlib import Path
import shutil, datetime, json

ROOT = Path("/kaggle/working")
EXPORT = ROOT/"export"
BACKUP = ROOT/"backup"
CFG    = ROOT/"yolov4-tiny-custom.cfg"
NAMES  = ROOT/"obj.names"
DATA   = ROOT/"obj.data"   # just referenced in README
EXPORT.mkdir(exist_ok=True)

# 1) Pick best weights
def pick_best():
    for pat in ("*_best.weights","*_final.weights","*_last.weights"):
        cand = sorted(BACKUP.glob(pat), key=lambda p: p.stat().st_mtime, reverse=True)
        if cand: return cand[0]
    cand = sorted(BACKUP.glob("*.weights"), key=lambda p: p.stat().st_mtime, reverse=True)
    return cand[0] if cand else None

best = pick_best()
assert best and best.exists(), "No weights found in /kaggle/working/backup."

# 2) Copy artifacts into /export
dst_weights = EXPORT/best.name
dst_cfg     = EXPORT/CFG.name
dst_names   = EXPORT/NAMES.name

shutil.copy2(best, dst_weights)
shutil.copy2(CFG,  dst_cfg)
shutil.copy2(NAMES, dst_names)

# 3) Minimal metadata + README
classes = [s.strip() for s in NAMES.read_text().splitlines() if s.strip()]
if len(classes) < 2:
    classes = ["fire", "person"]

meta = {
    "model": "yolov4-tiny",
    "classes": classes,
    "num_classes": len(classes),
    "exported_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "weights": dst_weights.name,
    "cfg": dst_cfg.name,
    "names": dst_names.name
}
(EXPORT/"metadata.json").write_text(json.dumps(meta, indent=2))

readme_lines = [
    "# PyroSense YOLOv4-tiny (2-class)",
    "",
    "**Classes (order matters):**",
    f"0: {classes[0]}",
    f"1: {classes[1]}",
    "",
    "## Files",
    f"- {dst_weights.name} — trained weights",
    f"- {dst_cfg.name} — network config (classes=2, filters=21 at both heads)",
    f"- {dst_names.name} — class labels (fire, person)",
    "- metadata.json — quick metadata",
    "",
    "## Darknet inference (example)",
    "Run these commands from the directory where the files live:",
    f"./darknet detector test {DATA} {dst_cfg.name} {dst_weights.name} -thresh 0.25 -ext_output -dont_show -map",
    "",
    "## Notes",
    "- Ensure your runtime has CUDA/cuDNN (GPU) or run with CPU build accordingly.",
    "- The model expects class IDs: fire=0, person=1.",
]
(EXPORT/"README.txt").write_text("\n".join(readme_lines))

# 4) Zip it
stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M")
zip_name = f"pyrosense-yolov4tiny-2class-{stamp}"
archive_path = shutil.make_archive(str(ROOT/zip_name), "zip", root_dir=str(EXPORT))
print("[EXPORT] Bundle created:", Path(archive_path))
print("[EXPORT] Contents:")
for p in sorted(EXPORT.iterdir()):
    print(" -", p.name)
print("[EXPORT] Zip path:", archive_path)


[EXPORT] Bundle created: /kaggle/working/pyrosense-yolov4tiny-2class-20251016-1558.zip
[EXPORT] Contents:
 - README.txt
 - metadata.json
 - obj.names
 - yolov4-tiny-custom.cfg
 - yolov4-tiny-custom_best.weights
[EXPORT] Zip path: /kaggle/working/pyrosense-yolov4tiny-2class-20251016-1558.zip


In [18]:
# CELL 7 (fixed) — mAP evaluation (robust Darknet path)

# PURPOSE: Evaluate mAP using the best/final weights. Robust to file-or-folder darknet path.

import subprocess, shlex
from pathlib import Path

ROOT = Path("/kaggle/working")
CFG  = ROOT/"yolov4-tiny-custom.cfg"
DATA = ROOT/"obj.data"
BACKUP = ROOT/"backup"
assert CFG.exists(),  f"Missing cfg: {CFG}"
assert DATA.exists(), f"Missing data: {DATA}"
assert BACKUP.exists(), "No /backup folder found."

# Resolve darknet executable path (file OR folder/binary)
candidates = [
    Path("/kaggle/working/darknet/darknet"),  # folder + binary
    Path("/kaggle/working/darknet"),          # single-file binary
]
exe = next((p for p in candidates if p.exists()), None)
assert exe is not None, "darknet executable not found at /kaggle/working/darknet"

if exe.is_file():
    run_cwd = ROOT
    exe_cmd = str(exe)
else:
    run_cwd = exe.parent
    exe_cmd = "./darknet"

# Pick best available weights
def pick_best():
    for pat in ("*_best.weights","*_final.weights","*_last.weights"):
        cand = sorted(BACKUP.glob(pat), key=lambda p: p.stat().st_mtime, reverse=True)
        if cand: return cand[0]
    cand = sorted(BACKUP.glob("*.weights"), key=lambda p: p.stat().st_mtime, reverse=True)
    return cand[0] if cand else None

best = pick_best()
assert best and best.exists(), "No weights found in /kaggle/working/backup."
print("[EVAL] Using:", best.name)

cmd = f"{exe_cmd} detector map {DATA} {CFG} {best}"
print("[CMD]", cmd)
print("[CWD]", run_cwd)

subprocess.run(shlex.split(cmd), cwd=str(run_cwd), check=False)


[EVAL] Using: yolov4-tiny-custom_best.weights
[CMD] /kaggle/working/darknet detector map /kaggle/working/obj.data /kaggle/working/yolov4-tiny-custom.cfg /kaggle/working/backup/yolov4-tiny-custom_best.weights
[CWD] /kaggle/working


 CUDA-version: 12050 (12060), cuDNN: 9.2.1, CUDNN_HALF=1, GPU count: 2  
 OpenCV isn't used - data augmentation will be slow 
 0 : compute_capability = 750, cudnn_half = 1, GPU: Tesla T4 
   layer   filters  size/strd(dil)      input                output
   0 conv     32       3 x 3/ 2    416 x 416 x   3 ->  208 x 208 x  32 0.075 BF
   1 conv     64       3 x 3/ 2    208 x 208 x  32 ->  104 x 104 x  64 0.399 BF
   2 conv     64       3 x 3/ 1    104 x 104 x  64 ->  104 x 104 x  64 0.797 BF
   3 route  2 		                       1/2 ->  104 x 104 x  32 
   4 conv     32       3 x 3/ 1    104 x 104 x  32 ->  104 x 104 x  32 0.199 BF
   5 conv     32       3 x 3/ 1    104 x 104 x  32 ->  104 x 104 x  32 0.199 BF
   6 route  5 4 	                           ->  104 x 104 x  64 
   7 

 CUDNN_HALF=1 


conv     64       1 x 1/ 1    104 x 104 x  64 ->  104 x 104 x  64 0.089 BF
   8 route  2 7 	                           ->  104 x 104 x 128 
   9 max                2x 2/ 2    104 x 104 x 128 ->   52 x  52 x 128 0.001 BF
  10 conv    128       3 x 3/ 1     52 x  52 x 128 ->   52 x  52 x 128 0.797 BF
  11 route  10 		                       1/2 ->   52 x  52 x  64 
  12 conv     64       3 x 3/ 1     52 x  52 x  64 ->   52 x  52 x  64 0.199 BF
  13 conv     64       3 x 3/ 1     52 x  52 x  64 ->   52 x  52 x  64 0.199 BF
  14 route  13 12 	                           ->   52 x  52 x 128 
  15 conv    128       1 x 1/ 1     52 x  52 x 128 ->   52 x  52 x 128 0.089 BF
  16 route  10 15 	                           ->   52 x  52 x 256 
  17 max                2x 2/ 2     52 x  52 x 256 ->   26 x  26 x 256 0.001 BF
  18 conv    256       3 x 3/ 1     26 x  26 x 256 ->   26 x  26 x 256 0.797 BF
  19 route  18 		                       1/2 ->   26 x  26 x 128 
  20 conv    128       3 x 3/ 1     

net.optimized_memory = 0 
mini_batch = 1, batch = 1, time_steps = 1, train = 0 
Create CUDA-stream - 0 
 Create cudnn-handle 0 
nms_kind: greedynms (1), beta = 0.600000 
nms_kind: greedynms (1), beta = 0.600000 


616Total Detection Time: 9 Seconds



 seen 64, trained: 256 K-images (4 Kilo-batches_64) 

 calculation mAP (mean average precision)...
 Detection layer: 30 - type = 28 
 Detection layer: 37 - type = 28 

 detections_count = 4631, unique_truth_count = 965  
class_id = 0, name = fire, ap = 78.53%   	 (TP = 412, FP = 91) 
class_id = 1, name = person, ap = 92.73%   	 (TP = 345, FP = 55) 

 for conf_thresh = 0.25, precision = 0.84, recall = 0.78, F1-score = 0.81 
 for conf_thresh = 0.25, TP = 757, FP = 146, FN = 208, average IoU = 64.30 % 

 IoU threshold = 50 %, used Area-Under-Curve for each unique Recall 
 mean average precision (mAP@0.50) = 0.856332, or 85.63 % 

Set -points flag:
 `-points 101` for MS COCO 
 `-points 11` for PascalVOC 2007 (uncomment `difficult` in voc.data) 
 `-points 0` (AUC) for ImageNet, PascalVOC 2010-2012, your custom dataset


CompletedProcess(args=['/kaggle/working/darknet', 'detector', 'map', '/kaggle/working/obj.data', '/kaggle/working/yolov4-tiny-custom.cfg', '/kaggle/working/backup/yolov4-tiny-custom_best.weights'], returncode=0)