# Video Processing Notebook for MegaDetector
## Version 1
### Authors: Bernedette Boscoe, Katherine Nunn
### Collaborators: Karen

This notebook extracts frames from video files and processes them through MegaDetector for wildlife detection.

**System Requirements:**
- Windows OS
- Python 3.8+
- MegaDetector installed
- OpenCV

In [1]:
# Install required package if needed
!pip install opencv-python-headless



In [1]:
import os
import json
import shutil
import subprocess
import sys
import time
from pathlib import Path
import cv2
import tkinter as tk
from tkinter import filedialog as fd

# Add the directory containing video_utils.py to the system path.
# This has to run before the MegaDetector imports below to have any effect on
# them; the membership test keeps re-runs of this cell from piling up
# duplicate entries in sys.path.
# NOTE(review): the package-style imports below (`megadetector.*`) resolve from
# the folder that contains the `megadetector` package, not from this
# sub-folder — confirm whether the repository root should be added instead.
_MD_DETECTION_DIR = "C:/Users/Public/Documents/MegaDetector_App/MegaDetector/megadetector/detection"
if _MD_DETECTION_DIR not in sys.path:
    sys.path.append(_MD_DETECTION_DIR)

#Megadetector imports
from megadetector.utils import path_utils
from megadetector.detection import video_utils
from megadetector.utils.ct_utils import write_json
from megadetector.visualization import visualization_utils as vis_utils
from megadetector.detection.video_utils import frame_results_to_video_results, FrameToVideoOptions

# Create a hidden root window for file dialogs:
# withdraw() hides the empty Tk window, and '-topmost' is set on it so the
# folder pickers opened later are not buried behind the browser.
root = tk.Tk()
root.withdraw()
root.attributes('-topmost', True)

print("✅ All imports successful!")

✅ All imports successful!


## Configuration

Set up paths and detection parameters.

In [2]:
# MegaDetector installation path.
# Defaults to the standard app location; set the MEGADETECTOR_ROOT environment
# variable to point at a different checkout without editing this notebook.
MEGADETECTOR_ROOT = Path(
    os.environ.get(
        "MEGADETECTOR_ROOT",
        "C:/Users/Public/Documents/MegaDetector_App/MegaDetector",
    )
)

# Model configuration (passed as the model argument to run_detector_batch.py)
MODEL_NAME = "MDV5A"

# Detection parameters
ANIMAL_CATEGORY_ID = 1  # value compared against each detection's "category" field
CONF_THRESH = 0.5  # Confidence threshold (0.0 to 1.0)

# Verify MegaDetector is installed.
# An explicit raise is used instead of `assert` so the check still runs when
# Python is started with -O (which strips assert statements).
if not (MEGADETECTOR_ROOT / "megadetector/detection/run_detector_batch.py").exists():
    raise FileNotFoundError(f"MegaDetector not found at {MEGADETECTOR_ROOT}")

print(f"✅ MegaDetector found at: {MEGADETECTOR_ROOT}")
print(f"Model: {MODEL_NAME}")
print(f"Confidence threshold: {CONF_THRESH}")

✅ MegaDetector found at: C:\Users\Public\Documents\MegaDetector_App\MegaDetector
Model: MDV5A
Confidence threshold: 0.5


## Select Directories

Choose your input folder (containing videos) and output folder (where results will be saved).

In [None]:
# Ask the user which folder holds the videos to process.
# Cancelling the dialog yields an empty result, which is treated as an error.
input_folder = fd.askdirectory(title="Select folder containing video files")
if input_folder:
    DATA_DIR = Path(input_folder)
    print("Input folder:", DATA_DIR)
else:
    raise ValueError("No input folder selected")

In [None]:
# Select output folder for all results
output_folder = fd.askdirectory(title="Select folder for output (detections, videos, etc.)")
if not output_folder:
    raise ValueError("No output folder selected")

OUTPUT_DIR = Path(output_folder)
# parents=True so that a typed-in nested path whose parent folders do not exist
# yet is created in full instead of raising FileNotFoundError.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
print("Output folder:", OUTPUT_DIR)

## Helper Functions

In [None]:
def find_all_videos(root, exts=(".mp4", ".mov", ".avi")):
    """Find all video files recursively.

    Args:
        root: Directory to search (str or Path); every sub-directory is walked.
        exts: File-name endings that count as video files. Matching ignores
            case, so ".mp4" also accepts ".MP4" and ".Mp4".

    Returns:
        Sorted list of Path objects, one per matching file (empty when
        nothing matches).
    """
    # Compare lower-cased names against lower-cased endings so mixed-case
    # extensions written by some cameras are not silently skipped.
    wanted = tuple(ext.lower() for ext in exts)
    videos = []
    for dirpath, _, filenames in os.walk(root):
        videos.extend(
            Path(dirpath) / fname
            for fname in filenames
            if fname.lower().endswith(wanted)
        )
    return sorted(videos)

# Collect every video under the chosen input folder
all_videos = find_all_videos(DATA_DIR)
print(f"Found {len(all_videos)} videos")

# Preview only the first 20 names so a large folder does not flood the output
for v in all_videos[:20]:
    print(f"  {v.name}")

hidden_count = len(all_videos) - 20
if hidden_count > 0:
    print(f"  ... and {hidden_count} more")

In [None]:
def resolve_frame_path(frames_dir, file_field):
    """Resolve the path to a frame file from MegaDetector JSON.

    Args:
        frames_dir: Directory the extracted frames were written to.
        file_field: Value of an image entry's "file" field.

    Returns:
        Path to the frame, chosen in this order:
        1. ``file_field`` itself when it is absolute;
        2. ``frames_dir / file_field`` when that file exists (keeps any
           sub-folder contained in ``file_field``);
        3. ``file_field`` relative to the current working directory when
           that exists;
        4. ``frames_dir / <file name only>`` as the last resort (the result
           may not exist on disk).
    """
    frames_dir = Path(frames_dir)
    p = Path(file_field)
    if p.is_absolute():
        return p

    # Look inside the frames folder first, so an unrelated file of the same
    # name in the working directory cannot be picked up by mistake.
    in_frames_dir = frames_dir / p
    if in_frames_dir.exists():
        return in_frames_dir
    if p.exists():
        return p
    return frames_dir / p.name

In [None]:
def video_has_any_animal(images, conf_thresh=0.0, category_id=None):
    """Check if any frame in the video has animal detections above threshold.

    Args:
        images: Iterable of per-frame result dicts (the "images" list of the
            detector JSON). Each may carry a "detections" list whose items
            have "category" and "conf" entries.
        conf_thresh: Minimum confidence (inclusive) for a detection to count.
        category_id: Category to look for; defaults to ANIMAL_CATEGORY_ID.

    Returns:
        True as soon as one matching detection is found, otherwise False.
    """
    if category_id is None:
        category_id = ANIMAL_CATEGORY_ID

    for img in images:
        # "detections" can be missing or explicitly None (presumably for frames
        # the detector could not process — confirm against the output format);
        # `or []` covers both, whereas .get(key, []) only covers a missing key.
        for det in img.get("detections") or []:
            if int(det["category"]) == category_id and det["conf"] >= conf_thresh:
                return True
    return False

## Process Videos

This cell processes each video individually:
1. Extract frames
2. Run MegaDetector
3. Draw bounding boxes on frames with detections
4. Create output video (only for videos WITH animal detections)
5. Clean up temporary frames

**Note:** You can test with a few videos first: in the loop, comment out the main `for` line and uncomment the test line directly below it.

In [None]:
# Batch driver: for every video in `all_videos`, extract its frames to a
# temporary folder, run MegaDetector on them in a subprocess, and, when at
# least one animal detection reaches CONF_THRESH, write a copy of the video
# with the detections drawn in. Temporary frames are removed on every exit
# path of the loop body except an exception raised by the detector run itself.
batch_start = time.perf_counter()

# Frame rate used for the output video when the source reports no usable value
FALLBACK_FPS = 30.0

videos_processed = 0
videos_with_animals = 0
videos_skipped = 0
videos_with_animals_list = []
videos_skipped_list = []
videos_failed_list = []   # (video name, reason) for videos that could not be processed
used_out_dirs = set()     # output folders already claimed during this run

for video_path in all_videos:
# for video_path in all_videos[:3]:  # TESTING: use instead of the line above to process only first 3 videos

    print("\n" + "="*50)
    print("Processing:", video_path.name)
    print("="*50)

    # ----------------------------------------
    # Per-video output paths
    # ----------------------------------------
    video_stem = video_path.stem
    video_out_dir = OUTPUT_DIR / video_stem

    # The input folder is searched recursively, so two videos in different
    # sub-folders can share a stem. Later ones get a numbered folder instead
    # of overwriting the earlier video's results.
    dup_index = 2
    while video_out_dir in used_out_dirs:
        video_out_dir = OUTPUT_DIR / f"{video_stem}_{dup_index}"
        dup_index += 1
    used_out_dirs.add(video_out_dir)

    frames_dir = video_out_dir / "frames"
    json_path = video_out_dir / "detections.json"
    out_video = video_out_dir / f"{video_stem}_detected.mp4"

    video_out_dir.mkdir(parents=True, exist_ok=True)

    print("Frames:", frames_dir)
    print("JSON:", json_path)
    print("Output:", out_video)

    # ----------------------------------------
    # Clean frames dir
    # ----------------------------------------
    if frames_dir.exists():
        shutil.rmtree(frames_dir)
    frames_dir.mkdir()

    # ----------------------------------------
    # Extract frames
    # ----------------------------------------
    print("\n[1/4] Extracting frames...")
    cap = cv2.VideoCapture(str(video_path))
    fps = cap.get(cv2.CAP_PROP_FPS)

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imwrite(
            str(frames_dir / f"frame_{frame_idx:05d}.jpg"),
            frame
        )
        frame_idx += 1

    cap.release()
    print(f"  ✓ Extracted {frame_idx} frames @ {fps:.2f} FPS")

    if frame_idx == 0:
        print("  ✗ No frames extracted, skipping video")
        shutil.rmtree(frames_dir, ignore_errors=True)
        videos_failed_list.append((video_path.name, "no frames extracted"))
        continue

    # `not fps > 0` is also True for NaN, which a plain `fps <= 0` would miss
    if not fps > 0:
        print(f"  ! Source reports no usable frame rate, using {FALLBACK_FPS:.2f} FPS for output")
        fps = FALLBACK_FPS

    # ----------------------------------------
    # Run MegaDetector
    # ----------------------------------------
    print("\n[2/4] Running MegaDetector...")
    cmd = [
        # Use the kernel's own interpreter rather than whatever "python" is
        # first on PATH, so the detector runs in the same environment.
        sys.executable,
        str(MEGADETECTOR_ROOT / "megadetector/detection/run_detector_batch.py"),
        MODEL_NAME,
        str(frames_dir),
        str(json_path),
    ]

    # check=True: a failing detector run raises and stops the batch
    subprocess.run(cmd, check=True)
    print("  ✓ Detection complete")

    # ----------------------------------------
    # Load detections
    # ----------------------------------------
    with open(json_path) as f:
        md = json.load(f)

    images = md.get("images", [])
    print(f"  ✓ Loaded {len(images)} detection results")

    if len(images) != frame_idx:
        print(f"  ✗ Frame/JSON mismatch ({frame_idx} frames vs {len(images)} results), skipping")
        shutil.rmtree(frames_dir)
        videos_failed_list.append((video_path.name, "frame/JSON mismatch"))
        continue

    # ----------------------------------------
    # Check for animal detections
    # ----------------------------------------
    print("\n[3/4] Checking for animal detections...")
    if not video_has_any_animal(images, CONF_THRESH):
        print(f"  ✗ No animal detections (conf >= {CONF_THRESH}) — skipping output video")
        shutil.rmtree(frames_dir)
        videos_skipped += 1
        videos_processed += 1
        videos_skipped_list.append(video_path.name)
        continue

    print("  ✓ Animals detected!")

    # ----------------------------------------
    # Initialize output video
    # ----------------------------------------
    print("\n[4/4] Creating output video with bounding boxes...")
    first_frame_path = resolve_frame_path(frames_dir, images[0]["file"])
    first_frame = cv2.imread(str(first_frame_path))

    if first_frame is None:
        print("  ✗ Cannot read first frame, skipping")
        shutil.rmtree(frames_dir, ignore_errors=True)
        videos_failed_list.append((video_path.name, "cannot read first frame"))
        continue

    # Output size comes from the first frame; boxes are scaled with it below
    h, w, _ = first_frame.shape

    out = cv2.VideoWriter(
        str(out_video),
        cv2.VideoWriter_fourcc(*"mp4v"),
        fps,
        (w, h),
    )

    if not out.isOpened():
        print("  ✗ Cannot open output video for writing, skipping")
        out.release()
        shutil.rmtree(frames_dir, ignore_errors=True)
        videos_failed_list.append((video_path.name, "cannot open output video"))
        continue

    # ----------------------------------------
    # Write video with bounding boxes
    # ----------------------------------------
    written = 0
    detections_drawn = 0

    try:
        for img in images:
            frame_path = resolve_frame_path(frames_dir, img["file"])
            frame = cv2.imread(str(frame_path))
            if frame is None:
                continue

            # Draw bounding boxes for animal detections.
            # `or []` also covers a "detections" entry that is present but None.
            for det in img.get("detections") or []:
                if int(det["category"]) != ANIMAL_CATEGORY_ID:
                    continue
                if det["conf"] < CONF_THRESH:
                    continue

                # bbox values are multiplied by the frame size here, i.e. they
                # are treated as fractions of width/height: [x, y, width, height]
                x, y, bw, bh = det["bbox"]
                x1 = int(x * w)
                y1 = int(y * h)
                x2 = int((x + bw) * w)
                y2 = int((y + bh) * h)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(
                    frame,
                    f"{det['conf']:.2f}",
                    (x1, max(y1 - 5, 10)),  # keep the label inside the frame for boxes at the top edge
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 255, 0),
                    1,
                )
                detections_drawn += 1

            out.write(frame)
            written += 1
    finally:
        # Release the writer even if a frame fails part-way through
        out.release()

    print(f"  ✓ Wrote {written} frames with {detections_drawn} bounding boxes")
    print(f"  ✓ Output video: {out_video}")

    videos_with_animals += 1
    videos_processed += 1
    videos_with_animals_list.append(video_path.name)

    # ----------------------------------------
    # Cleanup frames
    # ----------------------------------------
    shutil.rmtree(frames_dir)
    print("  ✓ Cleaned up temporary frames")

print("\n" + "="*50)
print("PROCESSING COMPLETE")
print("="*50)

# Videos that could not be processed are counted in neither tally above,
# so list them here to keep them from disappearing silently.
if videos_failed_list:
    print(f"{len(videos_failed_list)} video(s) could not be processed:")
    for failed_name, failed_reason in videos_failed_list:
        print(f"  ✗ {failed_name}: {failed_reason}")

## Processing Summary

In [None]:
# Stop the clock started at the top of the processing cell
batch_end = time.perf_counter()
elapsed = batch_end - batch_start

# Headline numbers
print("\n" + "="*50)
print("BATCH PROCESSING SUMMARY")
print("="*50)
for summary_line in (
    f"Total videos found     : {len(all_videos)}",
    f"Videos processed       : {videos_processed}",
    f"Videos with animals    : {videos_with_animals}",
    f"Videos skipped (empty) : {videos_skipped}",
    f"Total elapsed time     : {elapsed:.2f} seconds ({elapsed/60:.1f} minutes)",
    # max(..., 1) avoids dividing by zero when no video was processed
    f"Average per video      : {elapsed / max(videos_processed, 1):.2f} seconds",
):
    print(summary_line)

# Videos for which an annotated output was written
print("\n" + "-"*50)
print("Videos WITH animal detections:")
print("-"*50)
if not videos_with_animals_list:
    print("  (none)")
else:
    for v in videos_with_animals_list:
        print(f"  ✓ {v}")

# Videos that produced no detection at or above the threshold
print("\n" + "-"*50)
print("Videos with NO animal detections:")
print("-"*50)
if not videos_skipped_list:
    print("  (none)")
else:
    for v in videos_skipped_list:
        print(f"  ✗ {v}")

print("\n" + "="*50)
print(f"Output directory: {OUTPUT_DIR}")
print("="*50)