# Mask R-CNN End-to-End Walkthrough

This notebook shows how to train, evaluate, and run inference with the lightweight Mask R-CNN setup in this repo.

In [None]:
# Install dependencies (uncomment if running in a fresh environment).
# Prefer the %pip magic over !pip so packages are installed into the
# environment of the running kernel rather than whatever `pip` is on PATH.
# %pip install -r ../requirements.txt

In [None]:
import sys
from pathlib import Path

# Make the parent directory importable so the modules imported below resolve.
# Guarded so that re-running this cell does not keep appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")

from config import Config
from dataset import InstanceSegmentationDataset
from transforms import build_transforms
from model import load_for_inference

# Build the configuration object and point it at this notebook's folders.
cfg = Config()
cfg.train_data_dir, cfg.val_data_dir = Path("../data/train"), Path("../data/val")
cfg.output_dir = Path("../outputs")
# Edit this list to match your own classes.
cfg.classes = ["__background__", "object"]

## Data format
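Each split lives under `data/<split>/images` and `data/<split>/annotations`. Every image has a matching JSON annotation file that shares its stem (e.g. `images/img_000.png` ↔ `annotations/img_000.json`). Example annotation (coordinates shown as placeholders):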
```json
{
  "boxes": [[x1, y1, x2, y2]],
  "labels": ["object"],
  "polygons": [
    [[x, y], [x, y], [x, y]]
  ],
  "iscrowd": [0]
}
```
Polygons are rasterized into binary masks; provide multiple polygons per instance if needed.

In [None]:
# Optional: create a tiny synthetic dataset for a smoke test
import json
import random
from PIL import Image, ImageDraw

def make_synthetic_split(split: str, count: int = 4, seed=None, data_root="../data") -> None:
    """Write a small synthetic split of images plus JSON annotations.

    Each sample is a 256x256 white RGB image containing one pink rectangle
    with a red outline. The annotation stores that rectangle as a box, as a
    four-point polygon, with the label "object" and iscrowd 0.

    Args:
        split: Sub-directory name under ``data_root`` (e.g. "train", "val").
        count: Number of image/annotation pairs to generate.
        seed: Optional integer seed. When given, a private random generator
            is used so the output is reproducible and the global ``random``
            state is left untouched. When ``None``, the global ``random``
            module is used. Note that two splits generated with the same
            seed receive identical rectangles.
        data_root: Directory that holds the split folders (str or Path).

    Existing files with the same names are overwritten.
    """
    # Fall back to the module-level generator so callers that seed the
    # global RNG themselves keep getting the same sequence as before.
    rng = random if seed is None else random.Random(seed)

    root = Path(data_root) / split
    images_dir = root / "images"
    annotations_dir = root / "annotations"
    images_dir.mkdir(parents=True, exist_ok=True)
    annotations_dir.mkdir(parents=True, exist_ok=True)

    for i in range(count):
        img = Image.new("RGB", (256, 256), color="white")
        draw = ImageDraw.Draw(img)

        # Top-left corner in [20, 80], side lengths in [60, 120]; the far
        # corner is therefore at most 200, well inside the 256-pixel canvas.
        x1, y1 = rng.randint(20, 80), rng.randint(20, 80)
        x2, y2 = x1 + rng.randint(60, 120), y1 + rng.randint(60, 120)
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3, fill="pink")

        # Image and annotation share the same stem so they can be paired.
        stem = f"img_{i:03d}"
        img.save(images_dir / f"{stem}.png")

        ann = {
            "boxes": [[x1, y1, x2, y2]],
            "labels": ["object"],
            "polygons": [[[x1, y1], [x2, y1], [x2, y2], [x1, y2]]],
            "iscrowd": [0],
        }
        with open(annotations_dir / f"{stem}.json", "w", encoding="utf-8") as f:
            json.dump(ann, f, indent=2)

# Seed the global RNG so the smoke-test data is identical on every
# Restart & Run All; the value itself is arbitrary.
random.seed(42)
make_synthetic_split("train", 12)
make_synthetic_split("val", 4)
print("Synthetic data ready under ../data")

In [None]:
# Train the model by running the repo's train.py as a shell command.
# Flags as set here: 2 epochs, batch size 2, CPU device, output under ../outputs.
# --no-amp presumably disables automatic mixed precision — confirm in train.py.
# Adjust flags as needed.
!python ../train.py --train-data ../data/train --val-data ../data/val --output-dir ../outputs --epochs 2 --batch-size 2 --device cpu --no-amp

In [None]:
# Inference on validation images: runs infer.py on ../data/val/images on CPU
# with a score threshold of 0.5 and ../predictions as the output directory.
# NOTE(review): the checkpoint name model_epoch_2.pth assumes the training run
# above finished 2 epochs and that train.py names checkpoints per epoch —
# verify, and update the path if you change --epochs.
!python ../infer.py --images ../data/val/images --checkpoint ../outputs/model_epoch_2.pth --output-dir ../predictions --device cpu --score-threshold 0.5

In [None]:
# Evaluate IoU@0.5 precision/recall by running eval.py on the validation split (CPU).
# NOTE(review): the checkpoint path assumes a 2-epoch training run wrote
# ../outputs/model_epoch_2.pth — update it if you change --epochs or --output-dir.
!python ../eval.py --data ../data/val --checkpoint ../outputs/model_epoch_2.pth --device cpu