# Mask R-CNN End-to-End (no shell calls)
Train, infer, and evaluate directly from Python so you can see what runs.

In [None]:
import sys
from pathlib import Path
import torch

# Put the parent folder on the import path so the project imports below
# resolve (assumes the notebook is run from a sub-folder of the project).
# The membership check keeps re-runs of this cell from adding duplicates.
if ".." not in sys.path:
    sys.path.append("..")

from config import Config
from dataset import InstanceSegmentationDataset
from transforms import build_transforms
from model import build_model, load_for_inference
from engine import train_one_epoch, evaluate
from utils import collate_fn, save_checkpoint, set_seed
from infer import run_inference
from eval import run_eval

# Start from the project's default Config, then apply the notebook-specific
# locations and class list.
cfg = Config()
notebook_overrides = {
    "train_data_dir": Path("../data/train"),
    "val_data_dir": Path("../data/val"),
    "output_dir": Path("../outputs"),
    "classes": ["__background__", "object"],  # edit for your data
}
for field_name, field_value in notebook_overrides.items():
    setattr(cfg, field_name, field_value)

# Use the GPU when PyTorch reports one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Seed with the value stored on the config (set_seed is the project helper
# imported from utils).
set_seed(cfg.seed)

## Data format reminder
- Images: `data/<split>/images/*.jpg` or `data/<split>/images/*.png`
- Annotations: `data/<split>/annotations/<image_stem>.json`
- JSON keys: `boxes`, `labels`, `polygons` (list of point lists), optional `iscrowd`.

The next cell creates a tiny synthetic dataset; skip it if you already have data.

In [None]:
import json, random
from PIL import Image, ImageDraw

def make_synthetic_split(split: str, count: int = 6, data_root="../data"):
    """Write a tiny synthetic instance-segmentation split to disk.

    Every sample is a 256x256 white image containing one pink rectangle with
    a red outline at a random position. The matching JSON annotation stores
    that rectangle as a box, as a four-corner polygon, with the label
    ``"object"`` and ``iscrowd`` set to 0.

    Args:
        split: Name of the split folder created under ``data_root``
            (for example ``"train"`` or ``"val"``).
        count: Number of image/annotation pairs to write.
        data_root: Folder that holds the split folders. Defaults to
            ``"../data"``, the location this function always used before
            the parameter existed.
    """
    root = Path(data_root) / split
    images_dir = root / "images"
    annotations_dir = root / "annotations"
    images_dir.mkdir(parents=True, exist_ok=True)
    annotations_dir.mkdir(parents=True, exist_ok=True)

    for i in range(count):
        stem = f"img_{i:03d}"
        img = Image.new("RGB", (256, 256), color="white")
        draw = ImageDraw.Draw(img)

        # The random draws stay in the original order (x1, y1, width, height)
        # so a seeded run produces the same rectangles as before.
        x1, y1 = random.randint(20, 80), random.randint(20, 80)
        x2, y2 = x1 + random.randint(60, 120), y1 + random.randint(60, 120)
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3, fill="pink")
        img.save(images_dir / f"{stem}.png")

        ann = {
            "boxes": [[x1, y1, x2, y2]],
            "labels": ["object"],
            # Rectangle corners listed in order around the box.
            "polygons": [[[x1, y1], [x2, y1], [x2, y2], [x1, y2]]],
            "iscrowd": [0],
        }
        with open(annotations_dir / f"{stem}.json", "w", encoding="utf-8") as f:
            json.dump(ann, f, indent=2)
    print(f"Synthetic {split} set written to {root}")

# Generate the demo splits (12 train images, 4 val images).
# Comment this loop out if you already have data.
for split_name, n_images in (("train", 12), ("val", 4)):
    make_synthetic_split(split_name, n_images)

## Build datasets/dataloaders

In [None]:
from torch.utils.data import DataLoader

# Training split. NOTE(review): the first positional flag passed to
# build_transforms presumably selects train mode — confirm in transforms.py.
train_transforms = build_transforms(True, cfg.image_size)
train_ds = InstanceSegmentationDataset(
    cfg.train_data_dir,
    transforms=train_transforms,
    classes=cfg.classes,
)

# Validation split: same dataset class, built with with_augs=False.
val_transforms = build_transforms(False, cfg.image_size, with_augs=False)
val_ds = InstanceSegmentationDataset(
    cfg.val_data_dir,
    transforms=val_transforms,
    classes=cfg.classes,
)

# Both loaders share everything except shuffling.
loader_options = dict(
    batch_size=cfg.batch_size,
    num_workers=cfg.num_workers,
    collate_fn=collate_fn,
)
train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
val_loader = DataLoader(val_ds, shuffle=False, **loader_options)

# Last expression of the cell: shows the two dataset sizes as its output.
len(train_ds), len(val_ds)

## Train (explicit loop)
Runs a small number of epochs, logs losses, and saves a checkpoint.

In [None]:
from torch.optim.lr_scheduler import MultiStepLR

# Build the network and move it to the selected device.
model = build_model(cfg)
model = model.to(device)

# Only parameters that require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=cfg.learning_rate,
    momentum=cfg.momentum,
    weight_decay=cfg.weight_decay,
)
scheduler = MultiStepLR(
    optimizer,
    milestones=list(cfg.lr_steps),
    gamma=cfg.lr_gamma,
)
scaler = torch.cuda.amp.GradScaler(enabled=cfg.amp)

num_epochs = 2  # bump as needed
cfg.output_dir.mkdir(parents=True, exist_ok=True)
ckpt_path = cfg.output_dir / "model_notebook.pth"

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, train_loader, device, epoch, cfg, scaler)
    val_loss = evaluate(model, val_loader, device, cfg)
    scheduler.step()

    # Snapshot is taken after scheduler.step(), so the optimizer state reflects
    # the stepped schedule. The same file is overwritten every epoch.
    checkpoint = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch + 1,
        "config": cfg,
    }
    save_checkpoint(checkpoint, ckpt_path)
    print(f"Epoch {epoch+1}: train_loss={train_loss:.4f} val_loss={val_loss:.4f} saved={ckpt_path}")

## Inference (Python call)
Loads the saved checkpoint and writes overlays/JSON to `../predictions`.

In [None]:
# Load the checkpoint saved by the training cell and run it over the
# validation images; outputs are directed to pred_dir.
pred_dir = Path("../predictions")
model_infer = load_for_inference(cfg, checkpoint_path=str(ckpt_path), device=device)

# Collect BOTH supported extensions. The previous `png or jpg` fallback
# ignored every JPG as soon as a single PNG was present. Sorting makes the
# processing order, and therefore results[:1], reproducible across runs.
image_dir = cfg.val_data_dir / "images"
images = sorted(
    path
    for pattern in ("*.png", "*.jpg")
    for path in image_dir.glob(pattern)
)
results = run_inference(model_infer, images, pred_dir, device, cfg)
results[:1]  # show first prediction dict

## Evaluation (Python call)
Computes precision/recall/F1 at IoU=0.5 using `eval.run_eval`.

In [None]:
# Evaluate the checkpoint at ckpt_path (written by the training loop) against
# the validation split. As the last expression in the cell, whatever run_eval
# returns is displayed as the cell output.
run_eval(cfg, ckpt_path, cfg.val_data_dir, device)