#### ✅ Step 1.2: Preprocess the ModaNet Dataset

In [None]:
import json
import cv2
import numpy as np
import os

# Directories holding the ModaNet annotation file and the image files.
DATASET_PATH = './ModaNet/annotations'
IMG_PATH = './ModaNet/images'

# Parse the annotation JSON once; later cells read the resulting dict.
annotation_file = os.path.join(DATASET_PATH, 'modanet.json')
with open(annotation_file) as fh:
    annotations = json.load(fh)

# Example for bounding boxes and masks
#
# Shows a handful of annotations, each as the source image with its
# segmentation mask tinted in and its bounding box outlined.
# NOTE: cv2.imshow opens a native window, so this cell needs a GUI session.

# Every preview blocks until a key is pressed, so only show a few of them.
MAX_PREVIEWS = 5

# Prefer the file name recorded in the annotation file's image list when one
# exists; fall back to "<image_id>.jpg" otherwise.
image_files = {
    im['id']: im['file_name']
    for im in annotations.get('images', [])
    if 'file_name' in im
}

shown = 0
for ann in annotations['annotations']:
    if shown >= MAX_PREVIEWS:
        break

    img_id = ann['image_id']
    bbox = ann['bbox']  # [x, y, width, height]
    segmentation = ann['segmentation']  # Polygon points

    img_file = image_files.get(img_id, f"{img_id}.jpg")
    loaded = cv2.imread(os.path.join(IMG_PATH, img_file))
    if loaded is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        print(f"Skipping image {img_id}: could not read {img_file}")
        continue
    # Keep `img` untouched (all drawing happens on a copy) so later cells
    # that reuse it get the original pixels.
    img = loaded

    # Box coordinates may be stored as floats; OpenCV needs integer pixels.
    x, y, w, h = (int(round(v)) for v in bbox)

    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    if isinstance(segmentation, list):  # skip non-polygon encodings
        for seg in segmentation:
            # A polygon needs at least 3 (x, y) pairs and an even length.
            if len(seg) < 6 or len(seg) % 2:
                continue
            points = np.array(seg, np.int32).reshape((-1, 2))
            cv2.fillPoly(mask, [points], (255))

    # Tint the masked pixels red and blend with the original image.
    tinted = img.copy()
    tinted[mask > 0] = (0, 0, 255)
    preview = cv2.addWeighted(tinted, 0.4, img, 0.6, 0)
    cv2.rectangle(preview, (x, y), (x + w, y + h), (255, 0, 0), 2)

    cv2.imshow('Image with Mask', preview)
    shown += 1

    # Any key advances to the next preview; Esc or 'q' stops early.
    key = cv2.waitKey(0) & 0xFF
    if key in (27, ord('q')):
        break

cv2.destroyAllWindows()

#### ✅ Step 1.3: Train Faster R-CNN + Mask R-CNN (PyTorch)


In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo

# Number of object categories the ROI heads must predict.
# ModaNet defines 13 garment categories — TODO confirm against
# len(annotations['categories']) for the annotation file actually used.
NUM_MODANET_CLASSES = 13

# Start from the COCO Mask R-CNN (ResNet-50 FPN, 3x schedule) config and
# its pretrained weights, then fine-tune on ModaNet.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

# NOTE(review): "modanet_train" / "modanet_val" must already be registered in
# Detectron2's DatasetCatalog (e.g. with register_coco_instances) before the
# trainer is built; no registration is visible in this part of the notebook.
cfg.DATASETS.TRAIN = ("modanet_train",)
cfg.DATASETS.TEST = ("modanet_val",)
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

# The base config is sized for COCO's 80 classes; match the head to ModaNet.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_MODANET_CLASSES

# Checkpoints and logs are written under cfg.OUTPUT_DIR; make sure it exists.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # start from cfg.MODEL.WEIGHTS, not a previous run
trainer.train()


#### ✅ Step 1.4: Test the Model


In [None]:
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

# Run inference with the weights produced by the training step; without this
# cfg.MODEL.WEIGHTS still points at the pretrained COCO checkpoint.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

predictor = DefaultPredictor(cfg)

# `img` is the image left in scope by the preprocessing cell's loop (loaded
# with cv2.imread). Swap in a held-out image for a meaningful evaluation.
output = predictor(img)

# Visualize results
# Visualizer takes RGB input, so reverse the channel order going in and
# reverse it again before handing the result back to OpenCV.
v = Visualizer(img[:, :, ::-1])
v = v.draw_instance_predictions(output["instances"].to("cpu"))
cv2.imshow("Prediction", v.get_image()[:, :, ::-1])
cv2.waitKey(0)
cv2.destroyAllWindows()  # close the window once a key has been pressed
