# Detectron2 Training - Organoid Detection

This notebook demonstrates fine-tuning a Detectron2 model for organoid detection. We'll use a Mask R-CNN model pre-trained on COCO and fine-tune it on a custom organoid dataset.

**Training Pipeline:**
1. Install Detectron2 and dependencies
2. Load and prepare the custom dataset
3. Configure and train the model
4. Evaluate performance

## 1. Installation and Setup

In [None]:
# Install dependencies
# (Lines starting with `!` are shell commands executed by IPython, not Python.)
!python -m pip install pyyaml==5.1

# NOTE(review): distutils was removed from the standard library in Python 3.12
# (PEP 632); on such runtimes this import presumably relies on the shim shipped
# with setuptools — confirm on the target Colab image.
import sys, os, distutils.core

# Install detectron2 (faster Colab installation method)
# Rather than building the package, the source tree is cloned, its declared
# requirements are pip-installed, and the checkout is put on sys.path.
!git clone 'https://github.com/facebookresearch/detectron2'
# run_setup evaluates setup.py and returns an object exposing `install_requires`.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Each requirement is wrapped in single quotes before being handed to the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Prepend the clone so that `import detectron2` resolves to ./detectron2.
sys.path.insert(0, os.path.abspath('./detectron2'))

In [None]:
# Verify installation and check CUDA/PyTorch versions
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
# Import required libraries
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import json
import cv2
import random
from google.colab.patches import cv2_imshow

# Detectron2 imports
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

## 2. Data Preparation

In [None]:
# Mount Google Drive to access dataset
from google.colab import drive
drive.mount('/content/drive')

# Define paths
zip_path = "/content/drive/My Drive/Detectron2_Organoid_FineTuning/03_Data/Training_Data/organoid.zip" # organoid.zip no available on the drive, you have to create your own dataset following README_data.txt
extract_path = "/content/"

# Extract dataset
if os.path.exists(zip_path):
    print("File found, extracting...")
    !unzip -q "$zip_path" -d "$extract_path"
    print("Extraction complete!")
else:
    print("File not found at:", zip_path)

In [None]:
# Dataset loading function
def get_organoid_dicts(img_dir):
    """
    Load organoid dataset annotations from VIA JSON format.

    Reads ``via_region_data.json`` from ``img_dir`` and builds one record per
    image entry. Each image is opened with OpenCV to obtain its height and
    width. Every polygon region without region attributes becomes one
    annotation of the single class ``organoid`` (category id 0).

    Args:
        img_dir: Directory containing images and via_region_data.json

    Returns:
        List of dataset dictionaries in Detectron2 format. Each record has the
        keys ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``; each annotation has ``bbox`` (XYXY, absolute),
        ``bbox_mode``, ``segmentation`` (one flat polygon) and ``category_id``.

    Raises:
        FileNotFoundError: If an image listed in the JSON cannot be read.
    """
    json_file = os.path.join(img_dir, "via_region_data.json")
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(
                f"Could not read image referenced by {json_file}: {filename}"
            )
        height, width = image.shape[:2]

        # VIA 1.x stores regions as a dict keyed by index, VIA 2.x as a list.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            # Regions that carry any region attributes are skipped.
            # NOTE(review): presumably plain organoid outlines are annotated
            # without attributes — confirm against README_data.txt.
            if region['region_attributes']:
                continue

            shape = region['shape_attributes']
            px = shape['all_points_x']
            py = shape['all_points_y']
            # Shift vertices by half a pixel and flatten to [x0, y0, x1, y1, ...].
            poly = [coord + 0.5 for point in zip(px, py) for coord in point]

            objs.append({
                # Built-in min/max keep the box as plain Python numbers.
                "bbox": [min(px), min(py), max(px), max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,  # Single class: organoid
            })

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts

In [None]:
# Register datasets in Detectron2
for d in ["train", "val"]:
    dataset_name = "organoid_" + d
    # DatasetCatalog.register refuses a name that is already registered, which
    # happens as soon as this cell is executed twice; drop the stale entry first.
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    # `d=d` binds the current split at definition time; a plain closure would
    # see only the final loop value ("val") for both datasets.
    DatasetCatalog.register(dataset_name, lambda d=d: get_organoid_dicts("organoid/" + d))
    MetadataCatalog.get(dataset_name).set(thing_classes=["organoid"])

# Metadata handle reused by the visualisation cells.
organoid_metadata = MetadataCatalog.get("organoid_train")

In [None]:
# Visualize training samples
dataset_dicts = get_organoid_dicts("organoid/train")
# random.sample raises ValueError when asked for more items than are available,
# so cap the sample size for datasets with fewer than three images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    img = cv2.imread(d["file_name"])
    # [:, :, ::-1] reverses the channel axis (OpenCV loads BGR) before drawing.
    visualizer = Visualizer(img[:, :, ::-1], metadata=organoid_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    # Reverse the channels again for display through cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])

## 3. Training Configuration

In [None]:
# Configure the model
# The architecture config and the pre-trained checkpoint are both looked up
# from the same model-zoo entry, so name it once.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))

# Model weights - start from COCO pre-trained model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)

# Model configuration
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Single class: organoid
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256

# Dataset configuration: train on the registered training split only;
# no test dataset is attached to the trainer.
cfg.DATASETS.TRAIN = ("organoid_train",)
cfg.DATASETS.TEST = ()

# Data loader configuration
cfg.DATALOADER.NUM_WORKERS = 2

# Training hyperparameters
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.IMS_PER_BATCH = 8
cfg.SOLVER.MAX_ITER = 50
cfg.SOLVER.STEPS = []  # No learning rate decay

# Output directory: make sure it exists before training writes into it
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

## 4. Training

In [None]:
# Train the model
# The trainer is constructed entirely from the `cfg` object configured above.
trainer = DefaultTrainer(cfg)
# resume=False: begin from cfg.MODEL.WEIGHTS (the COCO checkpoint set in the
# configuration cell) rather than continuing from an earlier run's checkpoint
# — see the DefaultTrainer.resume_or_load documentation.
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
# View training curves in TensorBoard
# NOTE(review): "output" is a literal path relative to the working directory;
# it presumably matches cfg.OUTPUT_DIR — update it if OUTPUT_DIR is changed.
%load_ext tensorboard
%tensorboard --logdir output

In [None]:
# Save trained model to Google Drive
from google.colab import drive
drive.mount('/content/drive')

!cp -r output "/content/drive/My Drive/Detectron2_Organoid_FineTuning/02_Model/"

## 5. Evaluation

In [None]:
# Configure predictor for inference
# This mutates the training `cfg` in place: the weights now point at the
# checkpoint expected in cfg.OUTPUT_DIR after training has finished.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# NOTE(review): `predictor.model` is reused by the COCO evaluation cell, so this
# 0.7 cut-off presumably applies to the reported AP numbers as well — confirm
# that this is intended.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # Detection threshold
predictor = DefaultPredictor(cfg)

### Visual Evaluation on Validation Set

In [None]:
# Visualize predictions on random validation samples
from detectron2.utils.visualizer import ColorMode

dataset_dicts = get_organoid_dicts("organoid/val")
# random.sample raises ValueError when asked for more items than are available,
# so cap the sample size for validation sets with fewer than three images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])
    # The predictor is given the image exactly as loaded by OpenCV.
    outputs = predictor(im)

    # [:, :, ::-1] reverses the channel axis before drawing.
    v = Visualizer(im[:, :, ::-1],
                   metadata=organoid_metadata,
                   scale=0.5,
                   instance_mode=ColorMode.IMAGE_BW)
    # Move predictions to the CPU before handing them to the visualizer.
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Reverse the channels again for display through cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])

### Quantitative Evaluation (COCO Metrics)

In [None]:
# Evaluate using COCO AP metrics
# Build the evaluator and a test-time loader for the validation split, run the
# trained model over it, then print the resulting metrics.
evaluator = COCOEvaluator("organoid_val", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "organoid_val")
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)