# Install Detectron2
Requirements for running Detectron2 include:
* gcc & g++ ≥ 5
* Python ≥ 3.6
* PyTorch ≥ 1.4
* torchvision that matches the PyTorch installation
* OpenCV
* pycocotools: pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
* fvcore: conda install -c fvcore fvcore
* Detectron2:  pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html


In [None]:
import numpy as np
import nibabel as nib
import pandas as pd
import os
import cv2
import itertools
import random
import glob
import torch
import time
import datetime
import logging
import json

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from skimage import data
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.measure import regionprops_table
from skimage.morphology import closing, square
from skimage.color import label2rgb, rgb2gray
from skimage.measure import find_contours, approximate_polygon

import detectron2
import detectron2.utils.comm as comm
from detectron2.utils.logger import setup_logger, log_every_n_seconds
setup_logger()

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.engine.hooks import HookBase
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, DatasetMapper, build_detection_test_loader
from detectron2.utils.visualizer import ColorMode
from detectron2.utils.visualizer import Visualizer
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Import and preprocessing data 
* RGB_30frames_D5_patch00: raw data
* ANNO_D5_patch00: cell nuclei annotation

We put both the NIfTI and PNG files on Google Drive.

In [None]:
# Split the 30 frames into train:validation:test = 18:6:6 frames
data_path = 'data_nifti/'
# nibabel deprecated get_data(); np.asanyarray(img.dataobj) is its documented
# replacement and keeps the stored dtype, which the field-wise view below
# relies on (it reads RGB_image.dtype.names).
RGB_img = np.asanyarray(nib.load(data_path + 'RGB_30frames_D5_patch00.nii').dataobj)
ANNO_img = nib.load(data_path + 'ANNO_D5_patch00.nii').get_fdata()
data_num = RGB_img.shape[2]

train_path = 'data/PNG/train/'
val_path = 'data/PNG/val/'
test_path = 'data/PNG/test/'

# Create exactly the directories the PNGs are written to, so the names used
# for creation and for saving cannot drift apart.
for out_dir in (train_path, val_path, test_path):
    os.makedirs(out_dir, exist_ok=True)

# Save raw data as PNG files
for i in range(data_num):
    RGB_image = RGB_img[:, :, i]
    # Reinterpret each structured pixel as one uint8 value per dtype field.
    RGB_frames = RGB_image.view((np.uint8, len(RGB_image.dtype.names)))
    # Frames 0-17 -> train, 18-23 -> validation, the rest -> test.
    if i < 18:
        out_dir = train_path
    elif i < 24:
        out_dir = val_path
    else:
        out_dir = test_path
    plt.imsave(out_dir + 'RGB_frame_' + str(i) + '.png', RGB_frames, format='png')

Find the contours of each nucleus in the annotation files and save them in ANNO_train and ANNO_val.

In [None]:
# Nucleus outlines per frame: {frame name: {cell name: {'x': ..., 'y': ...}}}.
# Frames 0-17 go to the training set, frames 18-23 to the validation set;
# later frames are not annotated here.
ANNO_train = {}
ANNO_val = {}

for i in range(data_num):
    ANNO_image = ANNO_img[:, :, i]
    # Trace the 0.8 iso-level of the grayscale annotation slice.
    outlines = find_contours(rgb2gray(ANNO_image), 0.8)

    # Column 1 of each contour is stored as x and column 0 as y.
    cells = {
        'cell_' + str(n): {'x': outline[:, 1], 'y': outline[:, 0]}
        for n, outline in enumerate(outlines)
    }

    if i < 18:
        ANNO_train['RGB_frame_' + str(i)] = cells
    elif i < 24:
        ANNO_val['RGB_frame_' + str(i)] = cells

Register the training and validation dataset to detectron2. Write a function to parse it and prepare it into detectron2's standard format.

Each image record must include file_name (path of the image), image ID, and the height and width of the image; each nucleus annotation must include bbox (bounding box of the nucleus) and segmentation (contour of the nucleus).

In [None]:
def _build_cell_dicts(img_dir, annotations, frame_ids):
    """Build detectron2 dataset records for a set of frames.

    Args:
        img_dir: Directory holding the 'RGB_frame_<i>.png' images.
        annotations: Mapping of frame name -> {cell name: {'x': ..., 'y': ...}}
            as built for ANNO_train / ANNO_val.
        frame_ids: Iterable of frame indices to include.

    Returns:
        A list with one dict per frame containing 'file_name', 'image_id',
        'height', 'width' and 'annotations'.
    """
    # Every frame is a slice of the annotation volume, so they all share its
    # in-plane size; read it from the volume itself rather than from a loop
    # variable left over from another cell.
    height, width = ANNO_img.shape[:2]

    dataset_dicts = []
    for i in frame_ids:
        key = 'RGB_frame_' + str(i)

        objs = []
        for v in annotations[key].values():
            px = v['x']
            py = v['y']
            # A polygon needs at least 3 points (6 coordinates); detectron2
            # rejects shorter ones, so skip degenerate contours.
            if len(px) < 3:
                continue
            objs.append({
                'bbox': [float(np.min(px)), float(np.min(py)),
                         float(np.max(px)), float(np.max(py))],
                'bbox_mode': BoxMode.XYXY_ABS,
                # Flattened as [x0, y0, x1, y1, ...] in plain Python floats.
                'segmentation': [np.column_stack((px, py)).ravel().tolist()],
                'category_id': 0,
                'iscrowd': 0,
            })

        dataset_dicts.append({
            'file_name': os.path.join(img_dir, key + '.png'),
            'image_id': i,
            'height': height,
            'width': width,
            'annotations': objs,
        })
    return dataset_dicts


def get_train_dicts(img_dir):
    """Return the dataset records for the training frames (0-17).

    Args:
        img_dir: Directory containing the training PNG frames.

    Returns:
        List of per-image dicts built from ANNO_train.
    """
    return _build_cell_dicts(img_dir, ANNO_train, range(18))


def get_val_dicts(img_dir):
    """Return the dataset records for the validation frames (18-23).

    Args:
        img_dir: Directory containing the validation PNG frames.

    Returns:
        List of per-image dicts built from ANNO_val.
    """
    return _build_cell_dicts(img_dir, ANNO_val, range(18, 24))

In [None]:
# Suffix appended to the registered dataset names.
s = '2'

# Register each split with its loader and declare the single 'cell' class.
for d, loader in (
    ('train', lambda: get_train_dicts(train_path)),
    ('val', lambda: get_val_dicts(val_path)),
):
    DatasetCatalog.register(d + '_cell' + s, loader)
    MetadataCatalog.get(d + '_cell' + s).set(thing_classes=['cell'])

# Metadata of the training split, reused by the visualizers below.
cell_metadata = MetadataCatalog.get('train_cell' + s)

To verify the data loading is correct, visualize the annotations of randomly selected samples in the training set.

In [None]:
# Use this function to visualize data through cv2
def cv2_imshow(a, **kwargs):
    """Display an OpenCV-ordered image with matplotlib.

    Args:
        a: Image array; 3-D arrays are treated as BGR (or BGRA when the last
            axis has length 4), other arrays are shown unchanged.
        **kwargs: Forwarded to plt.imshow.

    Returns:
        Whatever plt.imshow returns.
    """
    # NOTE(review): values are clipped to 0..255 but stored as uint16, not
    # uint8 - confirm this is intended.
    a = a.clip(0, 255).astype('uint16')
    if a.ndim == 3:
        # cv2 stores colors as BGR(A); matplotlib expects RGB(A).
        conversion = cv2.COLOR_BGRA2RGBA if a.shape[2] == 4 else cv2.COLOR_BGR2RGB
        a = cv2.cvtColor(a, conversion)
    return plt.imshow(a, **kwargs)

In [None]:
dataset_dicts = get_train_dicts(train_path)

# Draw the ground-truth annotations on between one and five randomly chosen
# training frames (the same frame may be picked more than once).
for _ in range(random.randint(1, 5)):
    index = random.randint(0, 17)
    img = cv2.imread(train_path + 'RGB_frame_' + str(index) + '.png')
    vis = Visualizer(img, metadata=cell_metadata, scale=0.5).draw_dataset_dict(
        dataset_dicts[index])
    plt.subplots(figsize=(20, 20), dpi=300)
    plt.axis('off')
    cv2_imshow(vis.get_image())

# Train the dataset
To detect overfitting, write our own trainer that also computes the loss on the validation dataset during training.

In [None]:
class LossEvalHook(HookBase):
    """Trainer hook that periodically computes the loss over a data loader.

    The mean loss is stored in the trainer's event storage under the name
    'validation_loss'.
    """

    def __init__(self, eval_period, model, data_loader):
        """Store the evaluation settings.

        Args:
            eval_period: Run the evaluation every this many iterations; a
                value <= 0 restricts it to the final iteration.
            model: Model called on each batch to obtain its losses.
            data_loader: Iterable of input batches; must support len().
        """
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader

    def _do_loss_eval(self):
        """Compute the loss of every batch and record the mean.

        Returns:
            The list of per-batch total losses.
        """
        # Timing/logging follows inference_on_dataset from evaluator.py
        total = len(self._data_loader)
        num_warmup = min(5, total - 1)

        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []
        for idx, inputs in enumerate(self._data_loader):
            if idx == num_warmup:
                # Restart the clocks once the warm-up batches are done.
                start_time = time.perf_counter()
                total_compute_time = 0

            # Time the forward pass itself; the synchronize call makes sure
            # pending GPU work is included in the measurement.
            start_compute_time = time.perf_counter()
            loss_batch = self._get_loss(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            losses.append(loss_batch)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    'Loss on Validation  done {}/{}. {:.4f} s / img. ETA={}'.format(
                        idx + 1, total, seconds_per_img, str(eta)),
                    n=5,)
        mean_loss = np.mean(losses)
        self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()
        return losses

    def _get_loss(self, data):
        """Return the sum of all losses the model reports for one batch.

        Args:
            data: One batch as yielded by the data loader.

        Returns:
            The summed loss as a Python float.
        """
        # No gradients are needed for a validation loss; skipping autograd
        # avoids building a graph for every validation batch.
        # Assumes the model returns a dict of losses for this input (the
        # result is iterated with .items()) - TODO confirm it is in
        # training mode when the hook runs.
        with torch.no_grad():
            metrics_dict = self._model(data)
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        total_losses_reduced = sum(loss for loss in metrics_dict.values())
        return total_losses_reduced

    def after_step(self):
        """Run the loss evaluation on period boundaries and on the last step."""
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
        # NOTE(review): writes a constant scalar on every step; looks like a
        # debugging leftover - confirm nothing reads 'timetest' before removing.
        self.trainer.storage.put_scalars(timetest=12)



class MyTrainer(DefaultTrainer):
    """DefaultTrainer with a COCO evaluator and a validation-loss hook."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create a COCOEvaluator for the given dataset.

        Args:
            cfg: Config passed on to COCOEvaluator.
            dataset_name: Name of the registered dataset to evaluate.
            output_folder: Where results are written; defaults to
                '<cfg.OUTPUT_DIR>/inference'.

        Returns:
            The COCOEvaluator instance.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, 'inference')
        return COCOEvaluator(dataset_name, cfg, True, output_folder)

    def build_hooks(self):
        """Return the default hooks plus a LossEvalHook on the first test set."""
        hooks = super().build_hooks()
        # Use the trainer's own config throughout, not a notebook-level
        # global, so the hook always matches the config this trainer was
        # built with.
        # NOTE(review): DatasetMapper's second positional argument is True -
        # presumably is_train, so annotations are kept for the loss; confirm.
        val_loader = build_detection_test_loader(
            self.cfg,
            self.cfg.DATASETS.TEST[0],
            DatasetMapper(self.cfg, True))
        # Insert just before the last default hook.
        hooks.insert(-1, LossEvalHook(
            self.cfg.TEST.EVAL_PERIOD,
            self.model,
            val_loader))
        return hooks

Fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on our own dataset.

In [None]:
# Model-zoo entry used for both the base config and the initial weights.
zoo_yaml = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))

# Model: start from the model-zoo checkpoint, with a single class (cell).
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_yaml)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # ROI batch size per image

# Data: registered train/validation splits.
cfg.DATASETS.TRAIN = ('train_cell' + s,)
cfg.DATASETS.TEST = ('val_cell' + s,)
cfg.DATALOADER.NUM_WORKERS = 4

# Solver and evaluation schedule.
cfg.SOLVER.IMS_PER_BATCH = 2     # images per batch
cfg.SOLVER.BASE_LR = 0.00025     # learning rate
cfg.SOLVER.MAX_ITER = 100        # training iterations
cfg.TEST.EVAL_PERIOD = 100       # evaluate every this many iterations

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = MyTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Visualize the results
Plot training and validation loss.

In [None]:
def load_json_arr(json_path):
    """Load a JSON-lines file into a list of decoded objects.

    Args:
        json_path: Path to a text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        # Blank or whitespace-only lines carry no record; skip them instead
        # of failing to decode them.
        return [json.loads(line) for line in f if line.strip()]

experiment_metrics = load_json_arr(cfg.OUTPUT_DIR+'/metrics.json')

# Guard both series the same way: a record that lacks the key is skipped
# instead of raising KeyError.
train_points = [(x['iteration'], x['total_loss'])
                for x in experiment_metrics if 'total_loss' in x]
val_points = [(x['iteration'], x['validation_loss'])
              for x in experiment_metrics if 'validation_loss' in x]

plt.figure(dpi=200)
plt.plot([it for it, _ in train_points], [loss for _, loss in train_points])
plt.plot([it for it, _ in val_points], [loss for _, loss in val_points])
plt.legend(['total_loss', 'validation_loss'], loc='upper left')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.show()

Make predictions by loading the trained model and setting a minimum confidence threshold of 70%; only predictions scoring at least this are kept.

In [None]:
# Reuse the training config for inference: load the final checkpoint written
# to the output directory and set the test-time score threshold to 0.7.
final_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = final_weights
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

Create a folder and save all images with predicted annotations in the test set.

In [None]:
os.makedirs('results', exist_ok=True)
# Predict on the test frames (24-29), show each result and save it as a PNG.
for d in range(24,30):
    img = cv2.imread(test_path+'RGB_frame_'+str(d)+'.png')  # cv2 loads BGR
    outputs = predictor(img)
    # The Visualizer is given the image flipped to RGB.
    v = Visualizer(img[:, :, ::-1],
                   metadata=cell_metadata,
                   scale=1,
                   instance_mode=ColorMode.IMAGE)   # keep the image colors under the overlay
    v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
    rendered = v.get_image()  # RGB, matching the input given above
    fig = plt.subplots(figsize=(20,20),dpi=300)
    plt.axis('off')
    # cv2_imshow expects BGR and converts back to RGB for display.
    cv2_imshow(rendered[:, :, ::-1])
    # plt.imsave expects RGB, so save the rendered image unflipped; saving the
    # BGR view would swap red and blue in the PNG.
    plt.imsave('./results/test_frame_'+str(d)+'.png', rendered, format = 'png')