# Training
The training set was prepared in COCO format in the [previous notebook](http://www.kaggle.com/persistentprogrammer/1-cell-instance-segmentation-data-processing). A Mask R-CNN model will now be trained on that data. As described in the Detectron2 Model Zoo API, the model is configured similarly to the demo below:
 
```python
from detectron2 import model_zoo
model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True)
```


In [1]:
# Install detectron2 directly from the facebookresearch GitHub repository.
# NOTE(review): no tag or commit is pinned in the URL, so each run installs whatever is current upstream.
!pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [2]:
import detectron2
from pathlib import Path
import random, cv2, os
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask as mask_util
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
# Initialise detectron2's logger (imported above from detectron2.utils.logger) with its default arguments.
setup_logger()

In [3]:
# Image root and the COCO-format annotation file used for both registered datasets.
dataDir=Path('../input/sartorius-cell-instance-segmentation')
annotations_json = '../input/annotations-train-final/annotations_train_final.json'
classes=["cort","shsy5y","astro"]

cfg = get_cfg()
# Use bitmask (RLE) masks instead of detectron2's default polygon format.
cfg.INPUT.MASK_FORMAT='bitmask'

# register_coco_instances asserts when a name is already registered, so guard
# each call; this keeps the cell re-runnable without restarting the kernel.
# NOTE(review): 'tst' is registered with the same annotation file as 'trn', so
# every evaluation on 'tst' is measured on training data. Point it at a
# held-out split once one exists.
for _dataset_name in ('trn', 'tst'):
    if _dataset_name not in DatasetCatalog.list():
        register_coco_instances(_dataset_name, {}, annotations_json, dataDir)

metadata = MetadataCatalog.get('trn')
train_ds = DatasetCatalog.get('trn')

In [4]:
# Sanity check: draw the ground-truth annotations of one training image.
d = train_ds[43]
img = cv2.imread(d["file_name"])
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError(f"Could not read image: {d['file_name']}")
# OpenCV loads BGR; reverse the channel axis so the Visualizer receives RGB.
visualizer = Visualizer(img[:, :, ::-1], metadata=metadata)
out = visualizer.draw_dataset_dict(d)
plt.figure(figsize = (20,15))
# get_image() already returns RGB, which is what plt.imshow expects; flipping
# the channels again here would swap the red and blue of the overlay colours.
plt.imshow(out.get_image())
plt.show()



In [5]:
# Taken from https://www.kaggle.com/theoviel/competition-metric-map-iou
def precision_at(threshold, iou):
    """Count matched and unmatched rows/columns of an IoU matrix at one threshold.

    A (row, column) pair counts as a match when its IoU is strictly greater
    than ``threshold``.

    Args:
        threshold: IoU cut-off.
        iou: 2-D array of pairwise IoU values.

    Returns:
        A 3-tuple of counts, in this order:
        rows with exactly one match, columns with no match, rows with no match.
    """
    hits = iou > threshold
    hits_per_row = np.sum(hits, axis=1)
    hits_per_col = np.sum(hits, axis=0)

    rows_matched_once = np.sum(hits_per_row == 1)
    cols_unmatched = np.sum(hits_per_col == 0)
    rows_unmatched = np.sum(hits_per_row == 0)
    return rows_matched_once, cols_unmatched, rows_unmatched

def score(pred, targ):
    """Mean precision of one image's predicted masks over ten IoU thresholds.

    Args:
        pred: model output dict; ``pred['instances'].pred_masks`` is moved to
            the CPU, converted to NumPy and RLE-encoded mask by mask.
        targ: list of annotation dicts; each ``'segmentation'`` value is handed
            to pycocotools unchanged (assumed to already be RLE - TODO confirm
            against the annotation file).

    Returns:
        Mean of ``tp / (tp + fp + fn)`` over thresholds 0.50, 0.55, ..., 0.95.
    """
    pred_masks = pred['instances'].pred_masks.cpu().numpy()
    # pycocotools' encoder requires Fortran-ordered arrays.
    enc_preds = [mask_util.encode(np.asarray(p, order='F')) for p in pred_masks]
    enc_targs = [ann['segmentation'] for ann in targ]
    # Rows are predictions, columns are targets; no target is flagged as crowd.
    ious = mask_util.iou(enc_preds, enc_targs, [0]*len(enc_targs))
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, ious)
        total = tp + fp + fn
        # All three counts can be zero (e.g. every prediction matches more than
        # one target); a bare division would then yield nan and poison the mean.
        prec.append(tp / total if total > 0 else 0.0)
    return np.mean(prec)

class MAPIOUEvaluator(DatasetEvaluator):
    """Evaluator that reports the mean of the per-image ``score`` values.

    The annotations of every image in the dataset are cached at construction,
    keyed by ``image_id``, so ``process`` can look up the targets for each input.
    """

    def __init__(self, dataset_name):
        """Load ``dataset_name`` from the DatasetCatalog and cache its annotations."""
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
        # Create the score list here as well, so process()/evaluate() do not
        # raise AttributeError when called before reset().
        self.scores = []

    def reset(self):
        """Discard all scores collected so far."""
        self.scores = []

    def process(self, inputs, outputs):
        """Append one score per (input, output) pair.

        An output with no predicted instances scores 0; otherwise the score is
        computed against the cached annotations of the input's ``image_id``.
        """
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                self.scores.append(0)
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        """Return the mean of the collected scores under the key ``"MaP IoU"``."""
        return {"MaP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    """DefaultTrainer subclass whose evaluator is a MAPIOUEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build the evaluator for ``dataset_name``.

        ``cfg`` and ``output_folder`` are accepted but not used.
        """
        evaluator = MAPIOUEvaluator(dataset_name)
        return evaluator
    

# About to Train

Training for 1000 iterations is enough for a demonstration. For a high-scoring model you will need to train longer — closer to 10000 iterations with these settings, which is the value `cfg.SOLVER.MAX_ITER` is set to in the training cell below.

MaP IoU values logged with `cfg.SOLVER.MAX_ITER = 1000`:
*  MaP IoU=0.035016617532343634
*  MaP IoU=0.04770699336519656
*  MaP IoU=0.048392998848391405 
*  MaP IoU=0.05864819286263821

MaP IoU values logged with `cfg.SOLVER.MAX_ITER = 10000`:

* MaP IoU=0.028634955952136568
* MaP IoU=0.15530580192694832
* MaP IoU=0.17086983159217772
* MaP IoU=0.1781788702498427
* MaP IoU=0.18471983982629933
* MaP IoU=0.1845142503010725
* MaP IoU=0.2000371526214604
* MaP IoU=0.20045519766568573
* MaP IoU=0.178798912881747
* MaP IoU=0.20668270725942287
* MaP IoU=0.20167612555344858
* MaP IoU=0.19271649565679386
* MaP IoU=0.21021607295139286
* MaP IoU=0.20472970888750816
* MaP IoU=0.2185385851489974
* MaP IoU=0.209953043959278
* MaP IoU=0.21418847583015643
* MaP IoU=0.20984628329022967
* MaP IoU=0.21702466273329088
* MaP IoU=0.21758110819464557
* MaP IoU=0.21175760099773852
* MaP IoU=0.21746751301093561
* MaP IoU=0.2189431436334014
* MaP IoU=0.22504687031173892
* MaP IoU=0.20858281465713077
* MaP IoU=0.22820816934400004
* MaP IoU=0.23128993104426615
* MaP IoU=0.22780056091926768
* MaP IoU=0.2201527715590112
* MaP IoU=0.2344485734153959
* MaP IoU=0.21639908616326695
* MaP IoU=0.22350789366740792
* MaP IoU=0.23380872440868186
* MaP IoU=0.23504314985522248

In [6]:
# Base configuration and starting weights both come from the same model-zoo entry.
zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)  # Let training initialize from model zoo

# Datasets and data loading
cfg.DATASETS.TRAIN = ("trn",)
cfg.DATASETS.TEST = ("tst",)
cfg.DATALOADER.NUM_WORKERS = 2

# Solver
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.MAX_ITER = 10000
cfg.SOLVER.STEPS = []

# ROI heads
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5

# Evaluate once per epoch: number of training images divided by the batch size.
n_train_images = len(DatasetCatalog.get('trn'))
cfg.TEST.EVAL_PERIOD = n_train_images // cfg.SOLVER.IMS_PER_BATCH

# Train from the zoo checkpoint (resume=False: do not resume from OUTPUT_DIR).
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

 # Predictions on the left and ground truth on the right

In [7]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)
dataset_dict_test = DatasetCatalog.get('tst')
trn_metadata = MetadataCatalog.get('trn')

# random.sample raises ValueError when asked for more items than the population
# holds, so cap the request at the dataset size.
n_samples = min(30, len(dataset_dict_test))
for d in random.sample(dataset_dict_test, n_samples):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    im_rgb = im[:, :, ::-1]  # OpenCV loads BGR; the Visualizer takes RGB

    # Left panel: model predictions.
    pred_vis = Visualizer(im_rgb,
                          metadata=trn_metadata,
                          instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    ).draw_instance_predictions(outputs["instances"].to("cpu"))

    # Right panel: ground truth, drawn with its own Visualizer so the
    # annotations go on a clean copy of this image.
    gt_vis = Visualizer(im_rgb, metadata=trn_metadata).draw_dataset_dict(d)

    fig, (ax_pred, ax_gt) = plt.subplots(1, 2, figsize=(40, 15))
    # get_image() already returns RGB, so it can go to imshow without conversion.
    ax_pred.imshow(pred_vis.get_image())
    ax_pred.set_title("Prediction")
    ax_gt.imshow(gt_vis.get_image())
    ax_gt.set_title("Ground truth")
    plt.show()
    plt.close(fig)  # release the figure; up to 30 are created in this loop
    