# 0. Import Dependences

In [None]:
import detectron2, cv2, random
import os, json, itertools
import numpy as np
import torch, torchvision
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from matplotlib import pyplot as plt

setup_logger()

# 1. Create a function that change COCO Dataset (Input) format

In [3]:
#image_dir = '/home/gus/SORCOM/Python2/DatasetTraining'

def Get_EyeFundus_Data(image_dir):
    json_file = os.path.join(image_dir, "COCO_Dataset_IzqIzq.json")
    json_open = open(json_file)
    imgs_anns = json.load(json_open)


    images = imgs_anns["images"]
    annotations = imgs_anns["annotations"]

    n = 0
    array = []

    for i in annotations:
        record = {}
        array_anno = []
        filename = img_dir + "/" + images[n]['file_name'].split("/")[-1]
        height = images[n]['height']
        width = images[n]['width']

        record["file_name"] = filename
        record["height"] = height
        record["width"] = width

        record_anno = {}
        #print(n)
        bbox = annotations[n]['bbox']
        bbox_mode = BoxMode.XYXY_ABS
        segmentation = annotations[n]['segmentation']
        category_id = 0
        iscrowd = 0
        record_anno['bbox_mode'] = bbox_mode
        record_anno['segmentation'] = segmentation
     
        max_x = 0
        min_x = 9999999999
        for i in range(len(segmentation[0])):
            if i%2 == 0:
                if segmentation[0][i]<min_x:
                    min_x = segmentation[0][i]
                if segmentation[0][i]>max_x:
                    max_x = segmentation[0][i]
                    
        max_y = 0
        min_y = 9999999999
        for i in range(len(segmentation[0])):
            if i%2 != 0:
                if segmentation[0][i]<min_y:
                    min_y = segmentation[0][i]
                if segmentation[0][i]>max_y:
                    max_y = segmentation[0][i]
        
        record_anno['bbox'] = [min_x,min_y,max_x,max_y]
        record_anno['category_id'] = category_id
        record_anno['iscrowd'] = iscrowd

        array_anno.append(record_anno)
        record["annotations"] = array_anno
        array.append(record)

        n = n+1

    return array

# 2. Register EyeFundus Dataset (Training and Test)


In [4]:
for d in ["DatasetTraining", "DatasetTest"]:
    DatasetCatalog.register(d, lambda d=d: Get_EyeFundus_Data(d))
    MetadataCatalog.get(d).set(thing_classes=["EyeFundus"])
eyefundus_metadata = MetadataCatalog.get('DatasetTraining')

# 3. Visualize Dataset with the new format

In [6]:
dataset_dicts = Get_EyeFundus_Data("DatasetTraining")
for d in random.sample(dataset_dicts, 3):
    print(d)
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=eyefundus_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2.imshow('visgetimage', vis.get_image()[:, :, ::-1])
    cv2.waitKey(0)
cv2.destroyAllWindows()

{'file_name': 'DatasetIzqIzq/3video711.jpg', 'height': 1920, 'width': 1080, 'annotations': [{'bbox_mode': <BoxMode.XYXY_ABS: 0>, 'segmentation': [[350.0, 191.0, 300.0, 211.0, 248.0, 295.0, 214.0, 347.0, 210.0, 439.0, 252.0, 549.0, 290.0, 571.0, 350.0, 465.0, 398.0, 359.0, 420.0, 297.0]], 'bbox': [210.0, 191.0, 420.0, 571.0], 'category_id': 0, 'iscrowd': 0}]}
{'file_name': 'DatasetIzqIzq/1video1038.jpg', 'height': 1920, 'width': 1080, 'annotations': [{'bbox_mode': <BoxMode.XYXY_ABS: 0>, 'segmentation': [[380.0, 413.0, 324.0, 447.0, 278.0, 539.0, 248.0, 615.0, 252.0, 695.0, 264.0, 771.0, 318.0, 847.0, 384.0, 803.0, 438.0, 753.0, 450.0, 705.0, 442.0, 681.0, 412.0, 665.0, 412.0, 633.0, 420.0, 617.0, 418.0, 591.0, 406.0, 525.0, 398.0, 481.0, 396.0, 445.0]], 'bbox': [248.0, 413.0, 450.0, 847.0], 'category_id': 0, 'iscrowd': 0}]}
{'file_name': 'DatasetIzqIzq/1video432.jpg', 'height': 1920, 'width': 1080, 'annotations': [{'bbox_mode': <BoxMode.XYXY_ABS: 0>, 'segmentation': [[414.0, 667.0, 378.

# 4. Set the training Configs

In [11]:
cfg = get_cfg()
cfg.merge_from_file("detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ('DatasetTraining',)
cfg.DATASETS.TEST = ()   # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"  # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough, but you can certainly train longer
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (EyeFundus)
cfg.OUTPUT_DIR = "output"
#cfg.MODEL.DEVICE = "cpu" #In case your computer doest have GPU

# 5. Train

In [12]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok = True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume = False)
trainer.train()

[32m[12/21 15:37:11 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[12/21 15:37:11 d2.data.build]: [0mRemoved 0 images with no usable annotations. 307 images left.
[32m[12/21 15:37:11 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[12/21 15:37:11 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[12/21 15:37:11 d2.data.common]: [0mSerializing 307 elements to byte tensors and concatenating them all ...
[32m[12/21 15:37:11 d2.data.common]: [0mSerialized dataset takes 0.15 MiB


Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[12/21 15:37:11 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[12/21 15:37:15 d2.utils.events]: [0m eta: 0:01:02  iter: 19  total_loss: 1.811  loss_cls: 0.7457  loss_box_reg: 0.3166  loss_mask: 0.6946  loss_rpn_cls: 0.03199  loss_rpn_loc: 0.008668  time: 0.2228  data_time: 0.0125  lr: 1.6068e-05  max_mem: 2672M
[32m[12/21 15:37:20 d2.utils.events]: [0m eta: 0:00:57  iter: 39  total_loss: 1.631  loss_cls: 0.4967  loss_box_reg: 0.3808  loss_mask: 0.6805  loss_rpn_cls: 0.01771  loss_rpn_loc: 0.00562  time: 0.2192  data_time: 0.0026  lr: 3.2718e-05  max_mem: 2672M
[32m[12/21 15:37:24 d2.utils.events]: [0m eta: 0:00:52  iter: 59  total_loss: 1.458  loss_cls: 0.3292  loss_box_reg: 0.4443  loss_mask: 0.6474  loss_rpn_cls: 0.02455  loss_rpn_loc: 0.005923  time: 0.2189  data_time: 0.0028  lr: 4.9367e-05  max_mem: 2672M
[32m[12/21 15:37:29 d2.utils.events]: [0m eta: 0:00:48  iter: 79  total_loss: 1.298  loss_cls: 0.2733  loss_box_reg: 0.4231  loss_mask: 0.5951  l

# 6. Save model for testing

In [13]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set the testing threshold for this model
cfg.DATASETS.TEST = ('DatasetTest', )
predictor = DefaultPredictor(cfg)

# 7. Inference on Validation Set

In [None]:
from detectron2.utils.visualizer import ColorMode
dataset_dicts = Get_EyeFundus_Data("DatasetTraining")
for d in random.sample(dataset_dicts, 10):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    print(outputs)
    v = Visualizer(im[:, :, ::-1],
                   metadata=MetadataCatalog.get("balloon_val"), 
                   scale=0.8, 
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow('final', v.get_image()[:, :, ::-1])
    cv2.waitKey(0)
cv2.destroyAllWindows()    

{'instances': Instances(num_instances=1, image_height=1920, image_width=1080, fields=[pred_boxes: Boxes(tensor([[ 339.1504,  646.5409,  563.3959, 1079.3363]], device='cuda:0')), scores: tensor([0.9844], device='cuda:0'), pred_classes: tensor([0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])}
