# Setup

In [1]:
#!pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [2]:
from datetime import datetime
start_time = datetime.now()


In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
import os

# content/gdrive/My Drive/Kaggle is the path where kaggle.json is 
# present in the Google Drive
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/My Drive/Kaggle"

# change the working directory
%cd /content/gdrive/My Drive/Kaggle

/content/gdrive/My Drive/Kaggle


In [5]:
!mkdir input

mkdir: cannot create directory ‘input’: File exists


In [6]:
!mkdir ./input/annotations/

mkdir: cannot create directory ‘./input/annotations/’: File exists


In [7]:
# copy the annotations
!kaggle datasets download -d konradb/sartorius-annotations

Downloading sartorius-annotations.zip to /content/gdrive/My Drive/Kaggle
 85% 33.0M/38.7M [00:01<00:00, 11.7MB/s]
100% 38.7M/38.7M [00:01<00:00, 34.8MB/s]


In [8]:
!mv sartorius-annotations.zip ./input/annotations/ && cd input/annotations && unzip sartorius-annotations.zip && rm sartorius-annotations.zip

Archive:  sartorius-annotations.zip
replace annotations_train_f0.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: annotations_train_f0.json  
  inflating: annotations_train_f1.json  
  inflating: annotations_train_f2.json  
  inflating: annotations_train_f3.json  
  inflating: annotations_train_f4.json  
  inflating: annotations_valid_f0.json  
  inflating: annotations_valid_f1.json  
  inflating: annotations_valid_f2.json  
  inflating: annotations_valid_f3.json  
  inflating: annotations_valid_f4.json  
  inflating: gt_fold.csv             


In [9]:
# copy the training set
!cp /content/gdrive/My\ Drive/Kaggle/sartorius/input/train.zip .

In [10]:
!mkdir ./input/train/

mkdir: cannot create directory ‘./input/train/’: File exists


In [11]:
!mv train.zip ./input/train/ && cd input/train && unzip train.zip && rm train.zip

Archive:  train.zip
replace .csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: .csv                    
  inflating: 0030fd0e6378.png        
  inflating: 0140b3c8f445.png        
  inflating: 01ae5a43a2ab.png        
  inflating: 026b3c2c4b32.png        
  inflating: 029e5b3b89c7.png        
  inflating: 0323e81d23d9.png        
  inflating: 03b27b381a5f.png        
  inflating: 042c17cd9143.png        
  inflating: 042dc0e561a4.png        
  inflating: 04928f0866b0.png        
  inflating: 049f02e0f764.png        
  inflating: 04cd81dfb245.png        
  inflating: 053d61766edb.png        
  inflating: 05c61f0f46b7.png        
  inflating: 061b49d7dbbb.png        
  inflating: 06c5740c8b18.png        
  inflating: 0728b8f39241.png        
  inflating: 07e9ba109e34.png        
  inflating: 083b178bb184.png        
  inflating: 085eb8fec206.png        
  inflating: 08f52aa2add3.png        
  inflating: 097d60b2cf71.png        
  inflating: 0a6ecc5fe78a.png        
  inflating: 0

In [12]:
!cp /content/gdrive/My\ Drive/Kaggle/sartorius/input/train.csv ./input/

In [13]:
import pandas as pd
import numpy as np
import pycocotools.mask as mask_util
import detectron2
from pathlib import Path
import random, cv2, os
import matplotlib.pyplot as plt
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.logger import setup_logger
from detectron2.evaluation.evaluator import DatasetEvaluator
from detectron2.engine import BestCheckpointer
from detectron2.checkpoint import DetectionCheckpointer
setup_logger()

import torch


In [14]:
class CFG:
    wfold = 4
    data_folder = './input/'
    anno_folder = './input/annotations/'
    model_arch = 'mask_rcnn_R_50_FPN_3x.yaml'
    nof_iters = 20000
    seed = 45

In [15]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(CFG.seed)

# Functions

In [16]:
# Taken from https://www.kaggle.com/theoviel/competition-metric-map-iou
def precision_at(threshold, iou):
    matches = iou > threshold
    true_positives = np.sum(matches, axis=1) == 1  # Correct objects
    false_positives = np.sum(matches, axis=0) == 0  # Missed objects
    false_negatives = np.sum(matches, axis=1) == 0  # Extra objects
    return np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)

def score(pred, targ):
    pred_masks = pred['instances'].pred_masks.cpu().numpy()
    enc_preds = [mask_util.encode(np.asarray(p, order='F')) for p in pred_masks]
    enc_targs = list(map(lambda x:x['segmentation'], targ))
    ious = mask_util.iou(enc_preds, enc_targs, [0]*len(enc_targs))
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, ious)
        p = tp / (tp + fp + fn)
        prec.append(p)
    return np.mean(prec)

class MAPIOUEvaluator(DatasetEvaluator):
    def __init__(self, dataset_name):
        dataset_dicts = DatasetCatalog.get(dataset_name)
        self.annotations_cache = {item['image_id']:item['annotations'] for item in dataset_dicts}
            
    def reset(self):
        self.scores = []

    def process(self, inputs, outputs):
        for inp, out in zip(inputs, outputs):
            if len(out['instances']) == 0:
                self.scores.append(0)    
            else:
                targ = self.annotations_cache[inp['image_id']]
                self.scores.append(score(out, targ))

    def evaluate(self):
        return {"MaP IoU": np.mean(self.scores)}

class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return MAPIOUEvaluator(dataset_name)
    

In [17]:
class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return MAPIOUEvaluator(dataset_name)

    def build_hooks(self):

        # copy of cfg
        cfg = self.cfg.clone()

        # build the original model hooks
        hooks = super().build_hooks()

        # add the best checkpointer hook
        hooks.insert(-1, BestCheckpointer(cfg.TEST.EVAL_PERIOD, 
                                         DetectionCheckpointer(self.model, cfg.OUTPUT_DIR),
                                         "MaP IoU",
                                         "max",
                                         ))
        return hooks

# Data

Setup training / validation split for this fold

In [18]:
dataDir=Path(CFG.data_folder)
register_coco_instances('sartorius_train',{}, CFG.anno_folder + 'annotations_train_f'+str(CFG.wfold)+'.json', dataDir)
register_coco_instances('sartorius_val',{}, CFG.anno_folder + 'annotations_valid_f'+str(CFG.wfold)+'.json', dataDir)
metadata = MetadataCatalog.get('sartorius_train')
train_ds = DatasetCatalog.get('sartorius_train')

[32m[12/07 21:45:57 d2.data.datasets.coco]: [0mLoading ./input/annotations/annotations_train_f4.json takes 1.15 seconds.
[32m[12/07 21:45:57 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ./input/annotations/annotations_train_f4.json


In [19]:
train_ds = DatasetCatalog.get('sartorius_train')

[32m[12/07 21:45:59 d2.data.datasets.coco]: [0mLoading ./input/annotations/annotations_train_f4.json takes 1.17 seconds.
[32m[12/07 21:45:59 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ./input/annotations/annotations_train_f4.json


In [20]:
!ls

input  kaggle.json  output  sartorius


# Model

configuration for the Detectron model

In [21]:
cfg = get_cfg()
cfg.INPUT.MASK_FORMAT='bitmask'
cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/' + CFG.model_arch))
cfg.DATASETS.TRAIN = ("sartorius_train",)
cfg.DATASETS.TEST = ("sartorius_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-InstanceSegmentation/' + CFG.model_arch) 
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.MAX_ITER = CFG.nof_iters    
cfg.SOLVER.STEPS = []        
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512    
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .4
cfg.TEST.EVAL_PERIOD = len(DatasetCatalog.get('sartorius_train')) // cfg.SOLVER.IMS_PER_BATCH  

[32m[12/07 21:46:01 d2.data.datasets.coco]: [0mLoading ./input/annotations/annotations_train_f4.json takes 1.03 seconds.
[32m[12/07 21:46:01 d2.data.datasets.coco]: [0mLoaded 485 images in COCO format from ./input/annotations/annotations_train_f4.json


In [22]:
CFG.data_folder

'./input/'

In [23]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[32m[12/07 21:46:13 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

model_final_f10217.pkl: 178MB [00:18, 9.78MB/s]                           
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to

[32m[12/07 21:46:38 d2.engine.train_loop]: [0mStarting training from iteration 0


  torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
  torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[12/07 21:46:55 d2.utils.events]: [0m eta: 1:50:50  iter: 19  total_loss: 5.771  loss_cls: 1.405  loss_box_reg: 0.273  loss_mask: 0.6942  loss_rpn_cls: 2.995  loss_rpn_loc: 0.3093  time: 0.8703  data_time: 0.5411  lr: 1.9981e-05  max_mem: 4907M
[32m[12/07 21:47:24 d2.utils.events]: [0m eta: 2:23:03  iter: 39  total_loss: 2.911  loss_cls: 1.167  loss_box_reg: 0.3198  loss_mask: 0.6889  loss_rpn_cls: 0.4047  loss_rpn_loc: 0.3198  time: 1.1512  data_time: 1.0549  lr: 3.9961e-05  max_mem: 5910M
[32m[12/07 21:47:39 d2.utils.events]: [0m eta: 2:16:14  iter: 59  total_loss: 2.474  loss_cls: 0.7842  loss_box_reg: 0.398  loss_mask: 0.6753  loss_rpn_cls: 0.3201  loss_rpn_loc: 0.288  time: 1.0167  data_time: 0.4469  lr: 5.9941e-05  max_mem: 5910M
[32m[12/07 21:47:55 d2.utils.events]: [0m eta: 2:09:27  iter: 79  total_loss: 2.305  loss_cls: 0.6714  loss_box_reg: 0.404  loss_mask: 0.6654  loss_rpn_cls: 0.2991  loss_rpn_loc: 0.2904  time: 0.9594  data_time: 0.4687  lr: 7.9921e-05  max_me

In [24]:
!cp ./output/* /content/gdrive/My\ Drive/Kaggle/sartorius/models/detectron2/

In [None]:
!ls ./output/

In [26]:
end_time = datetime.now()

print(end_time - start_time)

5:45:12.620831
5:45:12.620831
