In [1]:
#https://detectron2.readthedocs.io/
import detectron2
import logging
import torch
from detectron2.utils.logger import setup_logger
# Initialise detectron2's logging (called here with its default arguments).
setup_logger()
# Logger named 'detectron2'; the cells below write their status messages through it.
logger = logging.getLogger('detectron2')

In [2]:
# Filesystem layout: every location below is derived from base_folder.
from pathlib import Path

base_folder = Path('.')
data_folder = base_folder.joinpath('til2020')

# Image folders paired with their annotation files, plus the test images.
train_imgs_folder, train_annotations = data_folder/'train', data_folder/'train.json'
val_imgs_folder, val_annotations = data_folder/'val', data_folder/'val.json'
test_imgs_folder = data_folder/'CV_interim_images'

# Checkpoints are written to and read from the same directory; keep these two identical.
save_model_folder = base_folder/'ckpts'
load_model_folder = base_folder/'ckpts'

In [3]:
#Register the TIL datasets so they can be referenced by name like the built-in ones.
#Remember to run fix_annotations.py first to convert TIL2020 to proper COCO format.
#to implement custom loaders, such as pickled, https://detectron2.readthedocs.io/modules/data.html?highlight=DatasetCatalog#detectron2.data.DatasetCatalog
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# DatasetCatalog rejects a name that is already registered, so guard each call:
# this keeps the cell safe to re-run in a live kernel. If the paths above were
# changed, restart the kernel so the new locations are actually registered.
if "til_train" not in DatasetCatalog.list():
    register_coco_instances("til_train", {}, train_annotations, train_imgs_folder)
if "til_val" not in DatasetCatalog.list():
    register_coco_instances("til_val", {}, val_annotations, val_imgs_folder)

In [4]:
#https://detectron2.readthedocs.io/modules/config.html
#Besides the stock detectron2 keys, this cell sets a few custom ones that the
#project's own checkpointer / pipeline / training loop read.
from detectron2 import model_zoo
from detectron2.config import get_cfg

# Start from the model-zoo Faster R-CNN R101-FPN (3x) configuration.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
#cfg.SEED = 42

# ---- datasets, output location, model head ----
cfg.DATASETS.TRAIN = ("til_train",)
cfg.DATASETS.TEST = ("til_val",)
cfg.OUTPUT_DIR = str(save_model_folder)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5 #number of categories

# ---- data loading ----
cfg.DATALOADER.NUM_WORKERS = 12
cfg.DATALOADER.SAMPLER_TRAIN = "RepeatFactorTrainingSampler" #deals with class imbalance
cfg.DATALOADER.REPEAT_THRESHOLD = 0.6

# ---- optimisation ----
cfg.SOLVER.IMS_PER_BATCH = 2 #batch_size
cfg.SOLVER.BASE_LR = 0.00025 #rule of thumb used here: 0.000125*IMS_PER_BATCH
cfg.SOLVER.GAMMA = 0.5 #halves the learning rate at each milestone
#Milestones (in iterations). Tuning them had little effect, so the single milestone
#is pushed far beyond the 30-epoch run; a previous attempt was (20000,60000,100000,140000).
cfg.SOLVER.STEPS = (10000000,)

# ---- augmentation (custom pipeline options) ----
cfg.INPUT.CROP.ENABLED = True
cfg.INPUT.CROP.SIZE = [0.7, 0.9]
#cfg.INPUT.RAND_ROTATION = [-2,2]
cfg.INPUT.RAND_CONTRAST = [0.9,1.1]
cfg.INPUT.RAND_BRIGHTNESS = [0.9,1.1]
cfg.INPUT.RAND_SATURATION = [0.9,1.1]

# ---- logging / checkpoint cadence (iterations) ----
cfg.PRINT_EVERY = 200
cfg.SAVE_EVERY = 1000 #when using ValCheckpointer, it saves only if val loss is the minimum so far

# ---- run length ----
#detectron2 treats data as an endless stream, so "epochs" are emulated:
#do_train() overwrites MAX_ITER with EPOCHS*EPOCH_SIZE when SOLVER.EPOCHS is present.
cfg.SOLVER.MAX_ITER = -1
cfg.SOLVER.EPOCHS = 30
cfg.EPOCH_SIZE = 8225 // cfg.SOLVER.IMS_PER_BATCH #8225 images per pass, counted in batches

logger.info(f"Loaded Config:\n{cfg.dump()}")

[32m[06/14 16:41:59 detectron2]: [0mLoaded Config:
CUDNN_BENCHMARK: false
DATALOADER:
  ASPECT_RATIO_GROUPING: true
  FILTER_EMPTY_ANNOTATIONS: true
  NUM_WORKERS: 12
  REPEAT_THRESHOLD: 0.6
  SAMPLER_TRAIN: RepeatFactorTrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: []
  PROPOSAL_FILES_TRAIN: []
  TEST:
  - til_val
  TRAIN:
  - til_train
EPOCH_SIZE: 4112
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: true
    SIZE:
    - 0.7
    - 0.9
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN:
  - 640
  - 672
  - 704
  - 736
  - 768
  - 800
  MIN_SIZE_TRAIN_SAMPLING: choice
  RAND_BRIGHTNESS:
  - 0.9
  - 1.1
  RAND_CONTRAST:
  - 0.9
  - 1.1
  RAND_SATURATION:
  - 0.9
  - 1.1
MODEL:
  ANCHOR_GENERATOR:
    ANGLES:
    - - -90
      - 0
      - 90
    ASPECT_RATIOS:
    - - 0.5
      - 1.0
      - 2.0
    NAME: DefaultAn

In [5]:
from detectron2.modeling import build_model
# Instantiate the network described by cfg. No checkpoint is loaded in this cell;
# weights are loaded later by do_train() or by the explicit checkpointer cell.
model = build_model(cfg)
# Logs the full module tree, which makes for a very long cell output.
logger.info(f"Created Model:\n{model}")

 (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
      )
      (res3): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv1): Conv2d(
            256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False
            (n

In [6]:
from pipeline import DatasetPipeline

#uses whatever pycocotools is installed, make sure it is the TIL one
def do_test(cfg,model):
    """Run COCO-style evaluation of ``model`` on the validation dataset.

    The dataset evaluated is the first entry of ``cfg.DATASETS.TEST``. Evaluation
    uses whichever ``pycocotools`` is installed, so make sure it is the TIL one.

    Args:
        cfg: the detectron2 config used to build the loader and the evaluator.
        model: the detection model to evaluate.

    Returns:
        Whatever ``inference_on_dataset`` returns for the evaluator (the computed
        metrics). Previously this value was discarded; callers that ignore the
        return value are unaffected.
    """
    from detectron2.evaluation import COCOEvaluator, inference_on_dataset
    from detectron2.data import build_detection_test_loader
    dataset_name = cfg.DATASETS.TEST[0]
    evaluator = COCOEvaluator(dataset_name,cfg,False)
    val_loader = build_detection_test_loader(cfg,dataset_name)
    # Hand the metrics back so they can be inspected or compared programmatically.
    return inference_on_dataset(model,val_loader,evaluator)

def do_eval(cfg,model):
    """Compute the mean total loss of ``model`` over the validation dataset.

    Batches come from the first dataset in ``cfg.DATASETS.TEST``, mapped through
    ``DatasetPipeline(cfg, False)``. Every batch is passed to ``model`` with
    gradients disabled, and the values of the returned dict are summed.

    NOTE(review): this relies on ``model(batch)`` returning a dict of losses,
    which presumably requires the model to be in training mode -- confirm.

    Returns:
        The accumulated loss divided by the number of batches in the loader.
    """
    from detectron2.data import build_detection_test_loader,DatasetMapper
    from tqdm import tqdm
    val_name = cfg.DATASETS.TEST[0]
    val_loader = build_detection_test_loader(cfg,val_name,mapper=DatasetPipeline(cfg,False))
    running_loss = 0
    logger.info("Calculating Validation Loss...")
    with torch.no_grad():
        for batch in tqdm(val_loader):
            batch_losses = model(batch)
            running_loss += sum(batch_losses.values())
    tqdm.write("\n")
    n_batches = len(val_loader)
    return running_loss/n_batches

def do_train(cfg,model,model_context,resume=False):
    """Train ``model`` with a hand-written loop, checkpointing along the way.

    Steps, in order:
      1. Build the optimizer and LR scheduler from ``cfg`` and wrap them, with the
         model, in a ``DetectionCheckpointer`` rooted at ``cfg.OUTPUT_DIR``.
      2. Load ``cfg.MODEL.WEIGHTS`` (or resume, when ``resume`` is true) and read
         the metadata stored with the checkpoint.
      3. If ``cfg.SOLVER`` has an ``EPOCHS`` key, overwrite ``cfg.SOLVER.MAX_ITER``
         with ``EPOCHS * cfg.EPOCH_SIZE`` (this mutates ``cfg``).
      4. Iterate from ``start_iter`` to ``max_iter``: forward, backward, optimizer
         step, scheduler step, periodic metric writing, and ``checkpointer.step``.
      5. Save a ``-final`` checkpoint after the loop.

    If an exception (including ``KeyboardInterrupt``) is raised inside the loop
    body, an ``-interrupted`` checkpoint is saved and the exception is re-raised.

    Args:
        cfg: detectron2 config; also read for the custom keys ``SAVE_EVERY``,
            ``PRINT_EVERY``, ``EPOCH_SIZE`` and ``SOLVER.EPOCHS``.
        model: the model to train; it is switched to training mode here.
        model_context: name used as prefix for checkpoint files and for the
            metrics JSON file, and stored as ``model_name`` in saved checkpoints.
        resume: forwarded to ``resume_or_load``; when true, scheduler settings are
            additionally overridden from ``cfg`` (see below).

    Returns:
        None.
    """
    from detectron2.utils.events import (
        CommonMetricPrinter,
        EventStorage,
        JSONWriter,
        TensorboardXWriter,
    )
    import detectron2.utils.comm as comm
    from detectron2.solver import build_lr_scheduler, build_optimizer
    from detectron2.data import build_detection_train_loader
    from detectron2.checkpoint import DetectionCheckpointer
    from checkpointer import ValCheckpointer #project-local; approximates early stopping by keeping the best-validation checkpoint, but never actually stops training

    optimizer = build_optimizer(cfg, model)
    scheduler = build_lr_scheduler(cfg, optimizer)
    saver = DetectionCheckpointer(model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler)
    meta = saver.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume) #loads the model weights & returns stored meta

    # Best validation loss so far: prefer 'min_val_loss', then 'val_loss', else a large sentinel (999).
    val_loss = meta.get('min_val_loss',meta.get('val_loss',999))
    # ValCheckpointer is given the saver, the save period, the model name, a callable that
    # computes the validation loss, and the starting best loss. Its exact policy lives in checkpointer.py.
    checkpointer = ValCheckpointer(saver,cfg.SAVE_EVERY,model_context,lambda: do_eval(cfg,model),val_loss)
    
    # Epoch-based run length: overrides (and mutates) cfg.SOLVER.MAX_ITER when SOLVER.EPOCHS exists.
    if 'EPOCHS' in cfg.SOLVER.keys(): cfg.SOLVER.MAX_ITER = cfg.SOLVER.EPOCHS*cfg.EPOCH_SIZE
    max_iter = cfg.SOLVER.MAX_ITER
    # Continue after the stored iteration, or start at 0 when the metadata has none.
    # NOTE(review): 'iteration' is read from meta regardless of `resume` -- confirm this is intended
    # when fine-tuning from a checkpoint that already stores an iteration count.
    start_iter = meta.get("iteration",-1)+1 

    if resume: 
        logger.info(f"Resumed model: {meta.get('model_name','unknown')}")
        #override some configs from checkpoint in case you changed them
        scheduler.milestones = cfg.SOLVER.STEPS
        scheduler.gamma = cfg.SOLVER.GAMMA
        scheduler.base_lrs = [cfg.SOLVER.BASE_LR for lr in scheduler.base_lrs]
        scheduler.last_epoch = start_iter

    # Metric sinks: console printer, a per-model JSON file, and TensorBoard event files in OUTPUT_DIR.
    writers = [
        CommonMetricPrinter(max_iter),
        JSONWriter(f"{cfg.OUTPUT_DIR}/{model_context}-metrics.json"),
        TensorboardXWriter(cfg.OUTPUT_DIR),
    ]

    model.train() #set to training mode (PyTorch)
    dataloader = build_detection_train_loader(cfg,mapper=DatasetPipeline(cfg,True))
    logger.info(f"Training: Start Iter {start_iter}, End Iter {max_iter}")
    with EventStorage(start_iter) as storage:
        # range() comes first in zip, so the loop ends when the iteration budget is used up.
        for iteration,data in zip(range(start_iter,max_iter),dataloader):
            # NOTE(review): only the loop body is guarded; an interrupt raised while the next
            # batch is being fetched by zip() is not caught and saves no checkpoint.
            try:
                iteration = iteration + 1 # 1-based count of completed iterations from here on
                storage.step()
                
                loss_dict = model(data)
                losses = sum(loss_dict.values())
                # Abort on NaN/inf loss; the assertion message carries the individual losses.
                assert torch.isfinite(losses).all(), loss_dict

                # Reduce losses across processes for logging only; the backward pass uses `losses`.
                loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
                losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                if comm.is_main_process(): storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced)

                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                # Record the LR that was used for this step, then advance the schedule.
                storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)
                scheduler.step()

                # Skip the first few iterations after start, then write every PRINT_EVERY iterations and at the end.
                if iteration - start_iter > 5 and (iteration % cfg.PRINT_EVERY == 0 or iteration == max_iter):
                    for writer in writers: writer.write()
                checkpointer.step(iteration)
                
            except (Exception,KeyboardInterrupt) as e:
                # Best-effort dump of the current state before propagating the failure / interrupt.
                logger.info("ERROR! Dumping current model...")
                checkpointer.save(f"{model_context}-{iteration}-interrupted",iteration=iteration,model_name=model_context,min_val_loss=checkpointer.min_loss)
                raise e
    # Normal completion: store the final state together with the best validation loss seen.
    checkpointer.save(f"{model_context}-{max_iter}-final",iteration=max_iter,model_name=model_context,min_val_loss=checkpointer.min_loss)


In [7]:
# Initial weights: the model-zoo checkpoint matching the base config. Swap in the
# commented line below to start from a locally saved fine-tuned checkpoint instead.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
#cfg.MODEL.WEIGHTS = str(load_model_folder/"ft-til_resnet101_rcnn-143999-best_val.pth")
do_train(cfg,model,"ft-til_resnet101_rcnn",resume=False)
#If resuming does not pick up the expected model, check which checkpoint is named in the file ckpts/last_checkpoint.
#The trained model file has been uploaded to the Google Drive.

Unable to load 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (6, 1024) in the model!
Unable to load 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (6,) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (20, 1024) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (20,) in the model!
[32m[06/14 16:39:09 detectron2]: [0mPipeline: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip(), RandomContrast(intensity_min=0.9, intensity_max=1.1), RandomBrightness(intensity_min=0.9, intensity_max=1.1), RandomSaturation(intensity_min=0.9, intensity_max=1.1)]
[32m[06/14 16:39:09 d2.data.datasets.coco]: [0mLoaded 822

RuntimeError: CUDA out of memory. Tried to allocate 252.00 MiB (GPU 0; 7.76 GiB total capacity; 5.12 GiB already allocated; 233.56 MiB free; 5.84 GiB reserved in total by PyTorch)

In [7]:
#Load a saved checkpoint into `model` for evaluation. Needed when do_train() has not
#been called in this kernel (do_train loads weights itself).
#The `if True:` is a manual toggle: change it to False to evaluate the weights already in `model`.
if True:
    from detectron2.checkpoint import DetectionCheckpointer
    cfg.MODEL.WEIGHTS = str(load_model_folder/"ft-til_resnet101_rcnn-143999-best_val.pth")
    # resume=False: load exactly the file named in cfg.MODEL.WEIGHTS.
    DetectionCheckpointer(model, cfg.OUTPUT_DIR).resume_or_load(cfg.MODEL.WEIGHTS, resume=False)

# COCO evaluation on the validation dataset (cfg.DATASETS.TEST[0]).
do_test(cfg,model)

In [5]:
from detectron2.engine import DefaultPredictor
# Point the config at the fine-tuned checkpoint before constructing the predictor.
cfg.MODEL.WEIGHTS = str(load_model_folder/"ft-til_resnet101_rcnn-143999-best_val.pth")
# The predictor is constructed from cfg alone; the `model` object built earlier is not passed to it.
predictor = DefaultPredictor(cfg) #TODO: check exactly what DefaultPredictor does and whether it affects results.
#Open question: are the output boxes supposed to be rescaled? The current pipeline resizes back to the original dimensions.

In [6]:
def boxes2xywh(boxes):
    """Convert corner-format boxes to ``[x, y, width, height]`` lists.

    Args:
        boxes: object whose ``.tensor.tolist()`` yields rows of four numbers
            ``(x1, y1, x2, y2)``.

    Returns:
        A list with one ``[x1, y1, x2 - x1, y2 - y1]`` list per input row.
    """
    converted = []
    for left, top, right, bottom in boxes.tensor.tolist():
        converted.append([left, top, right - left, bottom - top])
    return converted

def convert_prediction(img_id,output):
    """Flatten one predictor output into a list of detection records.

    Args:
        img_id: identifier stored as ``image_id`` in every record.
        output: mapping with an ``'instances'`` entry that exposes ``pred_boxes``,
            ``pred_classes`` and ``scores`` and supports ``len()``.

    Returns:
        One dict per instance with the keys ``image_id``, ``category_id``,
        ``bbox`` (``[x, y, w, h]`` via ``boxes2xywh``) and ``score``.

    NOTE(review): ``category_id`` is the raw predicted class index. If the
    dataset's category ids differ from the model's class indices (e.g. ids
    starting at 1), they would need mapping back -- confirm against the
    annotation files.
    """
    instances = output['instances']
    xywh = boxes2xywh(instances.pred_boxes)
    labels = instances.pred_classes.tolist()
    confidences = instances.scores.tolist()

    return [
        {
            'image_id':img_id,
            'category_id':labels[k],
            'bbox':xywh[k],
            'score':confidences[k]
        }
        for k in range(len(instances))
    ]


In [9]:
import json
from tqdm import tqdm
from PIL import Image
import numpy as np

# Run the predictor over every test image and collect COCO-style detection records.
output = []
# sorted() gives a reproducible processing order, and therefore a reproducible ans.json.
for im_path in tqdm(sorted(test_imgs_folder.glob('*'))):
    im_rgb = np.array(Image.open(im_path).convert('RGB'))
    # DefaultPredictor documents its input as an (H, W, C) image in BGR order (it converts
    # to cfg.INPUT.FORMAT itself), whereas PIL decodes to RGB -- so reverse the channel axis.
    # NOTE(review): this assumes training used detectron2's standard image reading; revisit
    # if the custom DatasetPipeline loaded images differently.
    im = np.ascontiguousarray(im_rgb[:, :, ::-1])
    # File names are expected to be the numeric image ids (e.g. "123.jpg").
    output += convert_prediction(int(im_path.stem),predictor(im))

with open('ans.json','w') as f:
    json.dump(output,f)

100%|██████████| 1000/1000 [02:44<00:00,  6.07it/s]


In [16]:
from PIL import Image
from IPython.display import display
import numpy as np

from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Visual sanity check on a single training image.
# PIL decodes to RGB; convert('RGB') also normalises grayscale / RGBA files to 3 channels.
im = np.array(Image.open(train_imgs_folder/"69.jpg").convert('RGB'))
# DefaultPredictor documents BGR input, so hand it a channel-reversed copy.
outputs = predictor(np.ascontiguousarray(im[:, :, ::-1]))
# Visualizer expects RGB and returns RGB, so the PIL array is passed straight through and
# no channel flip is needed afterwards. (The previous flip/un-flip pair, copied from
# cv2-based examples, left the picture intact but swapped the overlay colours.)
v = Visualizer(im, MetadataCatalog.get(cfg.DATASETS.TEST[0]), scale=1.2)
v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
im_out = Image.fromarray(v.get_image())
#display(im_out)