In [1]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import logging
logger = logging.getLogger('detectron2')

import numpy as np
import cv2
from PIL import Image
from IPython.display import display

import torch
from detectron2 import model_zoo
from detectron2.engine.defaults import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog,build_detection_train_loader
from detectron2.structures import Boxes, Instances

from scripts.trainer import do_test

In [2]:
#Paths
from pathlib import Path
base_folder = Path('..')
data_folder = base_folder/'data'/'til2020'
train_imgs_folder = data_folder/'train'
train_annotations = data_folder/'train.json'
val_imgs_folder = data_folder/'val'
val_annotations = data_folder/'val.json'
test_imgs_folder = data_folder/'CV_interim_images'
test_annotations = data_folder/'CV_interim_evaluation.json'

save_model_folder = base_folder/'ckpts'
load_model_folder = base_folder/'final_ckpts'

In [3]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("til_val", {}, val_annotations, val_imgs_folder)
register_coco_instances("til_test", {}, test_annotations, test_imgs_folder)

In [4]:
cfg_cropper = get_cfg()
cfg_cropper.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml"))
cfg_cropper.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml")

cropper = DefaultPredictor(cfg_cropper)

In [5]:
cfg_boxer = get_cfg()
cfg_boxer.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))

cfg_boxer.MODEL.ROI_HEADS.NUM_CLASSES = 5
cfg_boxer.MODEL.WEIGHTS = str(load_model_folder/"ft-til_resnet101_rcnn-17999-best_val.pth")

cfg_boxer.DATASETS.TRAIN = ("til_val",)
cfg_boxer.DATASETS.TEST = ("til_val",)

boxer = DefaultPredictor(cfg_boxer)
build_detection_train_loader(cfg_boxer) #force meta to load

[32m[06/19 18:22:56 d2.data.datasets.coco]: [0mLoaded 1474 images in COCO format from ../data/til2020/val.json
[32m[06/19 18:22:56 d2.data.build]: [0mRemoved 0 images with no usable annotations. 1474 images left.
[32m[06/19 18:22:56 d2.data.build]: [0mDistribution of instances among all 5 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|    tops    | 317          |  trousers  | 313          | outerwear  | 316          |
|  dresses   | 1338         |   skirts   | 174          |            |              |
|   total    | 2458         |            |              |            |              |[0m
[32m[06/19 18:22:56 d2.data.common]: [0mSerializing 1474 elements to byte tensors and concatenating them all ...
[32m[06/19 18:22:56 d2.data.common]: [0mSerialized dataset takes 0.37 MiB
[32m[06/19 18:22:56 d2.data.detection_utils]: [0mTransformGe

<detectron2.data.common.AspectRatioGroupedDataset at 0x7f92b67ea640>

In [6]:
#dont ever attempt to train this, it doesnt even have an training output mode let alone backprop
class PipelineWrapper(torch.nn.Module):
    def __init__(self,cropper,boxer):
        super(PipelineWrapper, self).__init__()
        self.cropper = cropper
        self.boxer = boxer

        self.crop_person_confidence = 0.95
        self.metadata = MetadataCatalog.get("coco_2017_val")
        self.crop_boundary = 0.1

    def forward(self,ims):
        outputs = []
        for im_data in ims:
            #convert im back to "normal" (HURR DURR IM A BARBARIAN)
            im = im_data['image'].detach().numpy().transpose(1,2,0)
            im = cv2.resize(im,(im_data['width'],im_data['height']))

            im_out = self.cropper(im)
            im_crops = [tuple(self.crop_bbox(im,bbox)) for bbox in self.get_human_bboxes(im_out)]
                
            collated = []
            for im_crop,o in im_crops:
                inst = self.boxer(im_crop)['instances'].to('cpu')
                boxes = (inst.pred_boxes.tensor + torch.tensor([o[0],o[1],o[0],o[1]])).tolist()
                scores = inst.scores.tolist()
                classes = inst.pred_classes.tolist()
                for i in range(len(inst)): collated.append((boxes[i],scores[i],classes[i]))
            
            collated = sorted(collated,key=lambda x: x[1],reverse=True)[:100]
            #print(collated)

            outputs.append({"instances":Instances((im_data['height'],im_data['width']),
                pred_boxes=Boxes(torch.tensor([x[0] for x in collated])),
                scores=torch.tensor([x[1] for x in collated]),
                pred_classes=torch.tensor([x[2] for x in collated])
            )})
        return outputs


    def get_human_bboxes(self,output):
        meta_cats = self.metadata.thing_classes
        confidence = self.crop_person_confidence
        raw = output['instances']
        boxes = raw.pred_boxes.tensor.tolist()
        cats = [meta_cats[x] for x in raw.pred_classes.tolist()]
        scores = raw.scores.tolist()
        return [boxes[i] for i in range(len(raw)) if cats[i] == 'person' and scores[i] >= confidence]

    #works when boundary is 999 even, so it works
    def crop_bbox(self,im,bbox):
        b = self.crop_boundary
        x1,y1,x2,y2 = bbox
        h,w = im.shape[:2]
        xf,yf = b*(x2-x1),b*(y2-y1)
        x1,y1,x2,y2 = round(max(0,x1-xf)),round(max(0,y1-yf)),round(min(w,x2+xf)),round(min(h,y2+yf))
        #https://github.com/yu45020/Waifu2x INSERT UPSAMPLER
        return im[y1:y2,x1:x2],(x1,y1)


im = cv2.imread("../input.jpg")
tester = DefaultPredictor(cfg_boxer)
model = PipelineWrapper(cropper,boxer)
tester.model = model

im_instance = tester(im)['instances']

v = Visualizer(im, MetadataCatalog.get("til_val"))
v = v.draw_instance_predictions(im_instance)
im_out = Image.fromarray(v.get_image()[:,:,::-1]) #channels are reversed
display(im_out)

In [7]:
model = PipelineWrapper(cropper,boxer)
do_test(cfg_boxer,model,dataset_name="til_test")

[32m[06/19 18:23:02 d2.data.datasets.coco]: [0mLoaded 1000 images in COCO format from ../data/til2020/CV_interim_evaluation.json
[32m[06/19 18:23:02 d2.data.build]: [0mDistribution of instances among all 5 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|    tops    | 0            |  trousers  | 0            | outerwear  | 0            |
|  dresses   | 0            |   skirts   | 0            |            |              |
|   total    | 0            |            |              |            |              |[0m
[32m[06/19 18:23:02 d2.data.common]: [0mSerializing 1000 elements to byte tensors and concatenating them all ...
[32m[06/19 18:23:02 d2.data.common]: [0mSerialized dataset takes 0.12 MiB
[32m[06/19 18:23:02 d2.evaluation.evaluator]: [0mStart inference on 1000 images
[32m[06/19 18:23:05 d2.evaluation.evaluator]: [0mInference done 

<detectron2.evaluation.coco_evaluation.COCOEvaluator at 0x7f92c5d69580>

## Approaches
2. Upscaling: https://github.com/yu45020/Waifu2x ?????
4. map bbox back to original (how to increase cropping precision? upscale original image before cropping?)



### Good reason to use R101-FPN for everything:
1. It trains fast
2. It uses decent vram
3. It has near SOTA performance anyways
4. AND IT PREDICTS FAST ANYWAYS
5. why is this model so OP