In [1]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup:
# Setup detectron2 logger
# (setup_logger() is called once here, immediately after it is imported and
# before any of the remaining imports or cells run.)
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
from matplotlib import pyplot as plt
import random
import json
import pickle

import time
import os
from tqdm import tqdm

# import some common detectron2 utilities
import torch

from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs, fast_rcnn_inference_single_image
from detectron2.structures.boxes import Boxes
from detectron2.structures.instances import Instances

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [2]:
# Build the inference config and predictor from the `mask_rcnn_R_101_C4_3x`
# model-zoo entry. The same zoo path selects both the config file and the
# checkpoint URL, so it is named once here.
# (Add project-specific config, e.g. TensorMask, before merging if the model
# is not part of detectron2's core library.)
model_zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_entry))
# Weights come from detectron2's model zoo; a https://dl.fbaipublicfiles... url works as well.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_zoo_entry)
# Score threshold used by the ROI heads at test time for this model.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

predictor = DefaultPredictor(cfg)

In [3]:
def doit(raw_image, raw_boxes):
    """Compute one spatially-averaged RoI feature vector per box.

    Relies on the notebook-level ``predictor``. The image is resized with the
    predictor's own transform, the boxes are rescaled by the same x/y factors,
    the backbone is run once on the image, and every box is passed through
    ``roi_heads._shared_roi_transform`` before being averaged over its two
    spatial dimensions.

    Args:
        raw_image: image array whose first two dimensions are (height, width).
            It is transposed with axes (2, 0, 1) after resizing, so a
            three-dimensional H x W x C array is expected (e.g. the result of
            ``cv2.imread``).
        raw_boxes: NumPy array of boxes in the pixel coordinates of
            ``raw_image``. Presumably shaped (N, 4) in XYXY order, as wrapped
            by ``Boxes`` -- TODO confirm against callers.

    Returns:
        A tensor with one row per input box: the RoI features averaged over
        dimensions 2 and 3. It is computed without gradient tracking.
    """
    # Put the boxes on the same device as the model instead of hard-coding
    # CUDA, so the function also works when the predictor runs on CPU or on a
    # non-default GPU.
    device = predictor.model.device
    raw_boxes = Boxes(torch.from_numpy(raw_boxes).to(device))

    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]

        # Preprocessing: apply the predictor's own resize transform.
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)

        # Scale the boxes by the same factors the image was resized with.
        new_height, new_width = image.shape[:2]
        scale_x = 1. * new_width / raw_width
        scale_y = 1. * new_height / raw_height
        boxes = raw_boxes.clone()  # keep the unscaled boxes untouched
        boxes.scale(scale_x=scale_x, scale_y=scale_y)

        # HWC -> CHW float tensor, wrapped in the dict format the model's
        # preprocess_image() consumes.
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [boxes]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        return feature_pooled
        

In [60]:
# Invert the training dataset's `thing_dataset_id_to_contiguous_id` mapping so
# that `coco_key` maps contiguous id -> dataset id.
dataset_to_contiguous = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_dataset_id_to_contiguous_id
coco_key = {
    contiguous_id: dataset_id
    for dataset_id, contiguous_id in dataset_to_contiguous.items()
}

In [7]:
# Input images/metadata and the directory the extracted features are written to.
IMAGE_DIR = "/home/jamesp/data/vcr/vcr1images"
OUTPUT_DIR = "/home/jamesp/data/visualcomet/features"

# Entries of IMAGE_DIR (presumably one sub-directory per movie), sorted by name.
movie_dirs = os.listdir(IMAGE_DIR)
movie_dirs.sort()
print(len(movie_dirs))

2335


In [12]:
# Extract and store features for every image of every movie directory.
#
# For each image id, <id>.jpg and <id>.json are read from the movie directory.
# The whole-image box [0, 0, width, height] is put in front of the annotated
# boxes, so row 0 of the result is the image-level feature and the remaining
# rows are the per-object features. The result is pickled to
# OUTPUT_DIR/<id>.pkl.

# Create the destination up front so the first dump cannot fail on a missing directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): the last two entries of movie_dirs are skipped on purpose by the
# original code; the reason is not visible here -- confirm before changing.
for movie in tqdm(movie_dirs[:-2]):
    movie_path = os.path.join(IMAGE_DIR, movie)
    # Unique file stems. splitext leaves names without an extension intact,
    # whereas slicing at rfind('.') would chop off their last character.
    img_ids = sorted({os.path.splitext(name)[0] for name in os.listdir(movie_path)})
    for img_id in img_ids:
        image_path = os.path.join(movie_path, img_id + '.jpg')
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + image_path)

        with open(os.path.join(movie_path, img_id + '.json')) as metadata_file:
            metadata = json.load(metadata_file)

        # Keep only the first four columns (the box coordinates) of each entry.
        boxes = np.array(metadata['boxes'])[:, :4]
        h = metadata['height']
        w = metadata['width']
        # Prepend a box covering the full image.
        boxes = np.vstack((np.array([0, 0, w, h]), boxes))
        obj_rep = doit(im, boxes).to("cpu").numpy()

        features = {'image_features' : obj_rep[0],
                    'object_features' : obj_rep[1:]}
        # NOTE(review): the output name uses only the image id, not the movie;
        # assumes ids are unique across movie directories -- confirm.
        with open(os.path.join(OUTPUT_DIR, img_id + '.pkl'), 'wb') as out_file:
            pickle.dump(features, out_file)


  0%|          | 0/2333 [00:00<?, ?it/s][A

KeyboardInterrupt: 