In [1]:
import os
import cv2
import torch
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import MetadataCatalog
from torchvision.transforms.functional import to_tensor
import torch.nn.functional as F
import pickle

In [2]:
def setup_detectron2_model(config_file, model_weights, device, score_thresh=0.5):
    """Build a detectron2 model in eval mode with its checkpoint loaded.

    Args:
        config_file: Path to a detectron2 YAML config, merged over the
            library defaults returned by ``get_cfg()``.
        model_weights: Checkpoint path or ``detectron2://`` URI. Stored in
            ``cfg.MODEL.WEIGHTS`` and loaded via ``DetectionCheckpointer``.
        device: Device string written to ``cfg.MODEL.DEVICE``.
        score_thresh: Minimum detection score kept at test time.

    Returns:
        The model created by ``build_model(cfg)``, in eval mode.
    """
    cfg = get_cfg()
    cfg.merge_from_file(config_file)
    # RetinaNet and the R-CNN box head read different config keys. Setting
    # only the RetinaNet key leaves a Faster R-CNN at its default threshold,
    # so apply the value to both and let the architecture pick its own.
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.WEIGHTS = model_weights
    cfg.MODEL.DEVICE = device

    model = build_model(cfg)
    model.eval()
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
    return model

In [3]:
def extract_roi_features(image, model):
    """Return the pooled ROI feature map of an image, averaged over detections.

    The image is pushed through the model's backbone, proposal generator and
    ROI heads; the final predicted boxes are then pooled from the FPN levels
    ``p2``-``p5`` with the ROI heads' ``box_pooler`` and averaged across the
    detection axis.

    Args:
        image: H x W x C (or H x W) array-like of pixels. ``uint8`` data is
            used as-is; any other dtype is multiplied by 255 and cast to
            ``uint8``, i.e. it is treated as already scaled to [0, 1].
            NOTE(review): channel order is passed through untouched, so it
            must already match what the model was trained on.
        model: Object exposing ``device``, ``preprocess_image``, ``backbone``,
            ``proposal_generator`` and ``roi_heads`` (with ``box_pooler``).

    Returns:
        A CPU tensor shaped like one pooled ROI (``roi_features.shape[1:]``).
        When the model detects nothing the result is all zeros instead of the
        NaNs that a mean over an empty axis would produce.
    """
    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Single-channel input: add the channel axis the transpose expects.
        pixels = pixels[:, :, None]

    # HWC -> CHW. uint8 pixels stay as bytes: a float / 255 * 255 round trip
    # followed by a truncating cast can drop an intensity level.
    chw = torch.from_numpy(np.ascontiguousarray(pixels.transpose(2, 0, 1)))
    if chw.dtype != torch.uint8:
        chw = (chw * 255).to(torch.uint8)
    chw = chw.to(model.device)

    with torch.no_grad():
        batch = [{"image": chw, "height": chw.shape[-2], "width": chw.shape[-1]}]
        input_image = model.preprocess_image(batch)
        features = model.backbone(input_image.tensor)
        proposals, _ = model.proposal_generator(input_image, features)
        instances, _ = model.roi_heads(input_image, features, proposals)
        roi_features = model.roi_heads.box_pooler(
            [features[f] for f in ["p2", "p3", "p4", "p5"]],
            [x.pred_boxes for x in instances],
        )

        # Collapse the variable number of detections into one fixed-size map.
        if roi_features.shape[0] == 0:
            mean_roi_features = roi_features.new_zeros(roi_features.shape[1:])
        else:
            mean_roi_features = torch.mean(roi_features, dim=0)

    return mean_roi_features.cpu()

In [4]:
def get_tiny_imagenet_images(path):
    """Collect the path of every ``.JPEG`` file found anywhere beneath ``path``.

    The extension match is case-sensitive, and results follow the traversal
    order of ``os.walk``. Each entry is the containing directory joined with
    the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
        if name.endswith(".JPEG")
    ]

In [5]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Config is a relative file name, so it is resolved against the working
# directory; the weights are addressed through a detectron2:// URI.
config_file = "faster_rcnn_R_50_FPN_3x.yaml"
model_weights = "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl"

model = setup_detectron2_model(config_file, model_weights, device)

model_final_280758.pkl: 167MB [00:01, 100MB/s]                               


In [6]:
# Dataset root, given relative to the working directory.
tiny_imagenet_path = "tiny-imagenet-200"

# Every .JPEG file found beneath that root.
image_paths = get_tiny_imagenet_images(path=tiny_imagenet_path)

In [7]:
print("Generating: " + str(len(image_paths)) + " embeddings, for " + tiny_imagenet_path)

# cv2.imread returns BGR pixels, and extract_roi_features feeds them to the
# model without reordering channels. Convert only when the model declares RGB
# input; otherwise a BGR-trained model would see swapped channels.
# NOTE(review): relies on the model exposing `input_format`; when the
# attribute is missing or None, BGR (detectron2's default) is assumed.
model_wants_rgb = getattr(model, "input_format", "BGR") == "RGB"

# Maps image path -> averaged ROI feature tensor (kept on the CPU).
visual_embeddings = {}

for idx, image_path in enumerate(image_paths):
    if idx % 1000 == 0:
        print("On idx " + str(idx))
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # rather than raising; skip it instead of crashing inside cvtColor.
        print("Skipping unreadable image: " + image_path)
        continue
    if model_wants_rgb:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    roi_features = extract_roi_features(image, model)
    visual_embeddings[image_path] = roi_features

Generating: 120000 embeddings, for tiny-imagenet-200
On idx 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


On idx 1000
On idx 2000
On idx 3000
On idx 4000
On idx 5000
On idx 6000
On idx 7000
On idx 8000
On idx 9000
On idx 10000
On idx 11000
On idx 12000
On idx 13000
On idx 14000
On idx 15000
On idx 16000
On idx 17000
On idx 18000
On idx 19000
On idx 20000
On idx 21000
On idx 22000
On idx 23000
On idx 24000
On idx 25000
On idx 26000
On idx 27000
On idx 28000
On idx 29000
On idx 30000
On idx 31000
On idx 32000
On idx 33000
On idx 34000
On idx 35000
On idx 36000
On idx 37000
On idx 38000
On idx 39000
On idx 40000
On idx 41000
On idx 42000
On idx 43000
On idx 44000
On idx 45000
On idx 46000
On idx 47000
On idx 48000
On idx 49000
On idx 50000
On idx 51000
On idx 52000
On idx 53000
On idx 54000
On idx 55000
On idx 56000
On idx 57000
On idx 58000
On idx 59000
On idx 60000
On idx 61000
On idx 62000
On idx 63000
On idx 64000
On idx 65000
On idx 66000
On idx 67000
On idx 68000
On idx 69000
On idx 70000
On idx 71000
On idx 72000
On idx 73000
On idx 74000
On idx 75000
On idx 76000
On idx 77000
On idx 7

In [8]:
# Load the validation annotation file; each line is tab-separated.
with open('tiny-imagenet-200/val/val_annotations.txt', 'r') as annotations_file:
    val_annotations_lines = annotations_file.readlines()

# Map the first field of every line to the second one. Later cells look this
# up by image file name and use the value as the label.
val_annotations = {
    fields[0]: fields[1]
    for fields in (row.split('\t') for row in val_annotations_lines)
}

In [9]:
# Second name for the same dict (no copy is made): image path -> feature tensor.
embeddings = visual_embeddings

In [16]:
# Embeddings regrouped per dataset split. The test split has no labels.
new_embeddings = {
    'test': {
        'embeddings': [],
    },
    'val': {
        'embeddings': [],
        'labels': []
    },
    'train': {
        'embeddings': [],
        'labels': []
    }
}

# Counts every visited entry; used only to throttle the progress prints.
counter = 0

for k, v in embeddings.items():
    # Split on the platform separator and locate the split folder by name
    # rather than by a fixed index, so a nested dataset root still works.
    parts = os.path.normpath(k).split(os.sep)
    split = next((part for part in parts if part in new_embeddings), None)

    if split == 'val':
        # Validation labels come from the annotation file, keyed by file name.
        label = val_annotations[parts[-1]]
    elif split == 'train':
        # The label is the directory two levels above the image file.
        label = parts[-3]
    else:
        label = None

    if split is not None:
        bucket = new_embeddings[split]
        bucket['embeddings'].append(v)
        if label is not None:
            bucket['labels'].append(label)
        if counter % 500 == 0:
            print("Added " + split + " embedding with shape", v.shape)
            if label is not None:
                print("And label " + label)
    counter += 1

Added test embedding with shape torch.Size([1, 256, 7, 7])
Added test embedding with shape torch.Size([2, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([2, 256, 7, 7])
Added test embedding with shape torch.Size([3, 256, 7, 7])
Added test embedding with shape torch.Size([1, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7])
Added test embedding with shape torch.Size([0, 256, 7, 7

In [17]:
# Serialize the regrouped embeddings to a pickle file in the working directory.
with open("tiny-imagenet-embeddings.pkl", 'wb') as pickle_file:
    pickle.dump(new_embeddings, pickle_file)