In [3]:
from IPython.display import Image, display
import PIL.Image
import io
import torch
import numpy as np
from processing_image import Preprocess
from visualizing_image import SingleImageViz
from modeling_frcnn import GeneralizedRCNN
from utils import Config
import utils
from transformers import VisualBertForQuestionAnswering, BertTokenizerFast

In [4]:
from datasets import load_dataset, Image
import pickle
import os
import cv2
import torch
import numpy as np
from torchvision import transforms

In [5]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the local helper
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Pick the compute device once: use CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [8]:
# Load the Faster R-CNN detector and its matching image preprocessor.
# Both the config and the weights come from the same pretrained checkpoint.
FRCNN_CHECKPOINT = "unc-nlp/frcnn-vg-finetuned"

frcnn_cfg = Config.from_pretrained(FRCNN_CHECKPOINT)
# Record the selected device in the config as well as on the model below.
frcnn_cfg.model.DEVICE = device

frcnn = GeneralizedRCNN.from_pretrained(FRCNN_CHECKPOINT, config=frcnn_cfg)
frcnn = frcnn.to(device)

# Preprocessor is built from the same config as the detector.
image_preprocess = Preprocess(frcnn_cfg)

# Tokenizer / VisualBERT VQA head are currently disabled in this notebook:
# bert_tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
# visualbert_vqa = VisualBertForQuestionAnswering.from_pretrained("uclanlp/visualbert-vqa")

loading configuration file cache
%s not found in cache or force_download set to True, downloading to %s https://cdn.huggingface.co/unc-nlp/frcnn-vg-finetuned/pytorch_model.bin /root/.cache/torch/transformers/tmp64uzat92


Downloading:   0%|          | 0.00/262M [00:00<?, ?B/s]

loading weights file https://cdn.huggingface.co/unc-nlp/frcnn-vg-finetuned/pytorch_model.bin from cache at /root/.cache/torch/transformers/57f6df6abe353be2773f2700159c65615babf39ab5b48114d2b49267672ae10f.77b59256a4cf8343ae0f923246a81489fc8d82f98d082edc2d2037c977c0d9d0
All model checkpoint weights were used when initializing GeneralizedRCNN.

All the weights of GeneralizedRCNN were initialized from the model checkpoint at unc-nlp/frcnn-vg-finetuned.
If your task is similar to the task the model of the checkpoint was trained on, you can already use GeneralizedRCNN for predictions without further training.


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/153k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/455M [00:00<?, ?B/s]

In [9]:
# Fetch CIFAR-100 through the `datasets` library; the result is indexed by
# split name ("train" / "test") in later cells.
cifar100_data = load_dataset(path="cifar100")

Downloading builder script:   0%|          | 0.00/5.61k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/4.21k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/9.83k [00:00<?, ?B/s]

Downloading and preparing dataset cifar100/cifar100 to /root/.cache/huggingface/datasets/cifar100/cifar100/1.0.0/f365c8b725c23e8f0f8d725c3641234d9331cd2f62919d1381d1baa5b3ba3142...


Downloading data:   0%|          | 0.00/169M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50000 [00:00<?, ? examples/s]



Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Dataset cifar100 downloaded and prepared to /root/.cache/huggingface/datasets/cifar100/cifar100/1.0.0/f365c8b725c23e8f0f8d725c3641234d9331cd2f62919d1381d1baa5b3ba3142. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Pull out the two splits that the embedding loops iterate over.
train_data, test_data = cifar100_data["train"], cifar100_data["test"]

# torchvision transform for converting images to tensors.
to_tensor = transforms.ToTensor()

In [12]:
# Extract Faster R-CNN "roi_features" for every image in the training split.
# Results are collected in three parallel lists: entry i of each list
# belongs to the i-th image of `train_data`.
print(f"Generating {len(train_data)} embeddings")

visual_embeddings = []
fine_labels = []
coarse_labels = []

# Pure inference: run with autograd disabled so the stored feature tensors
# can never keep a computation graph alive, whatever the detector does
# internally.
with torch.no_grad():
    for idx, img_data in enumerate(train_data):
        # Lightweight progress log every 1000 images.
        if idx % 1000 == 0:
            print(f"On idx {idx}")

        # NOTE(review): presumably img_data["img"] is a PIL image, so this is
        # an RGB HxWxC array - confirm Preprocess expects that channel order
        # for in-memory arrays.
        image = np.array(img_data["img"])

        # run frcnn
        images, sizes, scales_yx = image_preprocess(image)
        output_dict = frcnn(
            images,
            sizes,
            scales_yx=scales_yx,
            padding="max_detections",
            max_detections=frcnn_cfg.max_detections,
            return_tensors="pt",
        )

        # Fail loudly here instead of silently storing None, which would
        # only surface much later when the pickle is consumed.
        features = output_dict.get("roi_features")
        if features is None:
            raise KeyError(f"frcnn output for image {idx} has no 'roi_features' entry")

        # Keep a detached CPU tensor: accumulating thousands of results must
        # not pin accelerator memory, and the pickle written afterwards stays
        # loadable on machines without CUDA.
        visual_embeddings.append(features.detach().cpu())
        fine_labels.append(img_data["fine_label"])
        coarse_labels.append(img_data["coarse_label"])


Generating 50000 embeddings
On idx 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


On idx 1000
On idx 2000
On idx 3000
On idx 4000
On idx 5000
On idx 6000
On idx 7000
On idx 8000
On idx 9000
On idx 10000
On idx 11000
On idx 12000
On idx 13000
On idx 14000
On idx 15000
On idx 16000
On idx 17000
On idx 18000
On idx 19000
On idx 20000
On idx 21000
On idx 22000
On idx 23000
On idx 24000
On idx 25000
On idx 26000
On idx 27000
On idx 28000
On idx 29000
On idx 30000
On idx 31000
On idx 32000
On idx 33000
On idx 34000
On idx 35000
On idx 36000
On idx 37000
On idx 38000
On idx 39000
On idx 40000
On idx 41000
On idx 42000
On idx 43000
On idx 44000
On idx 45000
On idx 46000
On idx 47000
On idx 48000
On idx 49000


In [13]:
# Bundle the training-split features with their fine and coarse labels.
cifar100_train_embeddings = dict(
    embeddings=visual_embeddings,
    fine_labels=fine_labels,
    coarse_labels=coarse_labels,
)

# Persist the bundle as a pickle file in the current working directory.
with open("cifar100-train-embeddings.pkl", "wb") as out_file:
    pickle.dump(cifar100_train_embeddings, out_file)

In [14]:
# Extract Faster R-CNN "roi_features" for every image in the test split.
# The accumulator lists are re-created here, so this cell starts from empty
# lists; entry i of each list belongs to the i-th image of `test_data`.
print(f"Generating {len(test_data)} embeddings")

visual_embeddings = []
fine_labels = []
coarse_labels = []

# Pure inference: run with autograd disabled so the stored feature tensors
# can never keep a computation graph alive, whatever the detector does
# internally.
with torch.no_grad():
    for idx, img_data in enumerate(test_data):
        # Lightweight progress log every 1000 images.
        if idx % 1000 == 0:
            print(f"On idx {idx}")

        # NOTE(review): presumably img_data["img"] is a PIL image, so this is
        # an RGB HxWxC array - confirm Preprocess expects that channel order
        # for in-memory arrays.
        image = np.array(img_data["img"])

        # run frcnn
        images, sizes, scales_yx = image_preprocess(image)
        output_dict = frcnn(
            images,
            sizes,
            scales_yx=scales_yx,
            padding="max_detections",
            max_detections=frcnn_cfg.max_detections,
            return_tensors="pt",
        )

        # Fail loudly here instead of silently storing None, which would
        # only surface much later when the pickle is consumed.
        features = output_dict.get("roi_features")
        if features is None:
            raise KeyError(f"frcnn output for image {idx} has no 'roi_features' entry")

        # Keep a detached CPU tensor: accumulating thousands of results must
        # not pin accelerator memory, and the pickle written afterwards stays
        # loadable on machines without CUDA.
        visual_embeddings.append(features.detach().cpu())
        fine_labels.append(img_data["fine_label"])
        coarse_labels.append(img_data["coarse_label"])


Generating 10000 embeddings
On idx 0
On idx 1000
On idx 2000
On idx 3000
On idx 4000
On idx 5000
On idx 6000
On idx 7000
On idx 8000
On idx 9000


In [15]:
# Bundle the test-split features with their fine and coarse labels.
cifar100_test_embeddings = dict(
    embeddings=visual_embeddings,
    fine_labels=fine_labels,
    coarse_labels=coarse_labels,
)

# Persist the bundle as a pickle file in the current working directory.
with open("cifar100-test-embeddings.pkl", "wb") as out_file:
    pickle.dump(cifar100_test_embeddings, out_file)