In [17]:
import pickle
import json
import numpy as np
import os

In [18]:
# Input locations (relative to the notebook's working directory).
EXTRACTED_OBJECTS_PATH = 'objects_with_bounding_boxes.jsonl'
SEGMENTATIONS_PATH = 'pickled_segmentations'

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a segmentation file that you produced yourself or otherwise trust.
with open(SEGMENTATIONS_PATH, "rb") as file:
    segmentations = pickle.load(file)

# JSON Lines: one JSON document per line. Blank lines (for example a stray
# empty line at the end of the file) are skipped, because json.loads would
# raise JSONDecodeError on them.
with open(EXTRACTED_OBJECTS_PATH, 'r') as file:
    extracted_objects = [json.loads(line) for line in file if line.strip()]

In [19]:
def scale_bbox(bbox, width=640, height=480, source_size=1000):
    """Rescale a bounding box from a square normalised grid to pixel coordinates.

    Args:
        bbox: Comma-separated string "x0,y0,x1,y1" with each coordinate
            expressed on a ``source_size`` x ``source_size`` grid
            (1000x1000 by default, the CogVLM convention used in this notebook).
        width: Target image width in pixels (default 640).
        height: Target image height in pixels (default 480).
        source_size: Side length of the grid the input coordinates live on.

    Returns:
        Tuple ``(x0, y0, x1, y1)`` of ints in target pixel coordinates.
        Values are truncated towards zero by ``int()``.

    Raises:
        ValueError: If a coordinate cannot be parsed as a float.
        IndexError: If ``bbox`` contains fewer than four coordinates.
    """
    # Normalise every coordinate to the [0, 1] range first.
    fractions = [float(corner) / source_size for corner in bbox.split(",")]
    # Even positions are x coordinates (scaled by width),
    # odd positions are y coordinates (scaled by height).
    scaled_x0, scaled_x1 = int(fractions[0] * width), int(fractions[2] * width)
    scaled_y0, scaled_y1 = int(fractions[1] * height), int(fractions[3] * height)
    return scaled_x0, scaled_y0, scaled_x1, scaled_y1

In [25]:
base_url = "http://images.cocodataset.org/val2014/"
# A box is flagged as a hallucination when more than this fraction of its
# pixels have the value 0 in the segmentation mask.
HALLUCINATION_THRESHOLD = 0.75

# Annotate every extracted object in place with the share of zero-valued mask
# pixels inside its bounding box, and a hallucination flag derived from it.
for extracted in extracted_objects:
    # Segmentations are keyed by the full image URL.
    image_name = extracted['question_id']
    bbox = extracted["bounding_box"]
    segmentation_mask = segmentations[base_url + image_name]

    # Scale bbox coords from the 1000x1000 grid to 640x480 pixels.
    x0, y0, x1, y1 = scale_bbox(bbox)

    # NumPy image arrays are indexed [row, column], i.e. [y, x], so the
    # vertical range comes first. Indexing [x0:x1, y0:y1] would silently
    # clip and inspect the wrong region.
    # NOTE(review): assumes the mask is laid out (height, width) - confirm
    # against the code that produced the pickled segmentations.
    masked_bbox = segmentation_mask[y0:y1, x0:x1]

    # An empty crop (degenerate box) has no pixels to judge; report 0.0
    # instead of dividing by zero.
    if masked_bbox.size:
        background_ratio = np.count_nonzero(masked_bbox == 0) / masked_bbox.size
    else:
        background_ratio = 0.0

    extracted['background_ratio'] = background_ratio
    extracted['is_hallucination'] = background_ratio > HALLUCINATION_THRESHOLD


In [29]:
output_path = "object_background_matching.jsonl"

# Persist the annotated objects as JSON Lines: one JSON document per line.
# A plain loop is used because the writes are side effects; a list
# comprehension would build a throwaway list of return values.
with open(output_path, 'w') as file:
    for record in extracted_objects:
        file.write(json.dumps(record) + "\n")