In [3]:
import json
import pandas as pd
import re

# Labels JSON file; the notebook loads it with json.load and reads its
# 'images' and 'annotations' entries.
LABELS_PATH = 'bbox_pope_images/labels.json'
# Model outputs file; the notebook parses it as one JSON object per line.
OUTPUTS_PATH = 'objects_with_bounding_boxes.jsonl'

In [4]:
# Load the labels file (a single JSON document).
with open(LABELS_PATH, 'r') as file:
    labels = json.load(file)

print(labels.keys())

# Load the model outputs: one JSON object per line.
# Whitespace-only lines (e.g. extra newlines at the end of the file) are skipped;
# feeding them to json.loads would raise JSONDecodeError.
with open(OUTPUTS_PATH, 'r') as file:
    outputs = [json.loads(line) for line in file if line.strip()]

# One row per image (id, height, width, file_name, ...)
img_data = labels['images']
img_df = pd.DataFrame(img_data)

# One row per annotation (image_id, bbox, category_id, ...)
bbox_data = labels['annotations']
bbox_df = pd.DataFrame(bbox_data)

# One row per model output record
outputs_df = pd.DataFrame(outputs)

# Rename 'id' -> 'image_id' so the image table shares a key name with the annotations
img_df = img_df.rename(columns={'id': 'image_id'})

# Attach image size and file name to every annotation row.
# how='left' keeps annotations even if their image is missing from the image table.
bbox_df = pd.merge(bbox_df, img_df[['image_id', 'height', 'width', 'file_name']], on='image_id', how='left')

bbox_df.head()

dict_keys(['info', 'images', 'licenses', 'annotations', 'categories'])


Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id,height,width,file_name
0,"[[147.76, 396.11, 158.48, 355.91, 153.12, 347....",78969.3169,0,472375,"[124.71, 196.18, 372.85, 356.81]",18,2551,612,612,COCO_val2014_000000472375.jpg
1,"[[503.13, 363.78, 541.77, 358.63, 541.77, 302....",32002.703,0,554002,"[427.58, 77.87, 188.88, 285.91]",18,4594,425,640,COCO_val2014_000000554002.jpg
2,"[[246.53, 244.56, 245.85, 242.52, 246.53, 240....",3011.2207,0,170365,"[243.73, 190.97, 63.85, 89.8]",64,20654,427,640,COCO_val2014_000000170365.jpg
3,"[[206.39, 307.57, 209.55, 293.04, 213.34, 280....",7330.3174,0,515904,"[169.12, 159.12, 91.6, 148.45]",64,21231,428,640,COCO_val2014_000000515904.jpg
4,"[[583.16, 72.6, 584.58, 78.63, 582.1, 85.72, 5...",2169.8542,0,515904,"[581.03, 39.99, 49.63, 74.8]",64,25830,428,640,COCO_val2014_000000515904.jpg


In [5]:
def process_bounding_box_strings(bboxes):
    """Parse a single bounding box into a list of ints.

    Args:
        bboxes: Either one comma-separated string such as "392,130,765,834"
            (despite the plural name, this is ONE box, not a list of boxes),
            or an already-parsed sequence of numbers.

    Returns:
        list[int]: The coordinates in the order they appeared.

    Raises:
        ValueError: If a comma-separated piece is not an integer literal.
    """
    if isinstance(bboxes, str):
        # int() tolerates surrounding whitespace, so "1, 2" parses fine.
        return [int(part) for part in bboxes.split(',')]
    # Already a sequence: happens when the cell that applies this function is
    # re-run on a column that was parsed earlier. Normalize instead of crashing
    # on the missing .split attribute.
    return [int(value) for value in bboxes]

# Parse every 'bounding_box' entry; the parser is passed directly to apply,
# so no wrapping lambda is needed.
outputs_df['bounding_box'] = outputs_df['bounding_box'].apply(process_bounding_box_strings)
outputs_df

Unnamed: 0,question_id,object_name,bounding_box
0,COCO_val2014_000000006033.jpg,Man,"[392, 130, 765, 834]"
1,COCO_val2014_000000006033.jpg,Goat,"[262, 458, 605, 858]"
2,COCO_val2014_000000006033.jpg,Van,"[0, 0, 939, 568]"
3,COCO_val2014_000000012333.jpg,Woman,"[628, 192, 773, 631]"
4,COCO_val2014_000000012333.jpg,Horse,"[492, 154, 994, 984]"
5,COCO_val2014_000000012333.jpg,Crowd,"[0, 145, 998, 694]"
6,COCO_val2014_000000007320.jpg,Person,"[240, 844, 616, 998]"
7,COCO_val2014_000000007320.jpg,Remote,"[370, 574, 597, 998]"
8,COCO_val2014_000000007320.jpg,Game,"[138, 48, 797, 366]"
9,COCO_val2014_000000001171.jpg,Engine,"[116, 82, 768, 891]"


In [6]:
# Collect, per image, the list of all 'bounding_box' values found in outputs_df
grouped_outputs = (
    outputs_df
    .groupby('question_id')['bounding_box']
    .agg(list)
    .reset_index()
)

# 'question_id' becomes 'file_name' so it can serve as the join key with bbox_df
grouped_outputs_renamed = grouped_outputs.rename(columns={'question_id': 'file_name'})

# Left-join onto the annotation table: every annotation row receives the
# output boxes of its image (NaN when the image has none)
final_df = bbox_df.merge(grouped_outputs_renamed, on='file_name', how='left')

# 'pope_bboxes' is the merged list, with the NaN placeholder replaced by []
final_df['pope_bboxes'] = final_df['bounding_box'].apply(
    lambda boxes: boxes if isinstance(boxes, list) else []
)

# 'bounding_box' is now redundant with 'pope_bboxes'
final_df = final_df.drop(columns=['bounding_box'])

# Keep only annotation rows whose image has at least one output box
has_output_boxes = final_df['pope_bboxes'].apply(lambda boxes: len(boxes) > 0)
final_df = final_df[has_output_boxes]

final_df.head()

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id,height,width,file_name,pope_bboxes
14,"[[134.17, 434.52, 149.8, 444.94, 161.51, 444.2...",16220.20095,0,6033,"[130.26, 293.9, 175.14, 257.16]",20,61111,640,480,COCO_val2014_000000006033.jpg,"[[392, 130, 765, 834], [262, 458, 605, 858], [..."
69,"[[359.31, 345.69, 357.93, 324.83, 357.93, 313....",36426.03225,0,6033,"[190.86, 86.21, 172.24, 442.07]",1,188348,640,480,COCO_val2014_000000006033.jpg,"[[392, 130, 765, 834], [262, 458, 605, 858], [..."
75,"[[458.5, 244.82, 450.64, 209.02, 449.77, 195.9...",8870.11625,0,6033,"[350.23, 36.14, 129.77, 214.79]",1,201201,640,480,COCO_val2014_000000006033.jpg,"[[392, 130, 765, 834], [262, 458, 605, 858], [..."
99,"[[8.63, 2.88, 440.09, 2.88, 458.79, 107.87, 39...",103988.4875,0,6033,"[4.31, 2.88, 454.48, 350.92]",6,248366,640,480,COCO_val2014_000000006033.jpg,"[[392, 130, 765, 834], [262, 458, 605, 858], [..."
143,"[[2.53, 4.21, 407.87, 1.69, 500.0, 28.65, 496....",179716.9779,0,3845,"[1.69, 1.69, 498.31, 366.57]",67,393204,375,500,COCO_val2014_000000003845.jpg,"[[12, 109, 986, 954], [0, 0, 998, 997]]"


In [7]:
# Collect, per image file, the list of all annotation boxes from bbox_df
grouped_bboxes = bbox_df.groupby('file_name')['bbox'].agg(list).reset_index()

# Give every output row a 'file_name' key copied from 'question_id' (which holds
# the image file name). Taking the key from the left side keeps it populated for
# images that have no annotations; with left_on/right_on the right-hand
# 'file_name' would be NaN for those rows, and the height/width lookup below
# would then miss them and leave NaN sizes that break int() scaling later.
outputs_df = outputs_df.assign(file_name=outputs_df['question_id'])
outputs_df = pd.merge(outputs_df, grouped_bboxes, on='file_name', how='left')

# 'gt_bboxes' is the merged annotation list, with [] where the image had no annotations
outputs_df['gt_bboxes'] = outputs_df['bbox'].apply(lambda x: x if isinstance(x, list) else [])

# 'bbox' is now redundant with 'gt_bboxes'
outputs_df = outputs_df.drop(columns=['bbox'])

# Attach the image dimensions needed to rescale the output boxes
outputs_df = pd.merge(outputs_df, img_df[['height', 'width', 'file_name']], on='file_name', how='left')

outputs_df.head()

Unnamed: 0,question_id,object_name,bounding_box,file_name,gt_bboxes,height,width
0,COCO_val2014_000000006033.jpg,Man,"[392, 130, 765, 834]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
1,COCO_val2014_000000006033.jpg,Goat,"[262, 458, 605, 858]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
2,COCO_val2014_000000006033.jpg,Van,"[0, 0, 939, 568]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
3,COCO_val2014_000000012333.jpg,Woman,"[628, 192, 773, 631]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 312.0, 347.86], [325.06, 268....",416,640
4,COCO_val2014_000000012333.jpg,Horse,"[492, 154, 994, 984]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 312.0, 347.86], [325.06, 268....",416,640


In [8]:
def scale_bboxes(row):
    """Rescale every box in row['gt_bboxes'] using the row's image size.

    Each coordinate is divided by 1000 and multiplied by row['width']
    (indices 0 and 2) or row['height'] (indices 1 and 3), then truncated
    with int().

    Args:
        row: Mapping-like object with 'gt_bboxes', 'width' and 'height'.

    Returns:
        list[list[int]]: One [x_min, y_min, x_max, y_max] list per input box.
    """
    return [
        [
            int(box[0] / 1000 * row['width']),
            int(box[1] / 1000 * row['height']),
            int(box[2] / 1000 * row['width']),
            int(box[3] / 1000 * row['height']),
        ]
        for box in row['gt_bboxes']
    ]

def scale_bbox(row):
    """Rescale row['bounding_box'] using the row's image size.

    Each coordinate is divided by 1000 and multiplied by row['width']
    (indices 0 and 2) or row['height'] (indices 1 and 3), then truncated
    with int().

    Args:
        row: Mapping-like object with 'bounding_box', 'width' and 'height'.

    Returns:
        list[int]: [x_min, y_min, x_max, y_max] after scaling.
    """
    box = row['bounding_box']
    # x coordinates scale with the width, y coordinates with the height
    size_keys = ('width', 'height', 'width', 'height')
    return [int(box[i] / 1000 * row[key]) for i, key in enumerate(size_keys)]


# Replace each row's 'bounding_box' with its rescaled version.
# NOTE: this overwrites the column, so running the cell twice scales twice.
outputs_df = outputs_df.assign(bounding_box=outputs_df.apply(scale_bbox, axis=1))
outputs_df

Unnamed: 0,question_id,object_name,bounding_box,file_name,gt_bboxes,height,width
0,COCO_val2014_000000006033.jpg,Man,"[188, 83, 367, 533]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
1,COCO_val2014_000000006033.jpg,Goat,"[125, 293, 290, 549]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
2,COCO_val2014_000000006033.jpg,Van,"[0, 0, 450, 363]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 175.14, 257.16], [190.86, 86....",640,480
3,COCO_val2014_000000012333.jpg,Woman,"[401, 79, 494, 262]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 312.0, 347.86], [325.06, 268....",416,640
4,COCO_val2014_000000012333.jpg,Horse,"[314, 64, 636, 409]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 312.0, 347.86], [325.06, 268....",416,640
5,COCO_val2014_000000012333.jpg,Crowd,"[0, 60, 638, 288]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 312.0, 347.86], [325.06, 268....",416,640
6,COCO_val2014_000000007320.jpg,Person,"[90, 422, 231, 499]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 255.7, 172.04], [304.99, 0.01,...",500,375
7,COCO_val2014_000000007320.jpg,Remote,"[138, 287, 223, 499]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 255.7, 172.04], [304.99, 0.01,...",500,375
8,COCO_val2014_000000007320.jpg,Game,"[51, 24, 298, 183]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 255.7, 172.04], [304.99, 0.01,...",500,375
9,COCO_val2014_000000001171.jpg,Engine,"[74, 34, 491, 379]",COCO_val2014_000000001171.jpg,"[[74.67, 35.42, 419.3, 342.71], [0.0, 303.25, ...",426,640


In [9]:
def convert_bbox_format(bboxes):
    """Convert boxes from [x, y, width, height] to corner form.

    Args:
        bboxes: Iterable of boxes, each indexable as [x, y, width, height].

    Returns:
        list[list]: One [x_min, y_min, x_max, y_max] list per input box,
        where x_max = x + width and y_max = y + height.
    """
    return [
        [box[0], box[1], box[0] + box[2], box[1] + box[3]]
        for box in bboxes
    ]

# Rewrite every list in 'gt_bboxes' from [x, y, w, h] to [x_min, y_min, x_max, y_max].
# NOTE: this overwrites the column, so running the cell twice converts twice.
outputs_df['gt_bboxes'] = outputs_df['gt_bboxes'].map(convert_bbox_format)

outputs_df.head()


Unnamed: 0,question_id,object_name,bounding_box,file_name,gt_bboxes,height,width
0,COCO_val2014_000000006033.jpg,Man,"[188, 83, 367, 533]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480
1,COCO_val2014_000000006033.jpg,Goat,"[125, 293, 290, 549]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480
2,COCO_val2014_000000006033.jpg,Van,"[0, 0, 450, 363]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480
3,COCO_val2014_000000012333.jpg,Woman,"[401, 79, 494, 262]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 629.38, 410.17], [325.06, 268...",416,640
4,COCO_val2014_000000012333.jpg,Horse,"[314, 64, 636, 409]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 629.38, 410.17], [325.06, 268...",416,640


In [10]:

def calculate_iou(box1, box2):
    """Compute the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Box as [x_min, y_min, x_max, y_max].
        box2: Box as [x_min, y_min, x_max, y_max].

    Returns:
        float: Intersection area divided by union area. Returns 0.0 when the
        boxes do not overlap, and also when the union area is zero (both
        boxes degenerate), where the ratio is undefined.
    """
    # Corners of the intersection rectangle
    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_bottom = min(box1[3], box2[3])

    # Disjoint boxes: no overlap at all
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # union(A, B) = A + B - intersection(A, B)
    union_area = box1_area + box2_area - intersection_area

    # Two zero-area boxes that touch pass the disjointness check above but
    # have a zero union; dividing would raise ZeroDivisionError.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

In [11]:
def find_best_iou_and_bbox(row):
    """Find the box in row['gt_bboxes'] that best overlaps row['bounding_box'].

    Args:
        row: Mapping-like object with 'bounding_box' (a single box) and
            'gt_bboxes' (a list of candidate boxes), all in
            [x_min, y_min, x_max, y_max] form.

    Returns:
        pd.Series: Indexed by 'IoU' and 'corresponding_pope_bbox'. 'IoU' is
        the highest IoU found (0 if no candidate overlaps) and
        'corresponding_pope_bbox' is the candidate from 'gt_bboxes' that
        achieved it (None if no candidate overlaps). On ties the first
        candidate wins.
    """
    query_box = row['bounding_box']
    top_iou, top_box = 0, None

    for candidate in row['gt_bboxes']:
        score = calculate_iou(query_box, candidate)
        # Strict comparison: a candidate only replaces the current best if it is better
        if score > top_iou:
            top_iou, top_box = score, candidate

    return pd.Series([top_iou, top_box], index=['IoU', 'corresponding_pope_bbox'])

# For every row, match 'bounding_box' against the row's 'gt_bboxes' and keep
# the best IoU together with the box that produced it.
best_matches = outputs_df.apply(find_best_iou_and_bbox, axis=1)
outputs_df[['IoU', 'corresponding_pope_bbox']] = best_matches

# Show the frame with the new 'IoU' and 'corresponding_pope_bbox' columns
outputs_df

Unnamed: 0,question_id,object_name,bounding_box,file_name,gt_bboxes,height,width,IoU,corresponding_pope_bbox
0,COCO_val2014_000000006033.jpg,Man,"[188, 83, 367, 533]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480,0.945278,"[190.86, 86.21, 363.1, 528.28]"
1,COCO_val2014_000000006033.jpg,Goat,"[125, 293, 290, 549]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480,0.875785,"[130.26, 293.9, 305.4, 551.06]"
2,COCO_val2014_000000006033.jpg,Van,"[0, 0, 450, 363]",COCO_val2014_000000006033.jpg,"[[130.26, 293.9, 305.4, 551.06], [190.86, 86.2...",640,480,0.939718,"[4.31, 2.88, 458.79, 353.8]"
3,COCO_val2014_000000012333.jpg,Woman,"[401, 79, 494, 262]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 629.38, 410.17], [325.06, 268...",416,640,0.973984,"[402.0, 80.44, 493.51, 262.43]"
4,COCO_val2014_000000012333.jpg,Horse,"[314, 64, 636, 409]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 629.38, 410.17], [325.06, 268...",416,640,0.961223,"[317.38, 62.31, 629.38, 410.17]"
5,COCO_val2014_000000012333.jpg,Crowd,"[0, 60, 638, 288]",COCO_val2014_000000012333.jpg,"[[317.38, 62.31, 629.38, 410.17], [325.06, 268...",416,640,0.670805,"[1, 59, 639, 398]"
6,COCO_val2014_000000007320.jpg,Person,"[90, 422, 231, 499]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 306.61, 190.1], [304.99, 0.01,...",500,375,0.886778,"[94.61, 418.45, 226.56, 500.0]"
7,COCO_val2014_000000007320.jpg,Remote,"[138, 287, 223, 499]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 306.61, 190.1], [304.99, 0.01,...",500,375,0.89659,"[142.45, 289.82, 220.01999999999998, 500.0]"
8,COCO_val2014_000000007320.jpg,Game,"[51, 24, 298, 183]",COCO_val2014_000000007320.jpg,"[[50.91, 18.06, 306.61, 190.1], [304.99, 0.01,...",500,375,0.892758,"[50.91, 18.06, 306.61, 190.1]"
9,COCO_val2014_000000001171.jpg,Engine,"[74, 34, 491, 379]",COCO_val2014_000000001171.jpg,"[[74.67, 35.42, 493.97, 378.13], [0.0, 303.25,...",426,640,0.984799,"[74.67, 35.42, 493.97, 378.13]"


In [12]:
def calculate_accuracy(df, column_name, iou_threshold=0.5):
    """Percentage of rows whose value in `column_name` exceeds a threshold.

    Args:
        df: DataFrame holding the scores.
        column_name: Name of the column to evaluate.
        iou_threshold: Values strictly greater than this count as hits.

    Returns:
        The share of hits as a percentage (0-100), or 0 when the column
        has no rows.
    """
    scores = df[column_name]
    n_hits = (scores > iou_threshold).sum()
    n_total = len(scores)

    # Guard against dividing by zero on an empty column
    if n_total > 0:
        return (n_hits / n_total) * 100
    return 0

# Share of rows in outputs_df whose 'IoU' exceeds the function's default threshold (0.5)
iou_accuracy = calculate_accuracy(df=outputs_df, column_name='IoU')

# Report the result
print(f"The accuracy of bounding boxes with IoU > 0.5 is {iou_accuracy:.2f}%.")


The accuracy of bounding boxes with IoU > 0.5 is 88.24%.


In [16]:
# Keep the identifying columns plus the IoU, and flag a row as 'hallucinates'
# when its IoU is at or below 0.5
return_df = (
    outputs_df[['question_id', 'object_name', 'bounding_box', 'IoU']]
    .copy()
    .assign(hallucinates=lambda frame: frame['IoU'] <= 0.5)
)

# Write one JSON record per line (orient='records' with lines=True)
return_df.to_json('hallucination_objects_with_bboxes.json', orient='records', lines=True)
