In [1]:
import os
# Move the working directory two levels up; the relative "amlt/..." paths used
# below are resolved against it (presumably the project root — confirm).
# NOTE(review): not idempotent — re-running this cell climbs another two levels.
os.chdir("../..")

In [2]:
import os.path as osp
from pathlib import Path
import numpy as np
import pandas as pd
import json

In [3]:
def _add_prefix_suffix_to_path(path: str, prefix: str, suffix: str) -> str:
    """Return ``path`` with its final component wrapped in ``prefix`` and ``suffix``.

    Only the last path component is renamed; the directory part is kept as is.
    The suffix is appended after the existing extension, it does not replace it.

    Args:
        path: File path whose last component should be renamed.
        prefix: Text placed directly before the file name.
        suffix: Text placed directly after the file name.

    Returns:
        The directory of ``path`` joined with ``prefix + filename + suffix``.
    """
    parent = os.path.dirname(path)
    renamed = f"{prefix}{os.path.basename(path)}{suffix}"
    return os.path.join(parent, renamed)

In [4]:
# Model name -> path of that model's prediction JSON file.
# Paths are relative to the current working directory.
pred_json_dict = {
    "sca": "amlt/exp-verb-metric/infer-train-sca-ablat-lsj-scale_lr-110423-110723.running-2/last-fp16-ollm3bv2-large-lsj-1xlr.110423.octo-4x8-v100-16g-no_pre/vg-densecap-region_descriptions/infer/infer-visual_genome-region_descriptions_v1.2.0-test.json",
    "blip2-opt-2.7b": "amlt/exp-verb-metric/infer-sam_captioner/Salesforce/blip2-opt-2.7b/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
    "blip2-opt-2.7b-coco": "amlt/exp-verb-metric/infer-sam_captioner/Salesforce/blip2-opt-2.7b-coco/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
    "blip2-opt-6.7b": "amlt/exp-verb-metric/infer-sam_captioner/Salesforce/blip2-opt-6.7b/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
    "blip2-opt-6.7b-coco": "amlt/exp-verb-metric/infer-sam_captioner/Salesforce/blip2-opt-6.7b-coco/infer-post_processed/infer-visual_genome-densecap-local-densecap-test.json.post.json",
}

In [11]:
# Score types; each one has its own JSON file stored next to the prediction file.
SCORE_PREFIXS = ["noun_distance", "noun_recall", "verb_distance", "verb_recall"]

# score type -> model name -> score file path, derived from the prediction path
# as "<dir>/<score type>-<prediction file name>.json".
score_json_dict = {}
for score_prefix in SCORE_PREFIXS:
    score_json_dict[score_prefix] = {
        model_name: _add_prefix_suffix_to_path(model_pred_path, score_prefix + "-", ".json")
        for model_name, model_pred_path in pred_json_dict.items()
    }

In [12]:
# Check that every input file exists before loading anything.
# First the prediction files, one per model ...
for k, v in pred_json_dict.items():
    assert osp.exists(v), f"{v} does not exist"

# ... then the derived score files, one per (score type, model) pair.
for score_prefix, _score_json_dict in score_json_dict.items():
    for pred_name, pred_json_path in _score_json_dict.items():
        assert osp.exists(pred_json_path), f"{pred_json_path} does not exist"

In [31]:
# load json and score and combine them into one dataframe

def read_pred_score(pred_name):
    """Load one model's predictions and scores into a single DataFrame.

    Reads the prediction JSON registered under ``pred_name`` in
    ``pred_json_dict`` and, for every entry of ``SCORE_PREFIXS``, the matching
    score JSON from ``score_json_dict``.

    Args:
        pred_name: Key into ``pred_json_dict`` / ``score_json_dict[...]``.

    Returns:
        A DataFrame with one row per prediction and the columns ``_id``,
        ``ref`` (first reference), ``pred`` (first candidate), followed by one
        column per score type in ``SCORE_PREFIXS`` order.

    Raises:
        ValueError: If a score file does not have the same number of entries
            as the prediction file. The message names both files by path.
    """
    # Keep path and file-handle names separate so error messages show paths,
    # not file-object reprs.
    pred_path = pred_json_dict[pred_name]
    with open(pred_path, "r") as pred_file:
        pred_json = json.load(pred_file)

    score_arrays = []
    for score_prefix in SCORE_PREFIXS:
        score_path = score_json_dict[score_prefix][pred_name]
        with open(score_path, "r") as score_file:
            score_json = json.load(score_file)
        # Validate per file so the error points at the file that is off.
        if len(pred_json) != len(score_json):
            raise ValueError(
                f"pred_json and score_json have different length: {len(pred_json)} vs {len(score_json)}, in {pred_path} and {score_path}"
            )
        score_arrays.append(np.array(score_json))
    # assumes each score file is a flat list of per-sample numbers — TODO confirm
    scores = np.stack(score_arrays, axis=-1)  # (N, num_score_type)

    combined_records = []
    for pred, score in zip(pred_json, scores):
        record = {
            "_id": pred["_id"],
            "ref": pred["references"][0],
            "pred": pred["candidates"][0],
        }
        record.update({score_prefix: score[i] for i, score_prefix in enumerate(SCORE_PREFIXS)})
        combined_records.append(record)

    return pd.DataFrame(combined_records)

# Try the loader on a single model and preview the first rows.
pred_name = "sca"
conbined_df = read_pred_score(pred_name)
conbined_df.head()

Unnamed: 0,_id,ref,pred,noun_distance,noun_recall,verb_distance,verb_recall
0,0,button on the mans jacket,a button on a jacket,0.690787,0.666667,0.0,0.0
1,1,photography website for the company that took ...,the name of the photographer,0.521651,0.0,0.0,0.0
2,2,photographer logo and name,the writing is in white,0.23467,0.0,0.0,0.0
3,3,black tie the man is wearing,The man is wearing a tie.,1.0,1.0,1.0,1.0
4,4,a vest that is under the coat,The man is wearing a black vest.,0.74347,0.5,0.0,0.0


In [32]:
# Load the combined prediction/score frame of every model, keyed by model name.
conbined_dict = dict()
for pred_name in pred_json_dict:
    conbined_dict[pred_name] = read_pred_score(pred_name)


In [41]:
# Put all models side by side; each model's columns get a "<model name>-" prefix.
combined_df = pd.concat(
    [conbined_dict[model_name].add_prefix(f"{model_name}-") for model_name in pred_json_dict],
    axis=1,
)

In [42]:
# Preview the first rows of the side-by-side frame.
combined_df.head()

Unnamed: 0,sca-_id,sca-ref,sca-pred,sca-noun_distance,sca-noun_recall,sca-verb_distance,sca-verb_recall,blip2-opt-2.7b-_id,blip2-opt-2.7b-ref,blip2-opt-2.7b-pred,...,blip2-opt-6.7b-noun_recall,blip2-opt-6.7b-verb_distance,blip2-opt-6.7b-verb_recall,blip2-opt-6.7b-coco-_id,blip2-opt-6.7b-coco-ref,blip2-opt-6.7b-coco-pred,blip2-opt-6.7b-coco-noun_distance,blip2-opt-6.7b-coco-noun_recall,blip2-opt-6.7b-coco-verb_distance,blip2-opt-6.7b-coco-verb_recall
0,0,button on the mans jacket,a button on a jacket,0.690787,0.666667,0.0,0.0,0,button on the mans jacket,a black and white photo of a person in a dark ...,...,0.0,0.0,0.0,0,button on the mans jacket,a blurry image of a person standing in front o...,0.314736,0.0,0.0,0.0
1,1,photography website for the company that took ...,the name of the photographer,0.521651,0.0,0.0,0.0,1,photography website for the company that took ...,a black and white image of a line of white lin...,...,0.0,0.0,0.0,1,photography website for the company that took ...,a close up of a clock with a black background\n,0.403307,0.0,0.0,0.0
2,2,photographer logo and name,the writing is in white,0.23467,0.0,0.0,0.0,2,photographer logo and name,a man is standing in front of a blue background\n,...,0.0,0.0,0.0,2,photographer logo and name,a close up of a man wearing a white shirt and ...,0.282883,0.0,0.0,0.0
3,3,black tie the man is wearing,The man is wearing a tie.,1.0,1.0,1.0,1.0,3,black tie the man is wearing,a man wearing a suit and tie\n,...,0.5,1.0,1.0,3,black tie the man is wearing,a close up of a woman's legs in a pair of high...,0.591417,0.0,0.0,0.0
4,4,a vest that is under the coat,The man is wearing a black vest.,0.74347,0.5,0.0,0.0,4,a vest that is under the coat,a man is sitting on a chair\n,...,0.0,0.0,0.0,4,a vest that is under the coat,a close up of a pair of shoes with a person we...,0.460147,0.0,0.0,0.0


In [63]:
# Keep only the columns whose name contains "verb", "pred" or "ref".
filtered_combined_df = combined_df[
    [
        column
        for column in combined_df.columns
        if any(keyword in column for keyword in ("verb", "pred", "ref"))
    ]
]
filtered_combined_df.head()

Unnamed: 0,sca-ref,sca-pred,sca-verb_distance,sca-verb_recall,blip2-opt-2.7b-ref,blip2-opt-2.7b-pred,blip2-opt-2.7b-verb_distance,blip2-opt-2.7b-verb_recall,blip2-opt-2.7b-coco-ref,blip2-opt-2.7b-coco-pred,blip2-opt-2.7b-coco-verb_distance,blip2-opt-2.7b-coco-verb_recall,blip2-opt-6.7b-ref,blip2-opt-6.7b-pred,blip2-opt-6.7b-verb_distance,blip2-opt-6.7b-verb_recall,blip2-opt-6.7b-coco-ref,blip2-opt-6.7b-coco-pred,blip2-opt-6.7b-coco-verb_distance,blip2-opt-6.7b-coco-verb_recall
0,button on the mans jacket,a button on a jacket,0.0,0.0,button on the mans jacket,a black and white photo of a person in a dark ...,0.0,0.0,button on the mans jacket,a blurry picture of a black and white picture ...,0.0,0.0,button on the mans jacket,a black and white photo of a person with a whi...,0.0,0.0,button on the mans jacket,a blurry image of a person standing in front o...,0.0,0.0
1,photography website for the company that took ...,the name of the photographer,0.0,0.0,photography website for the company that took ...,a black and white image of a line of white lin...,0.0,0.0,photography website for the company that took ...,a blurry image of a bunch of white lines on a ...,0.0,0.0,photography website for the company that took ...,a black and white image of a wave pattern\n,0.0,0.0,photography website for the company that took ...,a close up of a clock with a black background\n,0.0,0.0
2,photographer logo and name,the writing is in white,0.0,0.0,photographer logo and name,a man is standing in front of a blue background\n,0.0,0.0,photographer logo and name,a man is holding a camera and taking a picture\n,0.0,0.0,photographer logo and name,profile picture for sc photography\n,0.0,0.0,photographer logo and name,a close up of a man wearing a white shirt and ...,0.0,0.0
3,black tie the man is wearing,The man is wearing a tie.,1.0,1.0,black tie the man is wearing,a man wearing a suit and tie\n,1.0,1.0,black tie the man is wearing,a man wearing a tie and a shirt with a tie\n,1.0,1.0,black tie the man is wearing,a man wearing a suit and tie\n,1.0,1.0,black tie the man is wearing,a close up of a woman's legs in a pair of high...,0.0,0.0
4,a vest that is under the coat,The man is wearing a black vest.,0.0,0.0,a vest that is under the coat,a man is sitting on a chair\n,0.0,0.0,a vest that is under the coat,a black cat sitting on a couch with a blanket ...,0.0,0.0,a vest that is under the coat,a man is sitting on a chair\n,0.0,0.0,a vest that is under the coat,a close up of a pair of shoes with a person we...,0.0,0.0


In [67]:
# Short alias for the filtered frame (same object, not a copy).
df = filtered_combined_df

In [72]:
# Rows whose blip2-opt-2.7b verb distance lies in [0.4, 0.6] (both ends
# included), sorted from highest to lowest, first 50 shown.
(
    df[df['blip2-opt-2.7b-verb_distance'].between(0.4, 0.6)]
    .sort_values(by='blip2-opt-2.7b-verb_distance', ascending=False)
    .head(50)
)

Unnamed: 0,sca-ref,sca-pred,sca-verb_distance,sca-verb_recall,blip2-opt-2.7b-ref,blip2-opt-2.7b-pred,blip2-opt-2.7b-verb_distance,blip2-opt-2.7b-verb_recall,blip2-opt-2.7b-coco-ref,blip2-opt-2.7b-coco-pred,blip2-opt-2.7b-coco-verb_distance,blip2-opt-2.7b-coco-verb_recall,blip2-opt-6.7b-ref,blip2-opt-6.7b-pred,blip2-opt-6.7b-verb_distance,blip2-opt-6.7b-verb_recall,blip2-opt-6.7b-coco-ref,blip2-opt-6.7b-coco-pred,blip2-opt-6.7b-coco-verb_distance,blip2-opt-6.7b-coco-verb_recall
12766,truck hauling an airplane's cargo,A truck is next to the plane.,0.0,0.0,truck hauling an airplane's cargo,a truck is driving down the road\n,0.599886,0.0,truck hauling an airplane's cargo,a truck is carrying boxes on the back of it\n,0.726537,0.0,truck hauling an airplane's cargo,a truck with a load of boxes on the back\n,0.0,0.0,truck hauling an airplane's cargo,a truck with a load of furniture on the back o...,0.0,0.0
238035,sliding glass door,the window is open,0.0,0.0,sliding glass door,a man is holding a white towel\n,0.599672,0.0,sliding glass door,a blurry picture of a cell phone with a red li...,0.0,0.0,sliding glass door,a person is holding a cell phone in front of a...,0.599672,0.0,sliding glass door,a blurry picture of a man standing in front of...,0.612457,0.0
13302,Tree trunk providing shade,the trunk of a tree,0.0,0.0,Tree trunk providing shade,a man is standing in front of a building\n,0.599651,0.0,Tree trunk providing shade,a blurry image of a giraffe standing in a field\n,0.599651,0.0,Tree trunk providing shade,a person is standing in front of a building\n,0.599651,0.0,Tree trunk providing shade,a man standing in the shade of a tree with a s...,0.599651,0.0
123060,railing is green,a green fence on the side of the platform,0.0,0.0,railing is green,a train is pulling into a station\n,0.599623,0.0,railing is green,a man is walking down a sidewalk with a skateb...,0.495665,0.0,railing is green,a train is coming\n,0.41808,0.0,railing is green,a man walking down a sidewalk next to a green ...,0.495665,0.0
96716,person siting on the side,a man standing in front of a wall,0.742729,0.0,person siting on the side,a man is painting a wall\n,0.599327,0.0,person siting on the side,a woman is sitting on a chair in a bathroom\n,0.656225,0.0,person siting on the side,a man standing in front of a large white board\n,0.742729,0.0,person siting on the side,a woman standing in front of a large white boa...,0.742729,0.0
123993,man eating chocolate glazed doughnut with spri...,a man eating a donut,0.599321,0.5,man eating chocolate glazed doughnut with spri...,a man eating a doughnut\n,0.599321,0.5,man eating chocolate glazed doughnut with spri...,a man eating a doughnut with sprinkles on it\n,0.599321,0.5,man eating chocolate glazed doughnut with spri...,a man eating a donut\n,0.599321,0.5,man eating chocolate glazed doughnut with spri...,a man with glasses eating a doughnut with spri...,0.599321,0.5
108952,Catcher crouching down in the dirt,the catcher is squatting,0.713759,0.0,Catcher crouching down in the dirt,a baseball player is throwing a ball\n,0.599297,0.0,Catcher crouching down in the dirt,a blurry picture of a table with a bunch of st...,0.0,0.0,Catcher crouching down in the dirt,a baseball player is swinging a bat\n,0.648913,0.0,Catcher crouching down in the dirt,a blurry image of a baseball player in a unifo...,0.563707,0.0
188897,ladels and whisk hanging from ceiling,utensils hanging from the ceiling,0.599189,0.5,ladels and whisk hanging from ceiling,a kitchen with a mixer and a whisk hanging fro...,0.599189,0.5,ladels and whisk hanging from ceiling,a close up of a mixer with a whisk and a bowl\n,0.0,0.0,ladels and whisk hanging from ceiling,a kitchen with a stove and a mixer\n,0.0,0.0,ladels and whisk hanging from ceiling,a ceiling fan and a metal whisk hanging from a...,0.599189,0.5
139590,a cutting boarding leaning against a wall,a wooden cutting board,0.0,0.0,a cutting boarding leaning against a wall,a wooden cutting board sitting on top of a sink\n,0.59917,0.0,a cutting boarding leaning against a wall,a man is sitting on a chair with a book in fro...,0.59917,0.0,a cutting boarding leaning against a wall,a sink with a faucet and a wooden cutting board\n,0.0,0.0,a cutting boarding leaning against a wall,a close up of a sink with a faucet and a towel\n,0.0,0.0
184970,Guy wearing white shirt and tie,a man wearing a white shirt,0.599068,0.5,Guy wearing white shirt and tie,a man wearing a white shirt\n,0.599068,0.5,Guy wearing white shirt and tie,a man in a white shirt and tie standing next t...,0.757276,0.5,Guy wearing white shirt and tie,a man wearing a white shirt\n,0.599068,0.5,Guy wearing white shirt and tie,a man in a white shirt and tie standing outside\n,0.757276,0.5
