In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import copy
import os
import pickle
from collections import Counter, defaultdict
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
import seaborn as sns
import simdjson as json
import skimage.io as io
from geo_llm_ret.fpref.ref_datasets import build_refcoco
from IPython.display import display
from PIL import Image

# If you are using normal pycocotools you will need to comment out these three lines and replace with just: from pycocotools.coco import COCO
from pycocotools.coco import COCO, Ann, Cat, Image, Ref
from pycocotools.helpers import CocoClassDistHelper, get_ref_stats
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
IMG_DIR = Path("/shared/gbiamby/data/coco/train2014")
REFSEG_DIR = Path("/shared/gbiamby/data/refer_seg")
dataset_name = "fprefcocog_v002"
split_by = "berkeley_exclude_unified"
refcoco = build_refcoco(REFSEG_DIR, dataset_name, split_by)

Loading refs from '/shared/gbiamby/data/refer_seg/fprefcocog_v002/refs(berkeley_exclude_unified).p'
Loaded 49822 refs
loading annotations into memory...
Done (t=3.46s)
creating index...
index created!


In [8]:
df_refs = []
df_aggs = []

df_refcoco, df_refcoco_agg = get_ref_stats(refcoco, L=1)
df_refcoco_agg["dataset"] = f"{dataset_name}({split_by})"
df_refcoco_agg["ann_count"] = len(refcoco.anns)
df_refcoco_agg["img_count"] = len(refcoco.imgs)

# Make 'dataset' the first column:
df_refcoco_agg.insert(0, "dataset", df_refcoco_agg.pop("dataset"))
df_aggs.append(df_refcoco_agg)

# df_refs
df_refcoco["dataset"] = f"{dataset_name}({split_by})"
df_refcoco.insert(0, "dataset", df_refcoco.pop("dataset"))
df_refs.append(df_refcoco)

df_refs = pd.concat(df_refs)
df_aggs = pd.concat(df_aggs)

display(df_refs)
display(df_aggs)

pos/neg sentence_counts:  95010 91240


Unnamed: 0,dataset,ref_id,ann_id,category_id,category,supercategory,sent_count,pos_sent_count,neg_sent_count
0,fprefcocog_v002(berkeley_exclude_unified),0,298801,32,tie,accessory,4,2,2
1,fprefcocog_v002(berkeley_exclude_unified),1,453172,1,person,person,4,2,2
2,fprefcocog_v002(berkeley_exclude_unified),2,401571,8,truck,vehicle,4,2,2
3,fprefcocog_v002(berkeley_exclude_unified),3,584873,22,elephant,animal,4,2,2
4,fprefcocog_v002(berkeley_exclude_unified),4,1154908,86,vase,indoor,4,2,2
...,...,...,...,...,...,...,...,...,...
49817,fprefcocog_v002(berkeley_exclude_unified),49817,135604,3,car,vehicle,2,1,1
49818,fprefcocog_v002(berkeley_exclude_unified),49818,2166645,1,person,person,4,2,2
49819,fprefcocog_v002(berkeley_exclude_unified),49819,56109,19,horse,animal,4,2,2
49820,fprefcocog_v002(berkeley_exclude_unified),49820,584687,22,elephant,animal,4,2,2


Unnamed: 0,dataset,pos_sent_count,num_refs,sent_count,total_pos_sents,total_neg_sents,ann_count,img_count
0,fprefcocog_v002(berkeley_exclude_unified),1,4714,9185,4714,4471,208960,25799
1,fprefcocog_v002(berkeley_exclude_unified),2,45028,176596,90056,86540,208960,25799
2,fprefcocog_v002(berkeley_exclude_unified),3,80,469,240,229,208960,25799


---

# Load LISA Preds

In [95]:
def load_fpref_preds(preds_path: Path) -> list[dict[str, Any]]:
    """
    Load inference results from disk, i.e., load outputs of `lisa_validation_new.py` (or similar
    script).
    """
    with open(preds_path, "r") as json_file:
        data = json.load(json_file)
        print(f"Loaded {len(data)} items (images) from preds file: {preds_path}")
        return data


def get_sentence_lookup(refcoco: COCO):
    """Helper method that returns dict whose keys are a tuple of (image_id, ref_id, sent_idx),
    values are sent dicts"""
    sent_lookup = {}
    for image_id, refs in refcoco.img_to_refs.items():
        for ref in refs:
            for sent_idx, sent in enumerate(ref["sentences"]):
                sent_lookup[(image_id, ref["ref_id"], sent_idx)] = sent
    return sent_lookup


def is_false_premise(sample: dict) -> bool:
    if "gt_exist" in sample:
        is_fp = not sample["gt_exist"]
    elif "exists" in sample:
        is_fp = not sample["exists"]
    elif "is_false_premise" in sample:
        is_fp = sample["is_false_premise"]
    else:
        is_fp = False
    return is_fp


def match_with_refcoco(
    preds: dict[str, dict[str, Any]], refseg_dir, dataset_name: str, split_by: str
):
    """
    Combines inference outputs (predictions) with data from the corresponding refcoco dataset. For
    example the refcoco dataset has information about which type of modification has been done to
    make a sentence a False Premise, or which ground truth sentence each FP sentence maps to, and
    what all the ground truth referring expression sentences for the image are.

    Input: `preds` is the prediction outputs of `lisa_validation_new.py` (or similar script). Those
    results are a dictionary whose keys are image_id (int), and values are a set of predictions for
    all the sentences mapped to that image.

    Output is a flattened list of results where each element corresponds to a sentence. The image
    info is repeated for each sentence. Therefore the output will be longer than the input.
    """
    assert refseg_dir.exists(), str(refseg_dir)
    refcoco = build_refcoco(refseg_dir, dataset_name, split_by)
    sent_lookup = get_sentence_lookup(refcoco)
    path_to_img: dict[str, Image] = {
        Path(img["file_name"]).name: img for img_id, img in refcoco.imgs.items()
    }
    preds_merged = []
    for image_path, img_preds in preds.items():
        assert len(img_preds["pred_sent"]) == len(
            img_preds["conversation_list"]
        ), "lengths don't match: pred_sent and conversation_list"
        assert len(img_preds["pred_sent"]) == len(
            img_preds["gt_exists"]
        ), "lengths don't match: pred_sent and gt_exists"
        assert len(img_preds["pred_sent"]) == len(
            img_preds["ref_ids"][0]
        ), "lengths don't match: pred_sent and ref_ids     "
        assert len(img_preds["pred_sent"]) == len(
            img_preds["sent_idxs"][0]
        ), "lengths don't match: pred_sent and sent_idxs"
        assert len(img_preds["pred_sent"]) == len(
            img_preds["pred_exists"]
        ), "lengths don't match: pred_sent and pred_exists"

        image: Image = path_to_img[Path(image_path).name]
        img_gt_sents = []
        sent_to_ref = {}
        for ref in refcoco.img_to_refs[image["id"]]:
            for sent_idx, sent in enumerate(ref["sentences"]):
                sent_to_ref[(ref["ref_id"], sent_idx)] = ref
                if not is_false_premise(sent):
                    img_gt_sents.append(sent)

        for conversation, pred_sent, gt_exist, ref_id, sent_idx, pred_exist in zip(
            img_preds["conversation_list"],
            img_preds["pred_sent"],
            img_preds["gt_exists"],
            img_preds["ref_ids"][0],
            img_preds["sent_idxs"][0],
            img_preds["pred_exists"],
        ):
            sent = sent_lookup[(image["id"], ref_id, sent_idx)]
            ref = sent_to_ref[(ref_id, sent_idx)]

            # Gather gt sents for the current ref:
            ref_gt_sents = []
            for _ref in refcoco.img_to_refs[image["id"]]:
                for _sent in _ref["sentences"]:
                    if _ref["ref_id"] == ref_id:
                        if not is_false_premise(_sent):
                            ref_gt_sents.append(_sent)

            merged_pred = {
                "image_id": image["id"],
                "image_path": image_path,
                "conversation": conversation,
                "pred_sent": pred_sent,
                "gt_exist": gt_exist,
                "ref_id": ref["ref_id"],
                "ref_split": ref["split"],
                "ann_id": ref["ann_id"],
                "sent_idx": sent_idx,
                # "pred_exist": pred_exist,
                "sent": sent,
                # "img_gt_sents": img_gt_sents,
                # "ref_gt_sents": ref_gt_sents,
            }
            preds_merged.append(merged_pred)
    return preds_merged


results_paths = [
    # LISA
    "/home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcoco_val/fprefcoco|berkeley_exclude_unified|val.json",
    "/home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcoco+_val/fprefcoco+|berkeley_exclude_unified|val.json",
    "/home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcocog_val/fprefcocog|berkeley_exclude_unified|val.json",
    # # Hollistic (ours):
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_001/fprefcoco|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_001/fprefcoco+|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_001/fprefcocog|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
    # LLAVA + LISA (ours):
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_cascading_002/fprefcoco|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_cascading_002/fprefcoco+|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
    # "/home/gbiamby/proj/geo-llm-ret/lib/clair/output/clair_scores_cascading_002/fprefcocog|berkeley_exclude_unified|val-clair_scores-model_gpt-4-turbo.json",
]
dfs = []
preds_raw = {}
for p in results_paths:
    ds_name, split_by = Path(p).stem.split("|")[0], Path(p).stem.split("|")[1]
    if not ds_name.endswith("v002"):
        ds_name += "_v002"
    print("loading clair results for ", ds_name)
    preds_raw[ds_name] = load_fpref_preds(p)
    preds_flat = match_with_refcoco(preds_raw[ds_name], REFSEG_DIR, ds_name, split_by)
    df = pd.DataFrame(preds_flat)
    df.insert(0, "ds", ds_name)
    dfs.append(df)
df = pd.concat(dfs)

# Done, show preview:
print("num preds loaded: ", len(df))
pd.options.display.float_format = "{:,.2f}".format
pd.set_option("display.max_colwidth", None)
df.head(3)

loading clair results for  fprefcoco_v002
Loaded 1500 items (images) from preds file: /home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcoco_val/fprefcoco|berkeley_exclude_unified|val.json
Loading refs from '/shared/gbiamby/data/refer_seg/fprefcoco_v002/refs(berkeley_exclude_unified).p'
Loaded 50000 refs
loading annotations into memory...
Done (t=1.07s)
creating index...
index created!
loading clair results for  fprefcoco+_v002
Loaded 1500 items (images) from preds file: /home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcoco+_val/fprefcoco+|berkeley_exclude_unified|val.json
Loading refs from '/shared/gbiamby/data/refer_seg/fprefcoco+_v002/refs(berkeley_exclude_unified).p'
Loaded 49856 refs
loading annotations into memory...
Done (t=1.07s)
creating index...
index created!
loading clair results for  fprefcocog_v002
Loaded 1300 items (images) from preds file: /home/patrickwu/new_lisa/neg_refer_llm/LISA_result/fprefcocog_val/fprefcocog|berkeley_exclude_unified|val.json
Loading 

Unnamed: 0,ds,image_id,image_path,conversation,pred_sent,gt_exist,ref_id,ref_split,ann_id,sent_idx,sent
0,fprefcoco_v002,559760,/home/patrickwu/LISA/dataset_new/refer_seg/images/mscoco/images/train2014/COCO_train2014_000000559760.jpg,"Please segment guy on right in this image if it exists. Otherwise, tell me the object doesn't exist and optionally offer an alternative.","Sure, [SEG] .",True,2038,val,480245,0,"{'tokens': ['guy', 'on', 'right'], 'raw': 'guy on right', 'sent_id': 5819, 'sent': 'guy on right', 'spcy_WORD': ['guy', 'on', 'right'], 'spcy_DEP': ['ROOT', 'prep', 'pobj'], 'spcy_POS': ['NOUN', 'ADP', 'INTJ'], 'spcy_LEM': ['guy', 'on', 'right'], 'spcy_TAG': ['NN', 'IN', 'UH'], 'spcy_IS_STOP': [False, True, False], 'spcy_ENTS': [], 'spcy_NOUN_CHUNKS': ['guy'], 'is_false_premise': False, 'exist': True}"
1,fprefcoco_v002,559760,/home/patrickwu/LISA/dataset_new/refer_seg/images/mscoco/images/train2014/COCO_train2014_000000559760.jpg,"Please segment short guy in this image if it exists. Otherwise, tell me the object doesn't exist and optionally offer an alternative.","Sure, it is [SEG] .",True,2038,val,480245,1,"{'tokens': ['short', 'guy'], 'raw': 'short guy', 'sent_id': 5820, 'sent': 'short guy', 'spcy_WORD': ['short', 'guy'], 'spcy_DEP': ['amod', 'ROOT'], 'spcy_POS': ['ADJ', 'NOUN'], 'spcy_LEM': ['short', 'guy'], 'spcy_TAG': ['JJ', 'NN'], 'spcy_IS_STOP': [False, False], 'spcy_ENTS': [], 'spcy_NOUN_CHUNKS': ['short guy'], 'is_false_premise': False, 'exist': True}"
2,fprefcoco_v002,559760,/home/patrickwu/LISA/dataset_new/refer_seg/images/mscoco/images/train2014/COCO_train2014_000000559760.jpg,"Please segment right short guy in this image if it exists. Otherwise, tell me the object doesn't exist and optionally offer an alternative.","Sure, it is [SEG] .",True,2038,val,480245,2,"{'tokens': ['right', 'short', 'guy'], 'raw': 'right short guy', 'sent_id': 5821, 'sent': 'right short guy', 'spcy_WORD': ['right', 'short', 'guy'], 'spcy_DEP': ['intj', 'compound', 'ROOT'], 'spcy_POS': ['INTJ', 'PROPN', 'NOUN'], 'spcy_LEM': ['right', 'short', 'guy'], 'spcy_TAG': ['UH', 'NNP', 'NN'], 'spcy_IS_STOP': [False, False, False], 'spcy_ENTS': ['short guy'], 'spcy_NOUN_CHUNKS': ['right short guy'], 'is_false_premise': False, 'exist': True}"


In [96]:
df["has_seg_token"] = df.pred_sent.apply(lambda row: "[SEG]" in row.upper())
df["pred_id"] = df.apply(
    lambda row: (
        row["ds"],
        row["image_id"],
        row["ref_id"],
        row["sent_idx"],
        row["gt_exist"],
    ),
    axis=1,
)
display(df.sort_values("pred_id").head(2))

Unnamed: 0,ds,image_id,image_path,conversation,pred_sent,gt_exist,ref_id,ref_split,ann_id,sent_idx,sent,has_seg_token,pred_id
8755,fprefcoco+_v002,716,/home/patrickwu/LISA/dataset_new/refer_seg/images/mscoco/images/train2014/COCO_train2014_000000000716.jpg,"Please segment pesron near large light blue wall in this image if it exists. Otherwise, tell me the object doesn't exist and optionally offer an alternative.","Sure, it is [SEG] .",True,49800,val,1262399,0,"{'tokens': ['pesron', 'near', 'large', 'light', 'blue', 'wall'], 'raw': 'Pesron near large light blue wall', 'sent_id': 141409, 'sent': 'pesron near large light blue wall', 'spcy_WORD': ['pesron', 'near', 'large', 'light', 'blue', 'wall'], 'spcy_DEP': ['ROOT', 'prep', 'amod', 'amod', 'amod', 'pobj'], 'spcy_POS': ['NOUN', 'ADP', 'ADJ', 'ADJ', 'ADJ', 'NOUN'], 'spcy_LEM': ['pesron', 'near', 'large', 'light', 'blue', 'wall'], 'spcy_TAG': ['NN', 'IN', 'JJ', 'JJ', 'JJ', 'NN'], 'spcy_IS_STOP': [False, False, False, False, False, False], 'spcy_ENTS': [], 'spcy_NOUN_CHUNKS': ['pesron', 'large light blue wall'], 'is_false_premise': False, 'exist': True}",True,"(fprefcoco+_v002, 716, 49800, 0, True)"
8756,fprefcoco+_v002,716,/home/patrickwu/LISA/dataset_new/refer_seg/images/mscoco/images/train2014/COCO_train2014_000000000716.jpg,"Please segment person standing in this image if it exists. Otherwise, tell me the object doesn't exist and optionally offer an alternative.","Sure, it is [SEG] .",True,49800,val,1262399,1,"{'tokens': ['person', 'standing'], 'raw': 'person standing', 'sent_id': 141410, 'sent': 'person standing', 'spcy_WORD': ['person', 'standing'], 'spcy_DEP': ['compound', 'ROOT'], 'spcy_POS': ['NOUN', 'NOUN'], 'spcy_LEM': ['person', 'standing'], 'spcy_TAG': ['NN', 'NN'], 'spcy_IS_STOP': [False, False], 'spcy_ENTS': [], 'spcy_NOUN_CHUNKS': ['person standing'], 'is_false_premise': False, 'exist': True}",True,"(fprefcoco+_v002, 716, 49800, 1, True)"


In [97]:
def flatten(df: pd.DataFrame) -> pd.DataFrame:
    df.columns = [f"{x}_{y}" for x, y in df.columns.to_flat_index()]
    return df

In [103]:
df_summ = flatten(
    df.groupby(["ds"], as_index=False).agg(
        {
            "image_id": ["nunique"],
            "pred_id": ["nunique", "count"],
            "gt_exist": ["sum", ],
            "has_seg_token": ["sum", ],
        }
    )
)
display(df_summ)

Unnamed: 0,ds_,image_id_nunique,pred_id_nunique,pred_id_count,gt_exist_sum,has_seg_token_sum
0,fprefcoco+_v002,1500,20962,20962,10758,20962
1,fprefcoco_v002,1500,21094,21094,10834,21094
2,fprefcocog_v002,1300,9554,9554,4896,9554


In [99]:
df_summ = flatten(
    df.groupby(["ds", "has_seg_token"], as_index=False).agg(
        {
            "image_id": ["nunique"],
        }
    )
)
display(df_summ)

Unnamed: 0,ds_,has_seg_token_,image_id_nunique
0,fprefcoco+_v002,True,1500
1,fprefcoco_v002,True,1500
2,fprefcocog_v002,True,1300


In [101]:
df_summ = flatten(
    df.groupby(["ds", "pred_id"], as_index=False).agg(
        {"image_id": ["nunique"], "pred_id": ["nunique"]}
    )
).sort_values(["pred_id_nunique"], ascending=False)
display(df_summ)

Unnamed: 0,ds_,image_id_nunique,pred_id_nunique
0,fprefcoco+_v002,1,1
34410,fprefcoco_v002,1,1
34400,fprefcoco_v002,1,1
34401,fprefcoco_v002,1,1
34402,fprefcoco_v002,1,1
...,...,...,...
17206,fprefcoco+_v002,1,1
17207,fprefcoco+_v002,1,1
17208,fprefcoco+_v002,1,1
17209,fprefcoco+_v002,1,1
