In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import copy
import json
import os
import pickle
import typing
from collections import Counter, defaultdict
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple

import numpy as np
import pandas as pd
import PIL.Image as pil_img
import seaborn as sns
import simdjson as json
from IPython.display import display
from PIL import Image
from pycocotools.coco import COCO
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Load and Inspect the Newly Saved RefCOCO Dataset

In [3]:
# Directory holding one sub-directory per dataset (e.g. "fprefcocog_v002").
# Set the REFSEG_DIR environment variable to run this notebook on another
# machine; when it is unset or empty the original location is used.
REFSEG_DIR = Path(os.environ.get("REFSEG_DIR") or "/shared/gbiamby/data/refer_seg/")

In [16]:
# Allowed `split_by` values for each dataset name. `build_refcoco` rejects any
# dataset name or split that is not listed here, and falls back to the first
# entry of a dataset's list when no split is requested.
VALID_SPLITS = {
    "R-refcoco": ["unc"],
    "R-refcoco+": ["unc"],
    "R-refcocog": ["umd"],
    "refclef": ["berkeley", "unc"],
    "refcoco": ["google"],
    "refcoco+": ["unc"],
    "refcocog": ["google", "umd"],
    "fprefcocog_v002": ["berkeley"],
}


def build_refcoco(refseg_path: Path, dataset_name: str, split_by: str = None) -> COCO:
    """Open one referring-expression dataset as a ``COCO`` object.

    Args:
        refseg_path: Directory containing one sub-directory per dataset.
        dataset_name: Key of ``VALID_SPLITS`` naming the dataset to load.
        split_by: Split name; must be listed under ``dataset_name`` in
            ``VALID_SPLITS``. When ``None``, the first listed split is used.

    Returns:
        The ``COCO`` instance created for
        ``<refseg_path>/<dataset_name>/instances.json``.

    Raises:
        AssertionError: If the dataset name or the requested split is not
            listed in ``VALID_SPLITS``.
    """
    assert dataset_name in VALID_SPLITS, dataset_name
    allowed_splits = VALID_SPLITS[dataset_name]
    if split_by is not None:
        assert split_by in allowed_splits
    chosen_split = allowed_splits[0] if split_by is None else split_by

    annotation_file = refseg_path / dataset_name / "instances.json"
    return COCO(
        annotation_file,
        is_ref_dataset=True,
        dataset_name=dataset_name,
        split_by=chosen_split,
    )


# Load each dataset listed below. `coco` keeps the last dataset loaded and is
# the object the following cells inspect.
df_aggs = []
for ds_name in ["fprefcocog_v002"]:
    # Banner: blank lines, a 220-character rule, then the dataset title.
    print("\n\n", "=" * 220, f"Dataset: {ds_name}(berkeley)", sep="\n")
    coco = COCO(
        REFSEG_DIR,
        is_ref_dataset=True,
        dataset_name=ds_name,
        split_by="berkeley",
    )
    ## if you are using cocobetter you can output some stats by un-commenting these lines:
    # df_refcoco, df_refcoco_agg = coco.get_ref_stats()
    # df_aggs.append(df_refcoco_agg)


# The list stays empty unless the stats lines above are enabled.
if df_aggs:
    df_aggs = pd.concat(df_aggs)
    display(df_aggs)




Dataset: fprefcocog_v002(berkeley)
Loading refs from '/shared/gbiamby/data/refer_seg/fprefcocog_v002/refs(berkeley).p'
Loaded 49822 refs
loading annotations into memory...
Done (t=1.15s)
creating index...
index created!


The format is nearly identical to refcoco/refcocog. The difference is that each sentence object carries some additional properties.

`coco->images->refs->sentences`

#### True sentences vs false premise:

There are two equivalent ways to detect whether a sentence is a false-premise sentence:

`s["exist"] == False` and `s["is_false_premise"]`

Usually these sentences also have `sent_id = -1`, but that is not guaranteed so don't rely on it.


In [17]:
def display_ref(ref):
    """Print one ref's keys followed by a line for each of its sentences.

    Every sentence line shows its ``sent_id``, its ``is_false_premise`` flag
    and its text. False-premise sentences get two extra indented lines: their
    ``change_type`` and the id/text stored in ``gt_sent_id``/``gt_sent``.

    Args:
        ref: Mapping with a ``"sentences"`` list of sentence dicts.
    """
    print("ref has keys: ", ref.keys())
    sentences = ref["sentences"]
    print(f"ref has {len(sentences)} sentences")
    for sentence in sentences:
        summary = (
            f"sent_id:{sentence['sent_id']}, "
            f"is_FP:{sentence['is_false_premise']}, "
            f"sent: '{sentence['sent']}'"
        )
        print(summary)
        if not sentence["is_false_premise"]:
            continue
        # Extra detail tying the false-premise sentence to its parent sentence.
        print("\tchange_type: ", sentence["change_type"])
        parent_line = (
            f"\tparent_sent_id: {sentence['gt_sent_id']}, "
            f"parent_sent: '{sentence['gt_sent']}'"
        )
        print(parent_line)


def show_refexp_example(refcoco: COCO, ref_id: int = 1000):
    """Print one example referring expression from a loaded dataset.

    Args:
        refcoco: Dataset object whose ``refs`` mapping is indexed by ref id.
        ref_id: Id of the ref to print. Defaults to 1000, the example this
            notebook showed before the id became a parameter.

    Raises:
        Whatever ``refcoco.refs[ref_id]`` raises for an unknown id
        (``KeyError`` when ``refs`` is a dict).
    """
    display_ref(refcoco.refs[ref_id])


# Print the example ref (id 1000) from the dataset loaded above.
show_refexp_example(coco)

ref has keys:  dict_keys(['image_id', 'split', 'sentences', 'file_name', 'category_id', 'ann_id', 'sent_ids', 'ref_id'])
ref has 4 sentences
sent_id:21606, is_FP:False, sent: 'a large polar bear looking at a smaller polar bear'
sent_id:21607, is_FP:False, sent: 'white polar bear looking at another bear'
sent_id:-1, is_FP:True, sent: 'a large penguin looking at a smaller penguin'
	change_type:  
	parent_sent_id: 21606, parent_sent: 'a large polar bear looking at a smaller polar bear'
sent_id:-1, is_FP:True, sent: 'white penguin looking at another penguin'
	change_type:  
	parent_sent_id: 21607, parent_sent: 'white polar bear looking at another bear'



#### Change Type

Sentences in this dataset have a new property called `change_type`, indicating what kind of change was made to convert the original ground-truth sentence into the false-premise one. The value can be an empty string, as in the polar bear example above.

In [25]:
def show_change_type(coco: COCO, change_type: str):
    """Print the first ref that has a sentence with the requested change type.

    Refs are scanned in the iteration order of ``coco.refs``. As soon as a
    sentence whose ``change_type`` equals ``change_type`` is found, the value
    is printed, the whole ref is shown with ``display_ref`` and the search
    stops. Nothing is printed when no sentence matches.

    Args:
        coco: Dataset object exposing a ``refs`` mapping of ref id to ref dict.
        change_type: Value to look for, e.g. ``"main_subject"`` or
            ``"NOT_MAIN_SUBJ"``.
    """
    for ref in coco.refs.values():
        for sentence in ref["sentences"]:
            # Sentences without a change_type key are skipped. The comparison
            # uses the caller's value; it was previously pinned to
            # "main_subject", which made the argument a no-op.
            if "change_type" in sentence and sentence["change_type"] == change_type:
                print("Change_Type: ", sentence["change_type"])
                display_ref(ref)
                return


# show_change_type(coco, "NOT_MAIN_SUBJ")
# Print the first ref that contains a "main_subject" sentence.
show_change_type(coco, "main_subject")

Change_Type:  main_subject
ref has keys:  dict_keys(['image_id', 'split', 'sentences', 'file_name', 'category_id', 'ann_id', 'sent_ids', 'ref_id'])
ref has 4 sentences
sent_id:29, is_FP:False, sent: 'a truck number 14 on a snow bank'
sent_id:30, is_FP:False, sent: 'a truck with the number 14 painted on it'
sent_id:-1, is_FP:True, sent: 'a spaceship number 14 on a snow bank'
	change_type:  NOT_MAIN_SUBJ
	parent_sent_id: 29, parent_sent: 'a truck number 14 on a snow bank'
sent_id:-1, is_FP:True, sent: 'a spaceship with the number 14 painted on it'
	change_type:  main_subject
	parent_sent_id: 30, parent_sent: 'a truck with the number 14 painted on it'
