## neg_refcocov002.ipynb Create a false premise referring expressions dataset

Create a COCO formatted dataset that uses `gpt-3.5-turbo` to create false premise referring expressions that refer to objects that do not exist in the image. 

## Types of Modifications

We ask GPT to modify the ground truth referring expressions for each image.
We categorize each FP according to the type of modification. We have three categories:

- Modify the main subject of the sentence. This means changing from one noun or noun phrase to another one. "A woman..." -> "A cat..."
- Modify an attribute of the main subject. "A tall man..." -> "A short man"
- Modify some other portion of the description. This usually means either modifying a spatial relation, or a participatory object that the expression relates somehow to the main subject, or sometimes an attribute of the participatory object.

## File Format
The referring expressions follow same format as refcoco/refcocog/refcoco+/R-refcoco/etc, i.e., a COCO formatted json file, accompanied by a file with a `.p` extension, which contains the true and false referring expressions. The `.p` file is a python pickle file. These datasets can be loaded using the common `refer.py`, or the `COCO` class in `github.com/GiscardBiamby/cocobetter.git`. Examples can be found later in this notebook.



In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
%pip list | grep json

fastjsonschema                    2.18.0
json5                             0.9.14
jsonpointer                       2.4
jsons                             1.6.3
jsonschema                        4.19.1
jsonschema-specifications         2023.7.1
pysimdjson                        5.0.2
python-json-logger                2.0.7
python-lsp-jsonrpc                1.1.1
ujson                             5.8.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
import argparse
import copy
import csv
import decimal
import json
import os
import pickle
import typing
from collections import Counter, defaultdict
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as pil_img
import seaborn as sns
import simdjson as json
from IPython.display import display
from PIL import Image
from pycocotools.coco import COCO, Ann, Cat, Image, Ref
from pycocotools.helpers import CocoClassDistHelper, CocoJsonBuilder
from pycocotools.helpers.coco_builder import COCOShrinker
from tqdm.auto import tqdm

# from geo_llm_ret.ref_datasets import build_ref_coco

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
COCO_DIR = Path("/shared/gbiamby/data/coco")
IMG_DIR = COCO_DIR / "val2017"

In [5]:
VALID_SPLITS = {
    "R-refcoco": ["unc"],
    "R-refcoco+": ["unc"],
    "R-refcocog": ["umd"],
    "refclef": ["berkeley", "unc"],
    "refcoco": ["google"],
    "refcoco+": ["unc"],
    "refcocog": ["google", "umd"],
    # "refcoconeg_v001_train2014": ["berkeley"],
    # "refcoconeg_v001_train2014_mini": ["berkeley"],
    # "refcoconeg_v001_val2014": ["berkeley"],
    # "refcoconeg_v001_val2014_mini": ["berkeley"],
    # "refcoconeg_v001_train2017": ["berkeley"],
    # "refcoconeg_v001_train2017_mini": ["berkeley"],
    # "refcoconeg_v001_val2017": ["berkeley"],
    # "refcoconeg_v001_val2017_mini": ["berkeley"],
}


def build_refcoco(refseg_path: Path, dataset_name: str, split_by: str = None) -> COCO:
    assert dataset_name in VALID_SPLITS, dataset_name
    if split_by is None:
        split_by = VALID_SPLITS[dataset_name][0]
    else:
        assert split_by.replace("_enhanced", "") in VALID_SPLITS[dataset_name]
    coco = COCO(
        refseg_path / dataset_name / "instances.json",
        is_ref_dataset=True,
        dataset_name=dataset_name,
        split_by=split_by,
    )
    return coco


IMG_DIR = Path("/shared/gbiamby/data/coco/train2014")
PROJ_ROOT = Path("../../../../../").resolve()
assert PROJ_ROOT.exists()
# REFSEG_DIR = Path("/shared/gbiamby/data/refer_seg")
REFSEG_DIR = Path("output/ref_seg")
refcoco = build_refcoco(REFSEG_DIR, "refcocog", "google_enhanced")

Loading refs from '/home/gbiamby/proj/geo-llm-ret/lib/cocobetter/PythonAPI/notebooks/ref_correct/output/ref_seg/refcocog/refs(google_enhanced).p'
Loaded 49822 refs
loading annotations into memory...
Done (t=2.97s)
creating index...
index created!


In [6]:
api_results_dir = (
    PROJ_ROOT / "output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo"
)
assert api_results_dir.exists(), str(api_results_dir)
assert api_results_dir.is_dir(), str(api_results_dir)


def load_api_responses(api_results_dir: Path, max_results: int = None) -> list[dict]:
    response_files = sorted(api_results_dir.glob("responses/img_id_*.json"))
    if max_results is not None and max_results > 0:
        response_files = response_files[:max_results]
    results = []
    for f in tqdm(response_files):
        with open(f, "r", encoding="utf-8") as json_file:
            result = json.load(json_file)
            results.append(result)

    print(f"Loaded {len(results)} responses from {api_results_dir}")
    return results


api_responses = load_api_responses(api_results_dir)
print("Example response: ")
display(api_responses[0])

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25798/25798 [00:01<00:00, 13273.18it/s]

Loaded 25798 responses from /home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo
Example response: 





{'api_response': {'choices': [{'finish_reason': 'stop',
    'index': 0,
    'message': {'content': 'Altered Descriptions: ["a purple , oval shaped bowl filled with rainbow-colored cereal and candies", "cereal with candies", "the bag and the veggies on the left side", "a container with fruit and a slice of lemon in it"]',
     'role': 'assistant'}}],
  'created': 1699058754,
  'id': 'chatcmpl-8Gz5OkAkNv9p8ZOaOEs8u5KBH6Pdu',
  'model': 'gpt-3.5-turbo-0613',
  'object': 'chat.completion',
  'usage': {'completion_tokens': 50,
   'prompt_tokens': 512,
   'total_tokens': 562}},
 'image_id': 100022,
 'request_info': {'ann_ids': [712188, 1899463],
  'image_id': 100022,
  'ref_ids': [47553, 48090],
  'sent_ids': [[99372, 99373], [100622, 100623]],
  'sentences': ['a pink , oval shaped bowl filled with brown rice and veggies',
   'rice with veggies',
   'the container and the veggies on the right side',
   'a container with vegetables and a slice of lime in it'],
  'subjects': ['bowl', 'rice', '

## Check Quality of the API Results - Filter Bad Responses

In [7]:
import ast

import regex as re
from simplediff import diff, string_diff


def parse_result(image_id: int, reply: str, warnings, errors) -> list[str]:
    def parse_result_main(reply: str):
        # print(f"ChatGPT Reply: \n\t{reply}")
        matches = re.match(
            # '.*Descriptions:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*(?P<descriptions>\\[\\".*\\"\\])',
            '.*Description[s]{0,1}:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*\\[{0,1}(?P<descriptions>\\".*\\")\\]{0,1}',
            reply,
            re.MULTILINE | re.DOTALL,
        )
        if matches is None:
            return None
        list_str = matches.group("descriptions")
        if not list_str.startswith("["):
            list_str = "[" + list_str
        if not list_str.endswith("]"):
            list_str = list_str + "]"
        new_sents = ast.literal_eval(list_str)
        # print("New Sents: ", new_sent)
        if new_sents is None:
            errors.append(
                {
                    "image_id": image_id,
                    "msg": f"No FP sents found (fp_sents is None)",
                    "raw_reply": reply,
                }
            )
            return None
        return new_sents

    def parse_result_multiline_list(reply: str):
        matches = re.match(
            '(?:.*Description[s]{0,1}:[ ]*(\\(.+ sentence[s]{0,1}\\):){0,1})\\n*?(?P<descriptions>\\n\\d\\.[ ]*\\"[^\\n\\"]+\\")+',
            reply,
            re.MULTILINE | re.DOTALL,
        )
        if matches is None:
            return None
        captures = matches.capturesdict()
        if (captures is None or len(captures) == 0) or (
            captures is not None and "descriptions" not in captures
        ):
            return None
        new_sents = []
        for cap in captures["descriptions"]:
            matches = re.match('(?:[\\d]\\.)\\s*?\\"(?P<sent>[^\\"]+)\\"', cap.strip())
            # print("match: ", matches)
            # print("sent: ", matches.groupdict()["sent"])
            new_sents.append(matches.groupdict()["sent"])
        return new_sents

    try:
        reply = reply.replace('\\"', '"')
        new_sents = parse_result_main(reply)
        if new_sents is None:
            new_sents = parse_result_multiline_list(reply)
            # if new_sent is None:
            #     print(reply)
        return new_sents
    except Exception as ex:
        errors.append(
            {
                "image_id": image_id,
                "msg": str(ex) + " ex type: " + str(type(ex)),
                "raw_reply": reply,
            }
        )
        return None


def verify_results(result: list[dict], refcoco: COCO):
    image_id = result["image_id"]
    warnings, errors = [], []
    raw_reply: list[str] = result["api_response"]["choices"][0]["message"]["content"]
    fp_sents = parse_result(image_id, raw_reply, warnings, errors)
    request_info = result["request_info"]
    gt_sents = request_info["sentences"]

    if fp_sents is None:
        return warnings, errors
    assert isinstance(fp_sents, list)

    # Ensure correct number of FP sentences were generated:
    if len(fp_sents) != len(gt_sents):
        errors.append(
            {
                "image_id": image_id,
                "msg": "Wrong number of FP sentences",
                "msg_detail": f"len(fp_sents):{len(fp_sents)}!=len(gt_sents):{len(gt_sents)}",
                "fp_sents": fp_sents,
                "gt_sents": gt_sents,
                "reply": result,
                "raw_reply": raw_reply,
            }
        )

    for sent, fp_sent in zip(gt_sents, fp_sents):
        # Warn if FP sentence is same as original sentence:
        if sent.lower() == fp_sent.lower():
            warnings.append(
                {
                    "image_id": image_id,
                    "msg": "FP is exact match for GT sentence",
                    "msg_detail": f"{sent}=={fp_sent}",
                    "fp_sents": fp_sent,
                    "gt_sent": sent,
                    "reply": result,
                    "raw_reply": raw_reply,
                }
            )
            continue

        diff_result = string_diff(sent.lower(), fp_sent.lower())
        num_matching_spans = len([res[0] for res in diff_result if res[0] == "="])

        if num_matching_spans not in {1, 2}:
            warnings.append(
                {
                    "image_id": image_id,
                    "msg": "Wrong num_matching_spans",
                    "msg_detail": f":{num_matching_spans}, diff:{diff_result}",
                    "fp_sents": fp_sent,
                    "gt_sent": sent,
                    "reply": result,
                    "raw_reply": raw_reply,
                }
            )
        # print("")
    result["fp_sents"] = fp_sents
    # print("warnings: ", len(warnings), "errors: ", len(errors))
    return warnings, errors


def check_fpsent_counts(results: list[dict]):
    has_fpsents_count = 0
    num_results = 0
    for result in results:
        num_results += 1
        if "fp_sents" in result and len(result["fp_sents"]) > 0:
            has_fpsents_count += 1

    print("num_results: ", num_results)
    print("has_fpsents_count: ", has_fpsents_count)


# Check all results:
api_responses = load_api_responses(api_results_dir)[:200000]
warnings = []
errors = []
for result in tqdm(api_responses):
    _warnings, _errors = verify_results(result, refcoco)
    warnings.extend(_warnings)
    errors.extend(_errors)

check_fpsent_counts(api_responses)

# Summarize Results:
print(f"Found {len(warnings)} warnings")
print(f"Found {len(errors)} errors")
# print("")
# print("=" * 220)
# print("Warnings:")
# for warn in warnings[:3]:
#     print("")
#     print("=" * 100)
#     print(json.dumps(warn, indent=4))
# print("")
# print("=" * 220)
# print("Errors:")
# for err in errors[:3]:
#     print("")
#     print("=" * 100)
#     print(json.dumps(err, indent=4))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25798/25798 [00:00<00:00, 32457.13it/s]


Loaded 25798 responses from /home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25798/25798 [00:02<00:00, 11958.87it/s]

num_results:  25798
has_fpsents_count:  25624
Found 117 errors





### Save Intermediate Results: API Responses With Parsed Results

In [8]:
parsed_results_path = api_results_dir / "parsed_results_001.pkl"
pickle.dump(api_responses, open(parsed_results_path, "wb"))

## Show Error Counts Grouped By Type

In [9]:
df_errors = pd.DataFrame(errors)
# display(df_errors)
df_err_counts = (
    df_errors.groupby(["msg"])
    .agg(
        total=("image_id", "count"),
        uniqe_imgs=("image_id", "nunique"),
    )
    .sort_values("total", ascending=False)
)
display(df_err_counts)

Unnamed: 0_level_0,total,uniqe_imgs
msg,Unnamed: 1_level_1,Unnamed: 2_level_1
"invalid syntax (<unknown>, line 1) ex type: <class 'SyntaxError'>",59,59
Wrong number of FP sentences,55,55
"EOL while scanning string literal (<unknown>, line 1) ex type: <class 'SyntaxError'>",2,2
"invalid syntax (<unknown>, line 3) ex type: <class 'SyntaxError'>",1,1


This amount of errors seems acceptable. The top two types of error are:

- (59 errors) Invalid pythong list syntaks for the sentences, e.g., unmatched string quotes, missing commas
- (55 errors) Wrong number of false premise sentences returned by chat-gpt. If we wanted to, we could use whatever sentences gpt was able to provide.

In [10]:
with pd.option_context("display.max_colwidth", None, "display.max_columns", None):
    display(
        df_errors[df_errors.msg == "Wrong number of FP sentences"].sort_values(
            "raw_reply"
        )
    )

Unnamed: 0,image_id,msg,msg_detail,fp_sents,gt_sents,reply,raw_reply
32,259477,Wrong number of FP sentences,len(fp_sents):1!=len(gt_sents):0,[],[],"{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': '## Original Descriptions: (0 sentences): [""""]\n\nAltered Descriptions: (0 sentences): [""""]', 'role': 'assistant'}}], 'created': 1699058664, 'id': 'chatcmpl-8Gz3wAFimhjkWY9Npwech6CGZNxM2', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 22, 'prompt_tokens': 471, 'total_tokens': 493}}, 'image_id': 259477, 'request_info': {'ann_ids': [588620], 'image_id': 259477, 'ref_ids': [46540], 'sent_ids': [[97061, 97062]], 'sentences': [], 'subjects': []}, 'fp_sents': ['']}","## Original Descriptions: (0 sentences): [""""]\n\nAltered Descriptions: (0 sentences): [""""]"
55,357071,Wrong number of FP sentences,len(fp_sents):1!=len(gt_sents):6,"[The right most skateboardera man wearing black ski gear and a red strip on the arm standing on skateboardsa man turning around to pose for the camera while skateboarding in a black and white outfita man in black, on skateboards, standing between two other men on top of a hilla white color and green color sleevea man skateboarding wearing a tan jacket with purple sleeves]","[the right most skiier, a man wearing black ski gear and a red strip on the arm standing on skies, a man turning around to pose for the camera while skiing in a black and white outfit, a man in black , on skiis , standing between two other men on top of a mountain, a white color and blue color sleeve, a man skiing wearing a tan jacket with blue sleeves]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n\n""The right most skateboarder""\n""a man wearing black ski gear and a red strip on the arm standing on skateboards""\n""a man turning around to pose for the camera while skateboarding in a black and white outfit""\n""a man in black, on skateboards, standing between two other men on top of a hill""\n""a white color and green color sleeve""\n""a man skateboarding wearing a tan jacket with purple sleeves""', 'role': 'assistant'}}], 'created': 1699051001, 'id': 'chatcmpl-8Gx4L50Qnb4hUQ96IP39quYgANmP0', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 89, 'prompt_tokens': 555, 'total_tokens': 644}}, 'image_id': 357071, 'request_info': {'ann_ids': [440838, 468778, 476837], 'image_id': 357071, 'ref_ids': [11785, 12499, 36491], 'sent_ids': [[15798, 15799], [17434, 17435], [73680, 73681]], 'sentences': ['the right most skiier', 'a man wearing black ski gear and a red strip on the arm standing on skies', 'a man turning around to pose for the camera while skiing in a black and white outfit', 'a man in black , on skiis , standing between two other men on top of a mountain', 'a white color and blue color sleeve', 'a man skiing wearing a tan jacket with blue sleeves'], 'subjects': ['skiier', 'man', 'man', 'man', 'sleeve', 'man']}, 'fp_sents': ['The right most skateboardera man wearing black ski gear and a red strip on the arm standing on skateboardsa man turning around to pose for the camera while skateboarding in a black and white outfita man in black, on skateboards, standing between two other men on top of a hilla white color and green color sleevea man skateboarding wearing a tan jacket with purple sleeves']}","Altered Descriptions: \n\n""The right most skateboarder""\n""a man wearing black ski gear and a red strip on the arm standing on skateboards""\n""a man turning around to pose for the camera while skateboarding in a black and white outfit""\n""a man in black, on skateboards, standing between two other men on top of a hill""\n""a white color and green color sleeve""\n""a man skateboarding wearing a tan jacket with purple sleeves"""
20,204529,Wrong number of FP sentences,len(fp_sents):9!=len(gt_sents):10,"[a bottle of merlot, the bottle on the end that says merlot and has a purple box on it, glass in front of the cabernet bottle, glass in front of cabernet sauvignon bottle, a wine bottle with a red label, pinot noir wine bottle, a red strewn wine bottle, a glass of red wine second from the right, a guy who is wearing purple color shirt]","[a bottle of chardonnay, the bottle on the end that says chardonnay and has a green box on it, glass in front of the pinot bottle, glass in front of pinot blanco bottle, a wine bottle with a blue label, pinot blanc wine bottle, a white strewn wine bottle, a glass on white wine second from the right, a guy who is wearing orange color shirt, the left most person]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n\n1. ""a bottle of merlot""\n2. ""the bottle on the end that says merlot and has a purple box on it""\n3. ""glass in front of the cabernet bottle""\n4. ""glass in front of cabernet sauvignon bottle""\n5. ""a wine bottle with a red label""\n6. ""pinot noir wine bottle""\n7. ""a red strewn wine bottle""\n8. ""a glass of red wine second from the right""\n9. ""a guy who is wearing purple color shirt""\n10. ""the right most person""', 'role': 'assistant'}}], 'created': 1699050940, 'id': 'chatcmpl-8Gx3MMa5tk6Z43unC2z2Haqhxqz6P', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 124, 'prompt_tokens': 569, 'total_tokens': 693}}, 'image_id': 204529, 'request_info': {'ann_ids': [91365, 666556, 94460, 2093348, 666622, 1755762], 'image_id': 204529, 'ref_ids': [11542, 18044, 36843, 44261, 46242, 46725], 'sent_ids': [[15223, 15224], [30475, 30476], [74491, 74492], [91748], [96391], [97469, 97470]], 'sentences': ['a bottle of chardonnay', 'the bottle on the end that says chardonnay and has a green box on it', 'glass in front of the pinot bottle', 'glass in front of pinot blanco bottle', 'a wine bottle with a blue label', 'pinot blanc wine bottle', 'a white strewn wine bottle', 'a glass on white wine second from the right', 'a guy who is wearing orange color shirt', 'the left most person'], 'subjects': ['bottle', 'bottle', 'glass', 'glass', 'bottle', 'bottle', 'bottle', 'glass', 'guy', 'person']}, 'fp_sents': ['a bottle of merlot', 'the bottle on the end that says merlot and has a purple box on it', 'glass in front of the cabernet bottle', 'glass in front of cabernet sauvignon bottle', 'a wine bottle with a red label', 'pinot noir wine bottle', 'a red strewn wine bottle', 'a glass of red wine second from the right', 'a guy who is wearing purple color shirt']}","Altered Descriptions: \n\n1. ""a bottle of merlot""\n2. ""the bottle on the end that says merlot and has a purple box on it""\n3. ""glass in front of the cabernet bottle""\n4. ""glass in front of cabernet sauvignon bottle""\n5. ""a wine bottle with a red label""\n6. ""pinot noir wine bottle""\n7. ""a red strewn wine bottle""\n8. ""a glass of red wine second from the right""\n9. ""a guy who is wearing purple color shirt""\n10. ""the right most person"""
86,50885,Wrong number of FP sentences,len(fp_sents):9!=len(gt_sents):10,"[a yellow leather couch in front of two aliens playing wii, a leather stool, a woman in a white shirt playing video games, a man with a mustache and sunglasses operating a wii remote, a man wearing a jersey shirt that has the number 99 on it playing wii, the woman in the red jersey, a pink color chair in a room, the pink chair with arms visible just below and to the left of the computer monitor, the framed painting on the wall]","[a brown leather couch in front of two men playing wii, a leather couch, a man in a white shirt playing video games, a man with a beard and glasses operating a wii remote, a man wearing a jersey shirt that has the number 51 on it playing wii, the man in the blue jersey, a black color chair in a room, the black chair with arms visible just below and to the left of the computer monitor, the framed mirror on the wall, the mirror]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n\n1. ""a yellow leather couch in front of two aliens playing wii""\n2. ""a leather stool""\n3. ""a woman in a white shirt playing video games""\n4. ""a man with a mustache and sunglasses operating a wii remote""\n5. ""a man wearing a jersey shirt that has the number 99 on it playing wii""\n6. ""the woman in the red jersey""\n7. ""a pink color chair in a room""\n8. ""the pink chair with arms visible just below and to the left of the computer monitor""\n9. ""the framed painting on the wall""\n10. ""the painting""', 'role': 'assistant'}}], 'created': 1699050717, 'id': 'chatcmpl-8Gwzlg06bUrLQAINhuJAfti0EC9r4', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 137, 'prompt_tokens': 579, 'total_tokens': 716}}, 'image_id': 50885, 'request_info': {'ann_ids': [1937461, 228924, 229807, 1594782, 32064], 'image_id': 50885, 'ref_ids': [10687, 18047, 29094, 35511, 36216], 'sent_ids': [[13248, 13249], [30486, 30487], [56311, 56312], [71364, 71365], [73011, 73012]], 'sentences': ['a brown leather couch in front of two men playing wii', 'a leather couch', 'a man in a white shirt playing video games', 'a man with a beard and glasses operating a wii remote', 'a man wearing a jersey shirt that has the number 51 on it playing wii', 'the man in the blue jersey', 'a black color chair in a room', 'the black chair with arms visible just below and to the left of the computer monitor', 'the framed mirror on the wall', 'the mirror'], 'subjects': ['couch', 'couch', 'man', 'man', 'man', 'man', 'chair', 'chair', 'mirror', 'mirror']}, 'fp_sents': ['a yellow leather couch in front of two aliens playing wii', 'a leather stool', 'a woman in a white shirt playing video games', 'a man with a mustache and sunglasses operating a wii remote', 'a man wearing a jersey shirt that has the number 99 on it playing wii', 'the woman in the red jersey', 'a pink color chair in a room', 'the pink chair with arms visible just below and to the left of the computer monitor', 'the framed painting on the wall']}","Altered Descriptions: \n\n1. ""a yellow leather couch in front of two aliens playing wii""\n2. ""a leather stool""\n3. ""a woman in a white shirt playing video games""\n4. ""a man with a mustache and sunglasses operating a wii remote""\n5. ""a man wearing a jersey shirt that has the number 99 on it playing wii""\n6. ""the woman in the red jersey""\n7. ""a pink color chair in a room""\n8. ""the pink chair with arms visible just below and to the left of the computer monitor""\n9. ""the framed painting on the wall""\n10. ""the painting"""
1,125541,Wrong number of FP sentences,len(fp_sents):9!=len(gt_sents):10,"[an empty used purple bowl in front of a homemade pizza, a spoon on tabule, a pizza with pineapples and olives, the pizza as lots of broccoli, cheddar cheese, and a variety of vegetables, person wearing red next to child, the person on the very right of the child with the green shirt, the person sitting yellow, the woman with the dark jacket, child in front of the dining table]","[an empty used white bowl in front of a homemade pizza, a cup on tabule, a pizza with hot peppers and onions, the pizza as lots of peppers , mozzarella cheese , and a variety of meat, person wearing black next to child, the person on the very left of the child with the blue shirt, the person standing white, the man with the light jacket, child in fornt dinning table, a child licking his fingers]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n\n1. ""an empty used purple bowl in front of a homemade pizza""\n2. ""a spoon on tabule""\n3. ""a pizza with pineapples and olives""\n4. ""the pizza as lots of broccoli, cheddar cheese, and a variety of vegetables""\n5. ""person wearing red next to child""\n6. ""the person on the very right of the child with the green shirt""\n7. ""the person sitting yellow""\n8. ""the woman with the dark jacket""\n9. ""child in front of the dining table""\n10. ""a child washing his fingers""', 'role': 'assistant'}}], 'created': 1699050656, 'id': 'chatcmpl-8GwymMLHw4Ycg6WowuhDxwVQh3Y0z', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 128, 'prompt_tokens': 569, 'total_tokens': 697}}, 'image_id': 125541, 'request_info': {'ann_ids': [711207, 1071338, 1756168, 1733532, 513960], 'image_id': 125541, 'ref_ids': [10447, 10650, 12856, 33178, 33731], 'sent_ids': [[12672, 12673], [13155, 13156], [18272, 18273], [65865, 65866], [67181, 67182]], 'sentences': ['an empty used white bowl in front of a homemade pizza', 'a cup on tabule', 'a pizza with hot peppers and onions', 'the pizza as lots of peppers , mozzarella cheese , and a variety of meat', 'person wearing black next to child', 'the person on the very left of the child with the blue shirt', 'the person standing white', 'the man with the light jacket', 'child in fornt dinning table', 'a child licking his fingers'], 'subjects': ['bowl', 'cup', 'pizza', 'pizza', 'person', 'person', 'person', 'man', 'child', 'child']}, 'fp_sents': ['an empty used purple bowl in front of a homemade pizza', 'a spoon on tabule', 'a pizza with pineapples and olives', 'the pizza as lots of broccoli, cheddar cheese, and a variety of vegetables', 'person wearing red next to child', 'the person on the very right of the child with the green shirt', 'the person sitting yellow', 'the woman with the dark jacket', 'child in front of the dining table']}","Altered Descriptions: \n\n1. ""an empty used purple bowl in front of a homemade pizza""\n2. ""a spoon on tabule""\n3. ""a pizza with pineapples and olives""\n4. ""the pizza as lots of broccoli, cheddar cheese, and a variety of vegetables""\n5. ""person wearing red next to child""\n6. ""the person on the very right of the child with the green shirt""\n7. ""the person sitting yellow""\n8. ""the woman with the dark jacket""\n9. ""child in front of the dining table""\n10. ""a child washing his fingers"""
89,523995,Wrong number of FP sentences,len(fp_sents):9!=len(gt_sents):10,"[a man in a costume at a table, a man with a blue tie, a woman with sunglasses sitting in a chair who is wearing a headset, a green jacket and a yellow lanyard, the woman sitting on the end in a purple and white polka dot shirt, the pink chair that the woman is sitting in, there is purple chair on which a women sat, the chair that the man in the middle is sitting in, a chair that a man with brown hair is sitting in, an older man in a suit with sunglasses on at a meeting]","[a man in a suit at a table, a man with a red tie, a woman with glasses sitting in a chair who is wearing a headset , a black jacket and a blue lanyard, the woman sitting on the end in a black and white polka dot shirt, the black chair that the woman is sitting in, there is black chair on which a women sat, the chair that the man in the middle is sitting in, a chair that a man with black hair is sitting in, an older man in a suit with headphones on at a meeting, a man wearing a blue tie and glasses]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n1. ""a man in a costume at a table""\n2. ""a man with a blue tie""\n3. ""a woman with sunglasses sitting in a chair who is wearing a headset, a green jacket and a yellow lanyard""\n4. ""the woman sitting on the end in a purple and white polka dot shirt""\n5. ""the pink chair that the woman is sitting in""\n6. ""there is purple chair on which a women sat""\n7. ""the chair that the man in the middle is sitting in""\n8. ""a chair that a man with brown hair is sitting in""\n9. ""an older man in a suit with sunglasses on at a meeting""\n10. ""a man wearing a green tie and sunglasses""', 'role': 'assistant'}}], 'created': 1699047601, 'id': 'chatcmpl-8GwBVYg5HQJ3xiycVr12GXJCGbvLs', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 158, 'prompt_tokens': 601, 'total_tokens': 759}}, 'image_id': 523995, 'request_info': {'ann_ids': [433921, 452536, 1595854, 386342, 437985], 'image_id': 523995, 'ref_ids': [2868, 8250, 15809, 42479, 46195], 'sent_ids': [[60156, 60157], [7597, 7598], [25146, 25147], [87648, 87649], [96284, 96285]], 'sentences': ['a man in a suit at a table', 'a man with a red tie', 'a woman with glasses sitting in a chair who is wearing a headset , a black jacket and a blue lanyard', 'the woman sitting on the end in a black and white polka dot shirt', 'the black chair that the woman is sitting in', 'there is black chair on which a women sat', 'the chair that the man in the middle is sitting in', 'a chair that a man with black hair is sitting in', 'an older man in a suit with headphones on at a meeting', 'a man wearing a blue tie and glasses'], 'subjects': ['man', 'man', 'woman', 'woman', 'woman', 'women', 'man', 'man', 'man', 'man']}, 'fp_sents': ['a man in a costume at a table', 'a man with a blue tie', 'a woman with sunglasses sitting in a chair who is wearing a headset, a green jacket and a yellow lanyard', 'the woman sitting on the end in a purple and white polka dot shirt', 'the pink chair that the woman is sitting in', 'there is purple chair on which a women sat', 'the chair that the man in the middle is sitting in', 'a chair that a man with brown hair is sitting in', 'an older man in a suit with sunglasses on at a meeting']}","Altered Descriptions: \n1. ""a man in a costume at a table""\n2. ""a man with a blue tie""\n3. ""a woman with sunglasses sitting in a chair who is wearing a headset, a green jacket and a yellow lanyard""\n4. ""the woman sitting on the end in a purple and white polka dot shirt""\n5. ""the pink chair that the woman is sitting in""\n6. ""there is purple chair on which a women sat""\n7. ""the chair that the man in the middle is sitting in""\n8. ""a chair that a man with brown hair is sitting in""\n9. ""an older man in a suit with sunglasses on at a meeting""\n10. ""a man wearing a green tie and sunglasses"""
53,351656,Wrong number of FP sentences,len(fp_sents):9!=len(gt_sents):12,"[a table of books at a pizza place, a purple dinner table behind another dinner table, a large sandwich sits on the table, a sandwich almost half eaten, a glass full of soda with sandwich in front of a girl, a full glass with a green straw, a green sweater on the back of a chair, the green jacket on the back of the chair, a baby eating pizza]","[a table of plates at a pizza place, a brown dinner table behind another dinner table, a large pizza sits on the table, a pizza almost half eaten, a glass full of juice with pizza in fort of a girl, a full glass with a red straw, a red sweater on the back of a chair, the red jacket on the back of the chair, a babe etning in pizza, a child in a striped shirt is eating pizza, red back chair with a child sitting on it, the chair in which the girl is sitting]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n1. ""a table of books at a pizza place""\n2. ""a purple dinner table behind another dinner table""\n3. ""a large sandwich sits on the table""\n4. ""a sandwich almost half eaten""\n5. ""a glass full of soda with sandwich in front of a girl""\n6. ""a full glass with a green straw""\n7. ""a green sweater on the back of a chair""\n8. ""the green jacket on the back of the chair""\n9. ""a baby eating pizza""\n10. ""a child in a polka dot shirt is eating sandwich""\n11. ""green back chair with a child sitting on it""\n12. ""the chair in which the boy is sitting""', 'role': 'assistant'}}], 'created': 1699050142, 'id': 'chatcmpl-8GwqUyzEx85Bb5yPvcKM6GdHMokUK', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 151, 'prompt_tokens': 590, 'total_tokens': 741}}, 'image_id': 351656, 'request_info': {'ann_ids': [1616977, 390440, 1877746, 1936822, 428884, 1945838], 'image_id': 351656, 'ref_ids': [8489, 9382, 10634, 17650, 32921, 38293], 'sent_ids': [[8125, 8126], [10199, 10200], [13119, 13120], [29563, 29564], [65252, 65253], [77801, 77802]], 'sentences': ['a table of plates at a pizza place', 'a brown dinner table behind another dinner table', 'a large pizza sits on the table', 'a pizza almost half eaten', 'a glass full of juice with pizza in fort of a girl', 'a full glass with a red straw', 'a red sweater on the back of a chair', 'the red jacket on the back of the chair', 'a babe etning in pizza', 'a child in a striped shirt is eating pizza', 'red back chair with a child sitting on it', 'the chair in which the girl is sitting'], 'subjects': ['table', 'table', 'pizza', 'pizza', 'glass', 'glass', 'sweater', 'jacket', 'babe', 'child', 'child', 'girl']}, 'fp_sents': ['a table of books at a pizza place', 'a purple dinner table behind another dinner table', 'a large sandwich sits on the table', 'a sandwich almost half eaten', 'a glass full of soda with sandwich in front of a girl', 'a full glass with a green straw', 'a green sweater on the back of a chair', 'the green jacket on the back of the chair', 'a baby eating pizza']}","Altered Descriptions: \n1. ""a table of books at a pizza place""\n2. ""a purple dinner table behind another dinner table""\n3. ""a large sandwich sits on the table""\n4. ""a sandwich almost half eaten""\n5. ""a glass full of soda with sandwich in front of a girl""\n6. ""a full glass with a green straw""\n7. ""a green sweater on the back of a chair""\n8. ""the green jacket on the back of the chair""\n9. ""a baby eating pizza""\n10. ""a child in a polka dot shirt is eating sandwich""\n11. ""green back chair with a child sitting on it""\n12. ""the chair in which the boy is sitting"""
59,369774,Wrong number of FP sentences,len(fp_sents):1!=len(gt_sents):6,[girl with pigtails playing nintendo],"[girl with braids playing wii, the girl playing wii, a boy in green playing wii, a boy playing game, the woman in a white shirt who is not playing the wii, person on right]","{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: \n1. ""girl with pigtails playing nintendo"" \n2. ""the girl playing xbox"" \n3. ""a boy in purple playing playstation"" \n4. ""a girl playing game"" \n5. ""the man in a red shirt who is not playing the wii"" \n6. ""person on left""', 'role': 'assistant'}}], 'created': 1699053338, 'id': 'chatcmpl-8Gxg2MYQqy5x18c5RIkBPtmYulOac', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 74, 'prompt_tokens': 520, 'total_tokens': 594}}, 'image_id': 369774, 'request_info': {'ann_ids': [200806, 205674, 209198], 'image_id': 369774, 'ref_ids': [19158, 20161, 34232], 'sent_ids': [[33051, 33052], [35374, 35375], [68349, 68350]], 'sentences': ['girl with braids playing wii', 'the girl playing wii', 'a boy in green playing wii', 'a boy playing game', 'the woman in a white shirt who is not playing the wii', 'person on right'], 'subjects': ['girl', 'girl', 'boy', 'game', 'woman', 'person']}, 'fp_sents': ['girl with pigtails playing nintendo']}","Altered Descriptions: \n1. ""girl with pigtails playing nintendo"" \n2. ""the girl playing xbox"" \n3. ""a boy in purple playing playstation"" \n4. ""a girl playing game"" \n5. ""the man in a red shirt who is not playing the wii"" \n6. ""person on left"""
92,537022,Wrong number of FP sentences,len(fp_sents):1!=len(gt_sents):0,[],[],"{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: (0 sentences): [""""]', 'role': 'assistant'}}], 'created': 1699050325, 'id': 'chatcmpl-8GwtRpul8cI0HbyazaepWnvqGUq7z', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 11, 'prompt_tokens': 471, 'total_tokens': 482}}, 'image_id': 537022, 'request_info': {'ann_ids': [1624077], 'image_id': 537022, 'ref_ids': [9190], 'sent_ids': [[9761, 9762]], 'sentences': [], 'subjects': []}, 'fp_sents': ['']}","Altered Descriptions: (0 sentences): [""""]"
61,376559,Wrong number of FP sentences,len(fp_sents):1!=len(gt_sents):0,[],[],"{'api_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Altered Descriptions: (0 sentences): [""""]', 'role': 'assistant'}}], 'created': 1699049611, 'id': 'chatcmpl-8GwhvglUmbcWfbsyYdnKqLso4EuPL', 'model': 'gpt-3.5-turbo-0613', 'object': 'chat.completion', 'usage': {'completion_tokens': 11, 'prompt_tokens': 471, 'total_tokens': 482}}, 'image_id': 376559, 'request_info': {'ann_ids': [150853], 'image_id': 376559, 'ref_ids': [7025], 'sent_ids': [[4770]], 'sentences': [], 'subjects': []}, 'fp_sents': ['']}","Altered Descriptions: (0 sentences): [""""]"


In [11]:
with pd.option_context("display.max_colwidth", None, "display.max_columns", None):
    display(df_errors.iloc[38]["raw_reply"])
    print(df_errors.iloc[38]["raw_reply"])
    print(df_errors.iloc[38]["raw_reply"].replace('\\"', '"'))

'Altered Descriptions: (5 sentences): ["a snowboard with a geisha design", "longboard on the right", "a pink skateboard", "a surf board , and written " cascadia long board " on it", "vertical skateboard at front"]'

Altered Descriptions: (5 sentences): ["a snowboard with a geisha design", "longboard on the right", "a pink skateboard", "a surf board , and written " cascadia long board " on it", "vertical skateboard at front"]
Altered Descriptions: (5 sentences): ["a snowboard with a geisha design", "longboard on the right", "a pink skateboard", "a surf board , and written " cascadia long board " on it", "vertical skateboard at front"]


---

### Scratch Code to Debug Regex parsing of the chatGPT replies

In [None]:
# def parse_result(image_id: int, reply: str, warnings, errors) -> list[str]:
#     def parse_result_main(reply: str, debug=False):
#         # print(f"ChatGPT Reply: \n\t{reply}")
#         matches = re.match(
#             # '.*Descriptions:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*(?P<descriptions>\\[\\".*\\"\\])',
#             '.*Description[s]{0,1}:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*\\[{0,1}(?P<descriptions>\\".*\\")\\]{0,1}',
#             reply,
#             re.MULTILINE | re.DOTALL,
#         )
#         if matches is None:
#             if DEBUG:
#                 print("parse_result_main() No matches")
#             return None
#         list_str = matches.group("descriptions")
#         if not list_str.startswith("["):
#             list_str = "[" + list_str
#         if not list_str.endswith("]"):
#             list_str = list_str + "]"

#         new_sent = ast.literal_eval(list_str)
#         # print("New Sents: ", new_sent)
#         if new_sent is None:
#             errors.append(
#                 {
#                     "image_id": image_id,
#                     "msg": f"No FP sents found (fp_sents is None)",
#                     "raw_reply": reply,
#                 }
#             )
#             return None
#         return new_sent

#     def parse_result_multiline_list(reply: str, debug=False):
#         matches = re.match(
#             '(?:.*Description[s]{0,1}:[ ]*(\\(.+ sentence[s]{0,1}\\):){0,1})\\n*?(?P<descriptions>\\n\\d\\.[ ]*\\"[^\\n\\"]+\\")+',
#             reply,
#             re.MULTILINE | re.DOTALL,
#         )
#         if matches is None:
#             if DEBUG:
#                 print("parse_result_multiline_list() No matches")
#             return None
#         captures = matches.capturesdict()
#         if (captures is None or len(captures) == 0) or (
#             captures is not None and "descriptions" not in captures
#         ):
#             return None
#         sents = []
#         for cap in captures["descriptions"]:
#             matches = re.match('(?:[\\d]\\.)\\s*?\\"(?P<sent>[^\\"]+)\\"', cap.strip())
#             # print("match: ", matches)
#             # print("sent: ", matches.groupdict()["sent"])
#             sents.append(matches.groupdict()["sent"])
#         return sents

#     # try:
#     # reply = reply.replace('\\"', '"')
#     new_sent = parse_result_main(reply)
#     print("new_sent1: ", new_sent)
#     if new_sent is None:
#         new_sent = parse_result_multiline_list(reply)
#         print("new_sent2: ", new_sent)
#         # if new_sent is None:
#         #     print(reply)
#     return new_sent
#     # except Exception as ex:
#     #     print(
#     #         {
#     #             "image_id": image_id,
#     #             "msg": str(ex) + " ex type: " + str(type(ex)),
#     #             "raw_reply": reply,
#     #         }
#     #     )
#     #     return None
#     return None


# reply = """## Answer\n\nAltered Descriptions: (6 sentences): ["man in a purple and black jacket bending over", "an older gentleman reaching up to pick to sign a page \", "man wearing a yellow shirt", "man wearing glasses , purple shirt and khakis", "the chair behind the man in a yellow shirt", "the sofa in lavender color"]
# """
# reply = df_errors.iloc[38]["raw_reply"].replace('\\"', '"')
# print(reply)
# print(parse_result(-1, reply, [], []))

In [None]:
# # reply = """## Answer
# # Altered Descriptions: (4 sentences): ["the giraffe eating the grass on the ground", "the giraffe is grazing", "a giraffe looking straight at the camera", "giraffe on the left that is looking at cameraman"]
# # """
# # reply = df_errors.iloc[164]["raw_reply"]
# reply = '''Altered Descriptions:
# 1. "a bowl of some smelly food possibly applesauce"
# 2. "bowl of food with blue spoon in the bowl"'''

# print(reply)

# patterns = [
#     # re.compile(
#     #     '.*Descriptions:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*(?P<descriptions>\\[\\".*\\"\\])',
#     #     re.MULTILINE | re.DOTALL,
#     # ),
#     re.compile(
#         # '(?:[^\\n\\"]*Descriptions:\\s*(\\(.+ sentences\\):)\\s*\n+)(\\n\\d\\.\\s*\\"[^\\n\\"]+\\")+',
#         '(?:.*Descriptions:[ ]*(\\(.+ sentences\\):){0,1})\\n*?(?P<descriptions>\\n\\d\\.[ ]*\\"[^\\n\\"]+\\")+',
#         re.MULTILINE | re.DOTALL,
#     ),
# ]
# for pat in patterns:
#     matches = pat.match(reply)
#     if matches is None:
#         print("NO MATCHES")
#         continue
#     print("\nmatches: ", matches)
#     print("\ngroupdict: ", matches.groupdict())
#     print("\ncapturesdict: ", matches.capturesdict())
#     print("len(ncapturesdict) ", len(matches.capturesdict()["descriptions"]))

#     if matches is not None and "descriptions" in matches.groupdict():
#         list_str = matches.group("descriptions")
#         print("list: ", list_str)
#         # Convert the string to a Python object
#         new_sent = ast.literal_eval(list_str)
#         print("new_sent: ", new_sent)
#         break

In [None]:
# import ast

# import regex as re

# replies = [
#     'Altered Description: "purple bush to the left of the sign"',
#     '''Altered Descriptions:
# 1. "a bowl of some smelly food possibly applesauce"
# 2. "bowl of food with blue spoon in the bowl"''',
#     """Altered Descriptions: (1 sentence): ["a purple chair facing the garden"]""",
#     """Altered Descriptions: (6 sentences): ["the hat of the standing man", "a purple hat with repetitive circular patterns", "the girl in the green dress standing next to the man", "a girl with sunglasses on", "a man with straight blonde hair in a blue suit with a yellow hat stands with a woman in sunglasses and a red see through outfit", "a man with light blonde, straight hair wearing a suit and hat standing next to a woman"]""",
#     """Altered Descriptions: ["the hat of the standing man", "a purple hat with repetitive circular patterns", "the girl in the green dress standing next to the man", "a girl with sunglasses on", "a man with straight blonde hair in a blue suit with a yellow hat stands with a woman in sunglasses and a red see through outfit", "a man with light blonde, straight hair wearing a suit and hat standing next to a woman"]""",
#     """Modified Descriptions: (6 sentences): ["the hat of the standing man", "a purple hat with repetitive circular patterns", "the girl in the green dress standing next to the man", "a girl with sunglasses on", "a man with straight blonde hair in a blue suit with a yellow hat stands with a woman in sunglasses and a red see through outfit", "a man with light blonde, straight hair wearing a suit and hat standing next to a woman"]""",
#     """Modified Descriptions: ["the hat of the standing man", "a purple hat with repetitive circular patterns", "the girl in the green dress standing next to the man", "a girl with sunglasses on", "a man with straight blonde hair in a blue suit with a yellow hat stands with a woman in sunglasses and a red see through outfit", "a man with light blonde, straight hair wearing a suit and hat standing next to a woman"]""",
#     """Altered Descriptions (6 sentences): ["the bowtie of the standing man", "a neon green tie with repetitive lightning bolt patterns", "the girl in the pink dress standing next to the man", "a girl with sunglasses on", "a man with blue hair in a purple suit with a neon green bowtie stands with a woman in sunglasses and a neon pink outfit", "a man with blonde, straight hair wearing a tuxedo and bowtie standing next to a woman"]""",
#     """## Answer Altered Descriptions: (4 sentences): ["the giraffe eating the grass on the ground", "the giraffe is grazing", "a giraffe looking straight at the camera", "giraffe on the left that is looking at cameraman"]""",
#     """## Answer
# Altered Descriptions: (4 sentences): ["the giraffe eating the grass on the ground", "the giraffe is grazing", "a giraffe looking straight at the camera", "giraffe on the left that is looking at cameraman"]""",
#     #     """Altered Descriptions: (6 sentences):
#     # 1. "the crown of the standing man"
#     # 2. "a purple crown with repetitive circular patterns"
#     # 3. "the girl in the pink dress standing next to the man"
#     # 4. "a girl with sunglasses on"
#     # 5. "a man with curly black hair in a black suit with a purple crown stands with a woman in sunglasses and a black see through outfit"
#     # 6. "a man with dark, curly hair wearing a suit and crown standing next to a woman"
#     # """
# ]

# for reply in replies:
#     print("")
#     # matches = re.match("Altered Descriptions[:]+ \\(.{1,} sentences\\):\\s+(?P<descriptions>\\[\\\".*\\\"\\])", reply)
#     matches = re.match(
#         '.*Description[s]{0,1}:*\\s*(\\(.{1,} sentence[s]{0,1}\\):){0,1}\\s*\\[{0,1}(?P<descriptions>\\".*\\")\\]{0,1}',
#         reply,
#         re.MULTILINE,
#     )
#     print("matches: ", matches)
#     if matches is not None and "descriptions" in matches.groupdict():
#         print("match.group: ", matches.group("descriptions"))

#     if matches is not None and "descriptions" in matches.groupdict():
#         list_str = matches.group("descriptions")
#         # Convert the string to a Python object
#         new_sent = ast.literal_eval(list_str)
#         print("new_sent: ", new_sent)

---

## Categorize False Premise Types

In [12]:
import spacy

spacy.require_gpu()
import spacy_transformers

nlp = spacy.load("en_core_web_trf")

In [13]:
def get_fp_sentences_flat(responses: list[dict]) -> tuple[list[str], list[int]]:
    # Make a flat list of FP sentences so we can batch process with spacy:
    fp_sents_all = []
    img_ids_all = []
    for response in responses:
        if "fp_sents" not in response:
            continue
        fp_sents: list[str] = [s for s in response["fp_sents"] if len(s) > 0]
        fp_sents_all.extend(fp_sents)
        img_ids = [response["image_id"]] * len(fp_sents)
        img_ids_all.extend(img_ids)
    return fp_sents_all, img_ids_all


def get_gt_sentences_flat(refcoco: COCO) -> list[dict]:
    # Make a flat list of FP sentences so we can batch process with spacy:
    sents_all = []
    for ref_id, ref in refcoco.refs.items():
        sents: list[str] = ref["sentences"]
        sents_all.extend(
            [
                {
                    "sent": s,
                    "sent_id": s["sent_id"],
                    "image_id": ref["image_id"],
                    "ref_id": ref_id,
                }
                for s in sents
            ]
        )
    return sents_all


def get_spacy_docs(
    responses, refcoco: COCO, api_results_dir: Path, force_recompute: bool = False
) -> tuple[list[spacy.tokens.Doc], list[int]]:
    """
    Get spacy doc for FP sentences in all the responses. Caches the output to
    disk, and if a cached result already exists, it loads and returns that
    instead of re-computing the spacy Docs.

    Returns:
    :docs: flat list of spacy docs
    :img_ids: flat list (same length as docs) that maps indexes of docs to image_id
    """
    docs_path = api_results_dir / "fp_sentences_spacy_docs.pkl"
    fp_sents_all, doc_to_image = get_fp_sentences_flat(responses)

    if not force_recompute and docs_path.exists():
        print("loading cached spacy docs from disk")
        docs = pickle.load(open(docs_path, "rb"))
        assert len(docs) == len(fp_sents_all), f"{len(docs)} != {len(fp_sents_all)}"
    else:
        print("Computing spacy docs")
        B = 1000
        docs: list[spacy.tokens.Doc] = [
            d
            for d in tqdm(nlp.pipe(fp_sents_all, batch_size=B), total=len(fp_sents_all))
        ]
        assert len(docs) == len(fp_sents_all), f"{len(docs)} != {len(fp_sents_all)}"
        pickle.dump(docs, open(docs_path, "wb"))
    return docs, doc_to_image, fp_sents_all


# Only compute/load the docs if they aren't already in memory:
if (
    "docs" not in locals()
    or "doc_to_image" not in locals()
    or "fp_sents_all" not in locals()
    or True
):
    docs, doc_to_image, fp_sents_all = get_spacy_docs(
        api_responses, refcoco, api_results_dir, force_recompute=False
    )
print("Num spacy docs: ", len(docs))
print(len(docs), len(doc_to_image), len(fp_sents_all))

loading cached spacy docs from disk
Num spacy docs:  91466
91466 91466 91466


In [14]:
import string


def get_main_subject(sent: dict, use_root: bool = True):
    """Should always pass use_root=True, when there is no nsubj the ROOT is the main subject"""
    subjects = [
        word
        for word, dep in zip(sent["spcy_WORD"], sent["spcy_DEP"])
        if dep == "nsubj" and word not in nlp.Defaults.stop_words
    ]
    if use_root and (subjects is None or len(subjects) == 0):
        subjects = [
            word
            for word, dep in zip(sent["spcy_WORD"], sent["spcy_DEP"])
            if (dep == "ROOT") and word not in nlp.Defaults.stop_words
        ]
    return subjects


def tag_fp_sentences(
    responses: list[dict],
    refcoco: COCO,
    docs: list[spacy.tokens.Doc],
    doc_to_img: list[int],
    fp_sents_all: list[str],
):
    """
    Creates a deep copy of refcoco's `.refs_data` property, and enhances the
    copy by adding SpaCy NLP parsing tags for POS, TAG, DEP, etc. See here
    for more info: https://spacy.io/usage/linguistic-features
    """
    # Map docs back to the fp_sents
    fp_sent_dicts = []
    for i, (fp_sent, doc, image_id) in tqdm(
        enumerate(zip(fp_sents_all, docs, doc_to_image)), total=len(docs)
    ):
        # response = img_to_response[image_id]
        fp_sent_dict = {
            # "ref_id": -1, we'll have to match it with the right ref_id at some point
            "tokens": [
                word.strip()
                for word in fp_sent.split(" ")
                if word.strip() not in string.punctuation
            ],
            "raw": fp_sent,
            "sent_id": -1,
            "sent": fp_sent,
            "spcy_WORD": [str(word) for word in doc],
            "spcy_DEP": [word.dep_ for word in doc],
            "spcy_POS": [word.pos_ for word in doc],
            "spcy_LEM": [word.lemma_ for word in doc],
            "spcy_TAG": [word.tag_ for word in doc],
            "spcy_IS_STOP": [word.is_stop for word in doc],
            "spcy_ENTS": [ent.as_doc() for ent in doc.ents],
            "spcy_NOUN_CHUNKS": [str(nc) for nc in doc.noun_chunks],
            # "spcy_DOC": doc, # this takes up way too much space 2.7G vs 115MB
        }
        fp_sent_dict["main_subject"] = get_main_subject(fp_sent_dict)
        # print(fp_tags)
        fp_sent_dicts.append(fp_sent_dict)
        # if i == 0:
        #     print(fp_sent_dict)

    return fp_sent_dicts


fp_sent_dicts = tag_fp_sentences(
    api_responses, refcoco, docs, doc_to_image, fp_sents_all
)
print(len(fp_sent_dicts), len(docs))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91466/91466 [00:09<00:00, 10159.46it/s]

91466 91466





In [15]:
gt_flat = get_gt_sentences_flat(refcoco)
gt_flat[0]

{'sent': {'tokens': ['the', 'tie', 'of', 'the', 'standing', 'man'],
  'raw': 'The tie of the standing man',
  'sent_id': 2,
  'sent': 'the tie of the standing man',
  'spcy_DEP': ['det', 'ROOT', 'prep', 'det', 'amod', 'pobj'],
  'spcy_POS': ['DET', 'NOUN', 'ADP', 'DET', 'VERB', 'NOUN'],
  'spcy_LEM': ['the', 'tie', 'of', 'the', 'stand', 'man'],
  'spcy_TAG': ['DT', 'NN', 'IN', 'DT', 'VBG', 'NN'],
  'spcy_IS_STOP': [True, False, True, True, False, False],
  'spcy_ENTS': [the tie of the standing man],
  'spcy_WORD': ['the', 'tie', 'of', 'the', 'standing', 'man'],
  'spcy_NOUN_CHUNKS': ['the tie', 'the standing man']},
 'sent_id': 2,
 'image_id': 546154,
 'ref_id': 0}

# Correct Ambiguous FP vs. GT Sentence Matches

Sometimes GPT does not return an FP for every GT we input, i.e., we give it five ground truth sentences and it only outputs four false premise sentences. These cases are ambiguous (we don't know which sentence(s) GPT skipped) but we can attempt to match the outputs using string similarity metrics.

For the initial refcocog run, around 10% of the images consist of these ambiguous cases, so it is worth correcting. We do the corrections in the below cell.

#### fp vs gt sentence count 
- num_match: 23,128 images
- not_match: 2,496 images


In [16]:
from Levenshtein import distance as levenshtein_distance


def sentence_similarities(list_a: list[str], list_b: list[str]) -> dict[int, list[int]]:
    """Calculates the edit distance between elements of list_a and list_b."""
    similarity_scores = {}

    for i, a_item in enumerate(list_a):
        scores = []
        for b_item in list_b:
            score = levenshtein_distance(a_item, b_item)
            scores.append(score)
        similarity_scores[i] = scores

    return similarity_scores


def match_sentences(
    img_fps: list[dict], gt_refs_and_sents: list[tuple[dict, dict]], debug=False
):
    """
    Two list s of sentences for a single image, and attempt unambiguous match
    between them. img_fps is shorter in length than gt_refs_and_sents.
    """
    sim_scores = sentence_similarities(
        [sent["sent"] for sent in img_fps],
        [gt["sent"] for ref, gt in gt_refs_and_sents],
    )

    match_indices = []
    for (fp_idx, scores), fp in zip(sim_scores.items(), img_fps):
        idx = np.argmin(scores)
        match_indices.append(idx)

    if debug:
        print("match_indices: ", match_indices)
    # Consider matches unambiguous if each fp sentence is matched to a unique gt sentence
    is_unambiguous = len(set(match_indices)) == len(match_indices)
    result = []
    if is_unambiguous:
        for fp, match_idx in zip(img_fps, match_indices):
            gt_match = gt_refs_and_sents[match_idx]
            if debug:
                print(f"\t match: '{fp['sent']}', '{gt_match[1]['sent']}'")
            result.append((fp, gt_match))
    # else:
    #     print("No match! ", match_indices)
    return result


def match_fp_with_gt(fp_sent_dicts: list[dict], doc_to_image: list[int], refcoco: COCO):
    DEBUG = True
    refcoco = deepcopy(refcoco)
    gt_flat = get_gt_sentences_flat(refcoco)
    print("fp: ", len(fp_sent_dicts))
    print("gt: ", len(gt_flat))
    img_fps_all = defaultdict(list)
    {img_fps_all[img_id].append(fp) for fp, img_id in zip(fp_sent_dicts, doc_to_image)}
    num_match, num_not_match = 0, 0
    num_corrected = 0

    for idx, (image_id, img_fps) in tqdm(
        enumerate(img_fps_all.items()), total=len(img_fps_all)
    ):
        # img_fps is a list of dicts. Each dict has keys: ['tokens',
        #    'raw', 'sent_id', 'sent', 'spcy_WORD', 'spcy_DEP', 'spcy_POS',
        #    'spcy_LEM', 'spcy_TAG', 'spcy_IS_STOP', 'spcy_ENTS',
        #    'spcy_NOUN_CHUNKS', 'main_subject']
        # This function adds the following keys to these dicts:
        #    ['ref_id', 'ann_id', 'gt_sent_id', 'gt_sent']
        gt_refs_and_sents: list[tuple[dict, dict]] = []

        for img_ref in refcoco.img_to_refs[image_id]:
            # img_ref keys: ['image_id', 'split', 'sentences', 'file_name',
            #    'category_id', 'ann_id', 'sent_ids', 'ref_id']
            for s in img_ref["sentences"]:
                gt_refs_and_sents.append((img_ref, s))
        if len(gt_refs_and_sents) == len(img_fps):
            num_match += 1
            for fp, (img_ref, gt_sent) in zip(img_fps, gt_refs_and_sents):
                if len(fp["sent"].strip()) == 0:
                    print("EMPTY1 (img_id: {image_id}): ", fp)
                fp["ref_id"] = img_ref["ref_id"]
                fp["ann_id"] = img_ref["ann_id"]
                fp["gt_sent_id"] = gt_sent["sent_id"]
                fp["gt_sent"] = gt_sent["sent"]
                fp["is_false_premise"] = True
                img_ref["sentences"].append(fp)
        else:
            num_not_match += 1
            matches = match_sentences(img_fps, gt_refs_and_sents, False)
            if matches:
                num_corrected += 1
            for fp, (img_ref, gt_sent) in matches:
                if len(fp["sent"].strip()) == 0:
                    print(f"EMPTY2 (img_id: {image_id}): ", fp)
                fp["ref_id"] = img_ref["ref_id"]
                fp["ann_id"] = img_ref["ann_id"]
                fp["gt_sent_id"] = gt_sent["sent_id"]
                fp["gt_sent"] = gt_sent["sent"]
                fp["is_false_premise"] = True
                img_ref["sentences"].append(fp)
        # DEBUG:
        if DEBUG and num_corrected == 1:
            print("")
            print("=" * 200)
            print("Num gt: ", len(gt_refs_and_sents))
            print("img_fps: ", len(img_fps), img_fps[0].keys())
            img_refs = refcoco.img_to_refs[image_id]
            for img_ref in img_refs:
                print("")
                print("ref_id: ", img_ref["ref_id"])
                print("# sentences: ", len(img_ref["sentences"]))
                # print("# sentences: ", len([ref img_refs["sentences"]]))
                for s in img_ref["sentences"]:
                    print(
                        "sentence: ",
                        s["sent_id"],
                        s["sent"],
                        (
                            f"gt: ({s['gt_sent_id']}) {s['is_false_premise']}, {s['gt_sent']}"
                        )
                        if "is_false_premise" in s
                        else "",
                    )

            for i in range(len(img_fps)):
                display(
                    "img_fps: ",
                    [
                        f"'{k}': {v}"
                        for k, v in img_fps[i].items()
                        if k
                        in {
                            "sent_id",
                            "main_subject",
                            "ref_id",
                            "ann_id",
                            "gt_sent_id",
                            "gt_sent",
                            "sent",
                        }
                    ],
                )
            DEBUG = False

    print(
        f"num_match: {num_match}, not_match: {num_not_match}, num_corrected: {num_corrected}"
    )
    return refcoco


# REFSEG_DIR = Path("/shared/gbiamby/data/refer_seg")
# REFSEG_DIR = Path("output/ref_seg")
# refcoco = build_refcoco(REFSEG_DIR, "refcocog", "google_enhanced")
refcoco_new = match_fp_with_gt(fp_sent_dicts, doc_to_image, refcoco)

fp:  91466
gt:  95010


  0%|                                                                                                                                                                                                                                                                                     | 0/25577 [00:00<?, ?it/s]


Num gt:  4
img_fps:  3 dict_keys(['tokens', 'raw', 'sent_id', 'sent', 'spcy_WORD', 'spcy_DEP', 'spcy_POS', 'spcy_LEM', 'spcy_TAG', 'spcy_IS_STOP', 'spcy_ENTS', 'spcy_NOUN_CHUNKS', 'main_subject', 'ref_id', 'ann_id', 'gt_sent_id', 'gt_sent', 'is_false_premise'])

ref_id:  11375
# sentences:  3
sentence:  14832 a brown horse being ridden by a girl in a black helmet 
sentence:  14833 horse galloping on water , with rider wearing helmet 
sentence:  -1 a green elephant being ridden by a girl in a black helmet gt: (14832) True, a brown horse being ridden by a girl in a black helmet

ref_id:  43197
# sentences:  4
sentence:  89312 a brown horse running down the waterfront carrying a woman not wearing a hat 
sentence:  89313 the horse in front of another horse 
sentence:  -1 a green elephant running down the waterfront carrying a woman not wearing a hat gt: (89312) True, a brown horse running down the waterfront carrying a woman not wearing a hat
sentence:  -1 the elephant in front of another

'img_fps: '

["'sent_id': -1",
 "'sent': a green elephant being ridden by a girl in a black helmet",
 "'main_subject': ['elephant']",
 "'ref_id': 11375",
 "'ann_id': 57045",
 "'gt_sent_id': 14832",
 "'gt_sent': a brown horse being ridden by a girl in a black helmet"]

'img_fps: '

["'sent_id': -1",
 "'sent': a green elephant running down the waterfront carrying a woman not wearing a hat",
 "'main_subject': ['elephant']",
 "'ref_id': 43197",
 "'ann_id': 54496",
 "'gt_sent_id': 89312",
 "'gt_sent': a brown horse running down the waterfront carrying a woman not wearing a hat"]

'img_fps: '

["'sent_id': -1",
 "'sent': the elephant in front of another elephant",
 "'main_subject': ['elephant']",
 "'ref_id': 43197",
 "'ann_id': 54496",
 "'gt_sent_id': 89313",
 "'gt_sent': the horse in front of another horse"]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25577/25577 [00:00<00:00, 84586.33it/s]

num_match: 23101, not_match: 2476, num_corrected: 2439





---

### Detect Which Part of Each FP Sentence is Changed 

In [133]:
def get_num_changes(diff):
    diff_ops = [d[0] for d in diff]
    num_subs = "".join(diff_ops).count("-+")
    num_deletions = "".join(diff_ops).replace("-+", "").count("-")
    num_additions = "".join(diff_ops).replace("-+", "").count("+")
    total_changes = num_subs + num_deletions + num_additions
    return total_changes, num_subs, num_deletions, num_additions


def get_sentence_lookup(refcoco: COCO) -> dict[int, dict[str, Any]]:
    """Returns dict of all the sentences, with sent_id as the key"""
    sent_lookup = {}
    for ref_id, ref in refcoco.refs.items():
        for sent in ref["sentences"]:
            if "is_false_premise" in sent and sent["is_false_premise"]:
                continue
            sent_lookup[sent["sent_id"]] = sent
    return sent_lookup


def get_change_type(fp_sent: dict, change_info: dict):
    change_type = ""

    if change_info["num_changes"] == 1 and change_info["num_subs"] == 1:
        # Sentence has a single change. Get the subtracted and added words:
        _sub_words, _add_words = None, None
        for op, words in change_info["diff"]:
            # print(op)
            if op == "-":
                _sub_words = words
                # print("_sub_words ", _sub_words)
            if op == "+":
                _add_words = words
                # print("_add_words ",_add_words)
                break
        assert _sub_words, str(change_info["diff"])
        assert _add_words, str(change_info["diff"])
        # Categorize:
        gt_subject = change_info["gt_subj"]
        if isinstance(gt_subject, list) and len(gt_subject) > 0:
            gt_subject = gt_subject[0]
        if gt_subject in _sub_words:
            change_type = "main_subject"
            if len(_sub_words) > 1:
                change_type += f"(+{len(_sub_words)})"
        else:
            change_type = "NOT_MAIN_SUBJ"
            new_phrase = " ".join(_add_words)
            for nc in change_info["fp_NOUN_CHUNKS"]:
                if new_phrase in nc:
                    change_type = "other_subject"

    return change_type


def detect_changes(refcoco: COCO):
    sent_lookup = get_sentence_lookup(refcoco)
    items = []
    for idx, (ref_id, ref) in tqdm(
        enumerate(refcoco.refs.items()), total=len(refcoco.refs)
    ):
        sentences: list[str] = [s for s in ref["sentences"] if ("is_false_premise" in s and s["is_false_premise"])]
        for sent in sentences:
            gt_sent = sent_lookup[sent["gt_sent_id"]]
            diffs = string_diff(sent["gt_sent"], sent["sent"])
            num_changes, subs, deletions, additions = get_num_changes(diffs)
            items.append(
                {
                    "ref_id": ref_id,
                    "image_id": ref["image_id"],
                    "cat_id": ref["category_id"],
                    "gt_subj": get_main_subject(gt_sent),
                    "fp_subj": sent["main_subject"],
                    "gt_sent": sent["gt_sent"],
                    "fp_sent": sent["sent"],
                    "num_changes": num_changes,
                    "num_subs": subs,
                    "num_del": deletions,
                    "num_add": additions,
                    "diff_ops": tuple([d[0] for d in diffs]),
                    "diff": diffs,
                    "gt_NOUN_CHUNKS": gt_sent["spcy_NOUN_CHUNKS"],
                    "fp_NOUN_CHUNKS": sent["spcy_NOUN_CHUNKS"],
                }
            )
            items[-1]["change_type"] = get_change_type(sent, items[-1])
            sent["change_type"] = items[-1]["change_type"]
            if idx < 2:
                print("")
                print(sent["main_subject"])
                print(sent["gt_sent"])
                print(sent["sent"])
                print(diff)
                print("num changes: ", num_changes)

    return pd.DataFrame(items)


df_changes = detect_changes(refcoco_new)
display(df_changes)

  9%|██████████████████████▍                                                                                                                                                                                                                                                | 4252/49822 [00:00<00:02, 21557.20it/s]


['tie']
the tie of the standing man
the tie of the dancing unicorn
<function diff at 0x7f0730ac0280>
num changes:  1

['tie']
a purple tie with repetitive circular patterns
a purple tie with magical circular patterns
<function diff at 0x7f0730ac0280>
num changes:  1

['dog']
a man getting ready to cut a cake
a dog getting ready to eat a cake
<function diff at 0x7f0730ac0280>
num changes:  2

['girl']
guy in green with knife in the right hand picture
girl in purple with knife in the right hand picture
<function diff at 0x7f0730ac0280>
num changes:  2


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49822/49822 [00:02<00:00, 22020.10it/s]


Unnamed: 0,ref_id,image_id,cat_id,gt_subj,fp_subj,gt_sent,fp_sent,num_changes,num_subs,num_del,num_add,diff_ops,diff,gt_NOUN_CHUNKS,fp_NOUN_CHUNKS,change_type
0,0,546154,32,[tie],[tie],the tie of the standing man,the tie of the dancing unicorn,1,1,0,0,"(=, -, +)","[(=, [the, tie, of, the]), (-, [standing, man]...","[the tie, the standing man]","[the tie, the dancing unicorn]",other_subject
1,0,546154,32,[tie],[tie],a purple tie with repetitive circular patterns,a purple tie with magical circular patterns,1,1,0,0,"(=, -, +, =)","[(=, [a, purple, tie, with]), (-, [repetitive]...","[a purple tie, repetitive circular patterns]","[a purple tie, magical circular patterns]",other_subject
2,1,208256,1,[man],[dog],a man getting ready to cut a cake,a dog getting ready to eat a cake,2,2,0,0,"(=, -, +, =, -, +, =)","[(=, [a]), (-, [man]), (+, [dog]), (=, [gettin...","[a man, a cake]","[a dog, a cake]",
3,1,208256,1,[guy],[girl],guy in green with knife in the right hand picture,girl in purple with knife in the right hand pi...,2,2,0,0,"(-, +, =, -, +, =)","[(-, [guy]), (+, [girl]), (=, [in]), (-, [gree...","[guy, green, knife, the right hand picture]","[girl, purple, knife, the right hand picture]",
4,2,307082,8,[number],[number],a truck number 14 on a snow bank,a spaceship number 14 on a snow bank,1,1,0,0,"(=, -, +, =)","[(=, [a]), (-, [truck]), (+, [spaceship]), (=,...","[a truck number, a snow bank]","[a spaceship number, a snow bank]",other_subject
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91235,49819,400744,19,[horse],[unicorn],brown horse,brown unicorn,1,1,0,0,"(=, -, +)","[(=, [brown]), (-, [horse]), (+, [unicorn])]",[brown horse],[brown unicorn],main_subject
91236,49819,400744,19,[horse],[unicorn],a brown horse out in the grassy plains,a brown unicorn out in the grassy plains,1,1,0,0,"(=, -, +, =)","[(=, [a, brown]), (-, [horse]), (+, [unicorn])...","[a brown horse, the grassy plains]","[a brown unicorn, the grassy plains]",main_subject
91237,49820,82135,22,[elephant],[unicorn],an elephant in right corner,a unicorn in right corner,1,1,0,0,"(-, +, =)","[(-, [an, elephant]), (+, [a, unicorn]), (=, [...","[an elephant, right corner]","[a unicorn, right corner]",main_subject(+2)
91238,49820,82135,22,[elephant],[unicorn],a right most elephant,a right most unicorn,1,1,0,0,"(=, -, +)","[(=, [a, right, most]), (-, [elephant]), (+, [...",[a right most elephant],[a right most unicorn],main_subject


In [134]:
display(
    df_changes.groupby(["num_changes"], dropna=False).agg(total=("ref_id", "count"))
)
display(
    df_changes.groupby(["num_changes", "diff_ops"], dropna=False)
    .agg(total=("ref_id", "count"))
    .sort_values(["num_changes", "total"], ascending=[True, False])
)
display(
    df_changes.groupby(["num_changes", "num_subs"], dropna=False)
    .agg(total=("ref_id", "count"))
    .sort_values(["num_changes", "total"], ascending=[True, False])
)
display(
    df_changes.groupby(["change_type"], dropna=False).agg(total=("ref_id", "count"))
)

Unnamed: 0_level_0,total
num_changes,Unnamed: 1_level_1
0,1528
1,71262
2,15605
3,2359
4,400
5,71
6,14
7,1


Unnamed: 0_level_0,Unnamed: 1_level_0,total
num_changes,diff_ops,Unnamed: 2_level_1
0,"(=,)",1528
1,"(=, -, +, =)",50760
1,"(=, -, +)",11528
1,"(-, +, =)",8173
1,"(-, +)",611
...,...,...
6,"(=, -, +, =, -, +, =, -, +, =, -, +, =, -, +, =, -, +, =)",8
6,"(=, -, +, =, -, +, =, -, +, =, -, +, =, -, +, =, -, +)",3
6,"(-, +, =, -, +, =, -, +, =, -, +, =, -, +, =, -, +, =)",2
6,"(=, -, +, =, -, +, =, -, +, =, +, =, -, +, =, -, +)",1


Unnamed: 0_level_0,Unnamed: 1_level_0,total
num_changes,num_subs,Unnamed: 2_level_1
0,0,1528
1,1,71072
1,0,190
2,2,15445
2,1,134
2,0,26
3,3,2316
3,2,33
3,1,10
4,4,391


Unnamed: 0_level_0,total
change_type,Unnamed: 1_level_1
,20168
NOT_MAIN_SUBJ,3430
main_subject,21678
main_subject(+2),2629
main_subject(+3),255
main_subject(+4),79
main_subject(+5),12
main_subject(+6),1
main_subject(+7),1
other_subject,42987


In [97]:
# with pd.option_context(
#     "display.max_colwidth", None, "display.max_columns", None, "display.max_rows", 200
# ):
#     display(df_changes[df_changes.diff_ops == ("-", "+")])

# with pd.option_context(
#     "display.max_colwidth", None, "display.max_columns", None, "display.max_rows", 200
# ):
#     display(df_changes[df_changes.num_changes == 1])


with pd.option_context(
    "display.max_colwidth", None, "display.max_columns", None, "display.max_rows", 200
):
    display(df_changes[df_changes.change_type == "NOT_MAIN_SUBJ"])

Unnamed: 0,ref_id,image_id,cat_id,gt_subj,fp_subj,gt_sent,fp_sent,num_changes,num_subs,num_del,num_add,diff_ops,diff,gt_NOUN_CHUNKS,fp_NOUN_CHUNKS,change_type
0,0,546154,32,[tie],[tie],the tie of the standing man,the tie of the dancing unicorn,1,1,0,0,"(=, -, +)","[(=, [the, tie, of, the]), (-, [standing, man]), (+, [dancing, unicorn])]","[the tie, the standing man]","[the tie, the dancing unicorn]",NOT_MAIN_SUBJ
1,0,546154,32,[tie],[tie],a purple tie with repetitive circular patterns,a purple tie with magical circular patterns,1,1,0,0,"(=, -, +, =)","[(=, [a, purple, tie, with]), (-, [repetitive]), (+, [magical]), (=, [circular, patterns])]","[a purple tie, repetitive circular patterns]","[a purple tie, magical circular patterns]",NOT_MAIN_SUBJ
4,2,307082,8,[number],[number],a truck number 14 on a snow bank,a spaceship number 14 on a snow bank,1,1,0,0,"(=, -, +, =)","[(=, [a]), (-, [truck]), (+, [spaceship]), (=, [number, 14, on, a, snow, bank])]","[a truck number, a snow bank]","[a spaceship number, a snow bank]",NOT_MAIN_SUBJ
7,3,532439,22,[elephant],[elephant],a large elephant standing next to a large wooden pole,a large elephant standing next to a large ice cream cone,1,1,0,0,"(=, -, +)","[(=, [a, large, elephant, standing, next, to, a, large]), (-, [wooden, pole]), (+, [ice, cream, cone])]","[a large elephant, a large wooden pole]","[a large elephant, a large ice cream cone]",NOT_MAIN_SUBJ
9,4,155351,86,[vase],[vase],tall vase in middle,short vase in middle,1,1,0,0,"(-, +, =)","[(-, [tall]), (+, [short]), (=, [vase, in, middle])]","[tall vase, middle]","[short vase, middle]",NOT_MAIN_SUBJ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91219,49810,197155,62,[light],[light],"a black resturant dining chair , with the light shining in the back","a pink resturant dining chair , with the light shining in the back",1,1,0,0,"(=, -, +, =)","[(=, [a]), (-, [black]), (+, [pink]), (=, [resturant, dining, chair, ,, with, the, light, shining, in, the, back])]","[a black resturant dining chair, the light, the back]","[a pink resturant dining chair, the light, the back]",NOT_MAIN_SUBJ
91226,49814,60890,16,[pelican],[pelican],a crazy looking pelican,a funny looking pelican,1,1,0,0,"(=, -, +, =)","[(=, [a]), (-, [crazy]), (+, [funny]), (=, [looking, pelican])]",[a crazy looking pelican],[a funny looking pelican],NOT_MAIN_SUBJ
91227,49814,60890,16,[bird],[bird],large black ans white bird with gray legs standing on grass,large purple and white bird with gray legs standing on grass,1,1,0,0,"(=, -, +, =)","[(=, [large]), (-, [black, ans]), (+, [purple, and]), (=, [white, bird, with, gray, legs, standing, on, grass])]","[large black ans white bird, gray legs, grass]","[large purple and white bird, gray legs, grass]",NOT_MAIN_SUBJ
91230,49816,489524,9,[canoe],[canoe],blue canoe with tg 14 tag,green canoe with tg 14 tag,1,1,0,0,"(-, +, =)","[(-, [blue]), (+, [green]), (=, [canoe, with, tg, 14, tag])]","[blue canoe, tg 14 tag]","[green canoe, tg 14 tag]",NOT_MAIN_SUBJ


In [124]:
def scrub(refcoco: COCO):
    refcoco = deepcopy(refcoco)
    for ref_id, ref in refcoco.refs.items():
        for s in ref["sentences"]:
            if "is_false_premise" not in s:
                s["is_false_premise"] = False
            s["exist"] = not s["is_false_premise"]
            if not s["change_type"]:
                s["change_type"]
    return refcoco


refcoco_new_scrubbed = scrub(refcoco_new)

## Save new RefCOCO Dataset

Enhanced version augments the `ref["sentences"]` dictionaries with spacy tagging info (parts of speech, dependency parsing, etc.


---

In [102]:
print(refcoco_new.refs[1000])
print("")
print(refcoco_new.refs_data[1000])
print(len(refcoco_new.refs_data), len(refcoco_new.refs))

{'image_id': 362498, 'split': 'val', 'sentences': [{'tokens': ['a', 'large', 'polar', 'bear', 'looking', 'at', 'a', 'smaller', 'polar', 'bear'], 'raw': 'A large polar bear looking at a smaller polar bear.', 'sent_id': 21606, 'sent': 'a large polar bear looking at a smaller polar bear', 'spcy_DEP': ['det', 'amod', 'amod', 'ROOT', 'acl', 'prep', 'det', 'amod', 'amod', 'pobj'], 'spcy_POS': ['DET', 'ADJ', 'ADJ', 'NOUN', 'VERB', 'ADP', 'DET', 'ADJ', 'ADJ', 'NOUN'], 'spcy_LEM': ['a', 'large', 'polar', 'bear', 'look', 'at', 'a', 'small', 'polar', 'bear'], 'spcy_TAG': ['DT', 'JJ', 'JJ', 'NN', 'VBG', 'IN', 'DT', 'JJR', 'JJ', 'NN'], 'spcy_IS_STOP': [True, False, False, False, False, True, True, False, False, False], 'spcy_ENTS': [], 'spcy_WORD': ['a', 'large', 'polar', 'bear', 'looking', 'at', 'a', 'smaller', 'polar', 'bear'], 'spcy_NOUN_CHUNKS': ['a large polar bear', 'a smaller polar bear']}, {'tokens': ['white', 'polar', 'bear', 'looking', 'at', 'another', 'bear'], 'raw': 'White polar bear lo

In [125]:
import shutil


def save_refs(refcoco: COCO, save_dir: Path, split_by: str):
    assert save_dir.exists(), str(save_dir)
    refs_path = save_dir / f"refs({split_by}).p"
    print("Saving refs: ", refs_path)
    pickle.dump(refcoco.refs_data, open(refs_path, "wb"))


def make_new_dataset(refcoco: COCO, save_dir: Path, dataset_name: str, split_by: str):
    new_dataset_path = save_dir / "refer_seg" / f"{dataset_name}"
    new_dataset_path.mkdir(exist_ok=True, parents=True)
    # Copy coco instances.json:
    source_path = refcoco.DATA_DIR / "instances.json"
    assert source_path.exists(), str(source_path)
    print("saving instances.json: ", new_dataset_path / "instances.json")
    shutil.copy(source_path, new_dataset_path / "instances.json")
    save_refs(refcoco, new_dataset_path, split_by)
    print("Saved new refer_seg dataset to: ", new_dataset_path)
    return new_dataset_path


new_ds_path = make_new_dataset(
    refcoco_new, api_results_dir, "fprefcocog_v002", "berkeley"
)

saving instances.json:  /home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo/refer_seg/fprefcocog_v002/instances.json
Saving refs:  /home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo/refer_seg/fprefcocog_v002/refs(berkeley).p
Saved new refer_seg dataset to:  /home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo/refer_seg/fprefcocog_v002


## Load and Inspect the Newly Saved RefCOCO Dataset

In [126]:
VALID_SPLITS = {
    "R-refcoco": ["unc"],
    "R-refcoco+": ["unc"],
    "R-refcocog": ["umd"],
    "refclef": ["berkeley", "unc"],
    "refcoco": ["google"],
    "refcoco+": ["unc"],
    "refcocog": ["google", "umd"],
    "fprefcocog_v002": ["berkeley"],
}


def build_refcoco(refseg_path: Path, dataset_name: str, split_by: str = None) -> COCO:
    assert dataset_name in VALID_SPLITS, dataset_name
    if split_by is None:
        split_by = VALID_SPLITS[dataset_name][0]
    else:
        assert split_by in VALID_SPLITS[dataset_name]
    coco = COCO(
        refseg_path / dataset_name / "instances.json",
        is_ref_dataset=True,
        dataset_name=dataset_name,
        split_by=split_by,
    )
    return coco


df_aggs = []
for ds_name in ["fprefcocog_v002"]:
    print("\n\n")
    print("=" * 220)
    print(f"Dataset: {ds_name}(berkeley)")
    coconegref_stats = CocoClassDistHelper(
        new_ds_path.parent,
        is_ref_dataset=True,
        dataset_name=ds_name,
        split_by="berkeley",
    )
    df_refcoco, df_refcoco_agg = coconegref_stats.get_ref_stats()
    df_aggs.append(df_refcoco_agg)


df_aggs = pd.concat(df_aggs)


display(df_aggs)




Dataset: fprefcocog_v002(berkeley)
Loading refs from '/home/gbiamby/proj/geo-llm-ret/output/refcocog_google_enhanced-gb006_remove_guidelines-gpt-3.5-turbo/refer_seg/fprefcocog_v002/refs(berkeley).p'
Loaded 49822 refs
loading annotations into memory...
Done (t=1.18s)
creating index...
index created!
num images: 25799
num annotations: 208960
pos/neg sentence_counts:  95010 91240


Unnamed: 0,pos_sent_count,neg_sent_count,dataset,num_refs,sent_count,total_pos_sents,total_neg_sents,ann_count,img_count
0,1,0,fprefcocog_v002(berkeley),243,243,243,0,208960,25799
1,1,1,fprefcocog_v002(berkeley),4471,8942,4471,4471,208960,25799
2,2,0,fprefcocog_v002(berkeley),599,1198,1198,0,208960,25799
3,2,1,fprefcocog_v002(berkeley),2318,6954,4636,2318,208960,25799
4,2,2,fprefcocog_v002(berkeley),42111,168444,84222,84222,208960,25799
5,3,0,fprefcocog_v002(berkeley),1,3,3,0,208960,25799
6,3,2,fprefcocog_v002(berkeley),8,40,24,16,208960,25799
7,3,3,fprefcocog_v002(berkeley),71,426,213,213,208960,25799


In [129]:
def show_a_refexp(refcoco: COCO):
    ref = refcoco.refs[1000]
    print("ref has keys: ", ref.keys())
    print(f"ref has {len(ref['sentences'])} sentences")
    for s in ref["sentences"]:
        # print(s.keys())
        print(
            f"sent_id:{s['sent_id']}, is_FP:{s['is_false_premise']}, sent: '{s['sent']}'"
        )
        if s["is_false_premise"]:
            print("\tchange_type: ", s["change_type"])
            print(f"\tparent_sent_id: {s['gt_sent_id']}, parent_sent: '{s['gt_sent']}'")


show_a_refexp(coconegref_stats)

ref has keys:  dict_keys(['image_id', 'split', 'sentences', 'file_name', 'category_id', 'ann_id', 'sent_ids', 'ref_id'])
ref has 4 sentences
sent_id:21606, is_FP:False, sent: 'a large polar bear looking at a smaller polar bear'
sent_id:21607, is_FP:False, sent: 'white polar bear looking at another bear'
sent_id:-1, is_FP:True, sent: 'a large penguin looking at a smaller penguin'
	change_type:  
	parent_sent_id: 21606, parent_sent: 'a large polar bear looking at a smaller polar bear'
sent_id:-1, is_FP:True, sent: 'white penguin looking at another penguin'
	change_type:  
	parent_sent_id: 21607, parent_sent: 'white polar bear looking at another bear'


---

In [None]:
# def get_img_info(img_dir: Path, img: dict):
#     img_path = img_dir / img["file_name"]
#     img = deepcopy(img)
#     result = {
#         "filename": img_path.name,
#         "suffix": img_path.suffix,
#         "img_dim": np.asarray(pil_img.open(img_path).convert("L")).shape,
#         "image_height": np.asarray(pil_img.open(img_path).convert("L")).shape[0],
#         "image_width": np.asarray(pil_img.open(img_path).convert("L")).shape[1],
#     }
#     result["area"] = result["img_dim"][0] * result["img_dim"][1]
#     img.update(result)
#     return img


# print(f"Found {len(coco.imgs)} images to process.")
# tqdm._instances.clear()
# df_imgs = pd.DataFrame(
#     get_img_info(IMG_DIR, img) for img in tqdm(list(coco.imgs.values()))
# )
# display(df_imgs)

In [None]:
# df_anns = pd.DataFrame(coco.anns.values()).drop(columns=["segmentation", "bbox"])
# display(df_anns)

In [None]:
# df_img_cat_counts = (
#     df_anns.groupby(["image_id", "supercategory", "category_id", "cat_name"])
#     .agg(total_anns=("id", "count"))
#     .reset_index()
# )
# display(df_img_cat_counts)

In [None]:
# df_imgs.merge(df_anns, how="inner", left_on="id", right_on="image_id")

In [None]:
# df_positive_cats = df_img_cat_counts[df_img_cat_counts.total_anns == 1].set_index(
#     ["image_id", "category_id"]
# )
# # df_positive_cats["]
# display(df_positive_cats)
# display(df_positive_cats.loc[581317])
# display(df_positive_cats.loc[581317, 77])

In [None]:
# def test_dataframe_indexing():
#     # Get one cat when there are many:
#     cats = df_positive_cats.loc[581317]
#     display(cats)
#     print(len(cats))
#     print(cats.sample(n=1, replace=False))

#     # When there is one cat
#     cats = df_positive_cats.loc[581615]
#     display(cats)
#     print(len(cats))
#     print(cats.sample(n=1, replace=False))

#     # # image_id doesn't exist
#     # cats = df_positive_cats.loc[58131887]
#     # display(cats)

#     # Check when category doesn't exist but img does


# test_dataframe_indexing()
# # df_positive_cats[df_positive_cats.index["image_id"]==581317]

In [None]:
# def get_img2cats(df: pd.DataFrame) -> dict[int, dict[str, Any]]:
#     img_cats = df.reset_index().to_dict(orient="records")
#     img2cats = defaultdict(dict)
#     for img_cat in img_cats:
#         img2cats[img_cat["image_id"]][img_cat["category_id"]] = img_cat

#     return img2cats


# cat_counts_pos_samples: dict = get_img2cats(df_positive_cats)
# cat_counts_all: dict = get_img2cats(df_img_cat_counts)
# print(len(cat_counts_pos_samples), len(df_img_cat_counts))

# print(cat_counts_pos_samples[581317])

In [None]:
def add_sentence(
    sentence_id: int,
    cat_id: int,
    coco: COCO,
    ref: Ref,
    exist: bool,
    pos_sent: dict[str, Any] = None,
    true_cat_id: int = None,
):
    """
    Add sentence, and sent_ids to the ref object. exist=True/False means it is a positive/negative sample, resp.
    """
    cat = coco.cats[cat_id]
    s = f"The {cat['name'].lower()}"
    sent = {
        "tokens": s.split(" "),
        "raw": s,
        "sent_id": sentence_id,
        "sent": s,  # TODO: what exactly is the dif between "raw" and "sent"?
        "exist": exist,
    }
    if exist == False:
        assert pos_sent is not None
        assert true_cat_id is not None
        sent["source_sent"] = pos_sent["sent_id"]
        sent["true_cat_id"] = true_cat_id
    ref["sent_ids"].append(sent["sent_id"])
    ref["sentences"].append(sent)
    return sent


def get_img2cats(coco: COCO) -> dict[int, dict[str, Any]]:
    """
    This is pretty effed up, but it arose from jupyter notebook code that started for one purpose and then evolved over time.

    Returns:
        Dict 1: key = image_id (int), value = Dict
        Dict 2: key = category_id (int), value = Dict (category object)
    """
    df_anns = pd.DataFrame(coco.anns.values()).drop(columns=["segmentation", "bbox"])
    df_anns["supercategory"] = df_anns.category_id.apply(
        lambda x: coco.cats[x]["supercategory"]
    )
    df_anns["cat_name"] = df_anns.category_id.apply(lambda x: coco.cats[x]["name"])
    display(df_anns)
    df_img_cat_counts = (
        df_anns.groupby(["image_id", "supercategory", "category_id", "cat_name"])
        .agg(total_anns=("id", "count"))
        .reset_index()
    )
    df_positive_cats = df_img_cat_counts[df_img_cat_counts.total_anns == 1].set_index(
        ["image_id", "category_id"]
    )
    img_cats = df_positive_cats.reset_index().to_dict(orient="records")
    img2cats = defaultdict(dict)
    for img_cat in img_cats:
        img2cats[img_cat["image_id"]][img_cat["category_id"]] = img_cat

    return img2cats


def coco_negref(
    args,
    split: str,
    dataset_name: str,
    split_by: str = "berkeley",
):
    """
    Generate COCO with negated annotations.

    Negated Anns are added to every image in the dataset. Negative classes are detected
    as those not appearing in an image ground truth.
    """
    np.random.seed(args.seed)
    coco_annotations_file: Path = args.coco_ann_path
    output_path: Path = args.output_path.absolute()
    output_path.mkdir(exist_ok=True, parents=True)
    assert coco_annotations_file.exists(), str(coco_annotations_file)

    coco_original = COCO(str(coco_annotations_file))
    coco_builder = CocoJsonBuilder(
        coco_original.dataset["categories"],
        dest_path=output_path,
        dest_name=f"instances.json",
        source_coco=coco_original,
        is_ref_dataset=True,
        dataset_name=dataset_name,
        split_by=split_by,
    )
    cat_counts_pos_samples: dict[int, dict[int, dict[str, Any]]] = get_img2cats(
        coco_original
    )
    sibling_lookup = get_sibling_lookup(coco_original, cat_counts_pos_samples)
    current_sent_id = 1

    for i, (img_id, img) in tqdm(
        enumerate(coco_original.imgs.items()), total=len(coco_original.imgs)
    ):
        annotations: list[Ann] = deepcopy(coco_original.imgToAnns[img_id])
        pos_candidates = []
        # Sample the positive classes. Primarily this means classes with exactly
        # one annotation in the image, therefore we can create unambiguous refering expressions for them:
        img_cat_counts: list[dict[int, dict]] = list(
            cat_counts_pos_samples[img_id].values()
        )
        if img_cat_counts is not None and len(img_cat_counts):
            pos_candidates = np.random.choice(
                img_cat_counts, args.num_pos_parents_per_image
            ).tolist()
        refs: list[Ref] = []
        # All anns pass through to the new dataset (but won't be used unless a ref points to them).
        # Each ref points to an ann_id. Each ref has a list of sentences.

        ## What we're building looks like this:
        # img: {"id": , "height": , ...}
        #     anns: [{"id": , "category_id": , bbox: , "segmentation": , ...}]
        #     refs: [{"ref_id": , "ann_id": , "category_id": , "image_id": , sent_ids: ,
        #        "sentences": [
        #             {
        #                 'tokens': ['the', 'man', 'in', 'yellow', 'coat'],
        #                 'raw': 'the man in yellow coat',
        #                 'sent_id': 8,
        #                 'sent': 'the man in yellow coat',  # TODO: what exactly is the dif between "raw" and "sent"?
        #                 'exist': True,
        #                 ...,
        #             }
        #     ]}, ...]

        for pos_candidate in pos_candidates:
            ann = next(
                ann
                for ann in annotations
                if ann["category_id"] == pos_candidate["category_id"]
            )
            assert ann is not None
            ref = {
                "image_id": img_id,
                "split": split,
                "file_name": img["file_name"],
                "category_id": ann["category_id"],
                "ann_id": ann["id"],
                "sent_ids": [],
                "ref_id": -1,
                "sentences": [],
            }
            pos_sent = add_sentence(
                current_sent_id, ann["category_id"], coco_original, ref, exist=True
            )
            current_sent_id += 1

            # Sample candidate negative classes (siblings)
            siblings = sibling_lookup[pos_candidate["category_id"]]
            if len(siblings) > 0:
                neg_samples = set(
                    np.random.choice(list(siblings), args.num_negs_per_pos)
                )
                for ns in neg_samples:
                    neg_sent = add_sentence(
                        current_sent_id,
                        ann["category_id"],
                        coco_original,
                        ref,
                        exist=False,
                        pos_sent=pos_sent,
                        true_cat_id=ann["category_id"],
                    )
                    current_sent_id += 1
            else:
                print("Empty siblings for cat: ", pos_candidate)
            refs.append(ref)

        # Add the negated annotations:
        new_img: Image = img
        coco_builder.add_image(new_img, annotations, refs)
    neg_coco_path = coco_builder.save()

    # Output a miniature version of the dataset file just for debugging/inspection:
    print("\n\n")
    print("Building shrunken version")
    num_images = 50
    mini_dataset_name = f"{dataset_name}_mini"
    shrinker = COCOShrinker(
        neg_coco_path,
        is_ref_dataset=True,
        split_by=split_by,
        dataset_name=dataset_name,
    )
    shrink_path = Path(f"./output/ref_seg/{mini_dataset_name}")
    shrinker.shrink(
        "instances.json",
        size=num_images,
        output_dir=shrink_path,
        is_ref_dataset=True,
        dataset_name=mini_dataset_name,
        split_by="berkeley",
    )


if __name__ == "__main__":
    _dataset_name = f"refcoconeg_v001"

    for year in ["2014", "2017"]:
        for split in ["val", "train"]:
            dataset_name = f"{_dataset_name}_{split}{year}"
            dataset_json = Path(
                COCO_DIR / "annotations" / f"instances_{split}{year}.json"
            )
            args = argparse.Namespace(
                **{
                    "coco_ann_path": dataset_json,
                    "output_path": Path(f"./output/ref_seg/{dataset_name}").resolve(),
                    "num_pos_parents_per_image": 1,
                    "num_negs_per_pos": 5,
                    "seed": 42,
                }
            )
            # print(args)
            coco_negref(
                args,
                split,
                dataset_name=dataset_name,
                split_by="berkeley",
            )

### Test Creating the new CocoNegRef Dataset, and Output Stats

In [None]:
df_aggs.groupby("dataset").agg(
    num_refs=("num_refs", "sum"),
    sent_count=("sent_count", "sum"),
    total_pos_sents=("total_pos_sents", "sum"),
    total_neg_sents=("total_neg_sents", "sum"),
    total_ann_count=("ann_count", "min"),
    total_img_count=("img_count", "min"),
)

In [None]:
from IPython.display import display

print(df.describe())
with pd.option_context("display.max_rows", 100, "display.max_columns", 10):
    display(df)

In [None]:
anns = {ann["id"] for ann in coco.anns.values()}
print(min(anns), max(anns))

In [None]:
import pickle

refs = pickle.load(
    open(
        "/home/gbiamby/proj/geo-llm-ret/lib/cocobetter/PythonAPI/notebooks/ref_correct/output/ref_seg/refcoconeg_v001/refs(berkeley).p",
        "rb",
    )
)
for idx, ref in enumerate(reversed(refs)):
    if idx > 1:
        break
    # print(ref["category_id"], ref["ann_id"])
    print(ref)
    # if ref["sent_ids"]:
    #     print(ref["sent_ids"])

In [None]:
print(list(coco.imgs.values())[-5:])

In [None]:
df_counts = (
    pd.DataFrame(list(coco_dist.get_cat_counts().values()))
    .sort_values("ann_count", ascending=False)
    .reset_index(drop=True)
)
total_anns = df_counts.ann_count.sum()
df_counts["ann_count_pdf"] = df_counts.ann_count / total_anns
df_counts["ann_count_cdf"] = df_counts.ann_count_pdf.cumsum()
display(df_counts)

In [None]:
df_

## Add frequency bins based on annotation count Cumulative Distribution Function


In [None]:
df_counts["freq_bin_2"] = df_counts.ann_count_cdf.apply(
    lambda x: "high" if x < 0.5 else "low"
)
df_counts["freq_bin_3"] = df_counts.ann_count_cdf.apply(
    lambda x: "high" if x < 0.333 else "medium" if x <= 0.667 else "low"
)
display(df_counts)

In [None]:
plt.figure(figsize=(25, 15))
color_map = plt.get_cmap("magma")
fig = sns.barplot(
    data=df_counts.sort_values(["img_count"], ascending=False),
    x="name",
    y="img_count",
    hue=df_counts.freq_bin_3.values,
)
fig.set_xticklabels(fig.get_xticklabels(), rotation=45, horizontalalignment="right")
fig.set_title(f"Per-category Annotated Image Counts")
plt.tight_layout()

In [None]:
plt.figure(figsize=(25, 15))
color_map = plt.get_cmap("magma")
fig = sns.barplot(
    data=df_counts.sort_values(["ann_count"], ascending=False),
    x="name",
    y="ann_count",
    hue="freq_bin_3",
)
fig.set_xticklabels(fig.get_xticklabels(), rotation=45, horizontalalignment="right")
fig.set_title(f"Per-category Annotation Counts")
plt.tight_layout()