# Substances Mismatch

In [1]:
from bddl.activity import Conditions
from data import *
import json
import os

# Make sure the "output" directory exists before any cell writes a report into it;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs("output", exist_ok=True)

In [2]:
# BDDL predicates whose presence in a condition signals that a substance is involved.
SUBSTANCE_PREDICATE = {
    "contains",
    "covered",
    "empty",
    "filled",
    "insource",
    "saturated",
}

In [3]:
def get_substances_in_bddl(task_to_fn):
    """Collect the substances referenced by substance predicates in BDDL tasks.

    For every task, ``problem0.bddl`` inside the task's directory is parsed with
    ``Conditions``. Each top-level initial or goal condition whose predicate is
    listed in ``SUBSTANCE_PREDICATE`` contributes one substance.

    Args:
        task_to_fn: mapping from task name to the directory (a path object
            supporting ``/`` and ``.exists()``) that contains ``problem0.bddl``.

    Returns:
        set: synsets of the substances that were found. When a substance
        argument cannot be resolved to a synset through the task's declared
        objects, a notice is printed and the argument name itself (with any
        leading ``?`` removed) is added instead.

    Raises:
        AssertionError: if a task's ``problem0.bddl`` file does not exist.
    """
    found_substances = set()
    for task_name, task_dir in task_to_fn.items():
        task_file = task_dir / "problem0.bddl"
        assert task_file.exists(), f"{task_name} file missing"
        with open(task_file, "r") as f:
            predefined_problem = f.read()
        # The domain is chosen from the location of the definition file.
        dom = "omnigibson" if "ObjectPropertyAnnotation" in str(task_file) else "igibson"
        conds = Conditions(task_name, "potato", dom, predefined_problem=predefined_problem)
        # Invert {synset: [objects]} so each object instance maps back to its synset.
        obj_to_synset = {obj: synset for synset, objs in conds.parsed_objects.items() for obj in objs}
        for cond in conds.parsed_initial_conditions + conds.parsed_goal_conditions:
            if cond[0] not in SUBSTANCE_PREDICATE:
                continue
            # in some bddl "covered" definitions, the substance is the 2nd one (reversed)
            if cond[0] == "covered" and ("stain" in cond[2] or "dust" in cond[2]):
                substance_arg = cond[2]
            else:
                substance_arg = cond[1]
            # Drop a leading "?" so the name matches the keys of obj_to_synset.
            substance_name = substance_arg.split('?')[-1]
            try:
                found_substances.add(obj_to_synset[substance_name])
            except KeyError:
                # Report and record the argument that was actually looked up; the
                # previous code always fell back to cond[1], even for the reversed case.
                print(f"KeyError: {substance_arg} in task {task_name}, adding {substance_name} to found_substances")
                found_substances.add(substance_name)
    return found_substances

In [4]:
# Scan every BDDL task definition and save the sorted list of substances it references.
task_to_fn = get_tasks()
found_substances = sorted(get_substances_in_bddl(task_to_fn))
with open("output/substances_in_bddl.json", "w") as out_file:
    json.dump(found_substances, out_file, indent=4)

KeyError: ice_tea.n.01 in task make_iced_tea, adding ice_tea.n.01 to found_substances


In [5]:
# Load the property -> synsets annotation file and keep the synsets that are
# listed under both the "liquid" and the "substance" properties.
properties_to_synsets_path = rf"{os.path.pardir}/ObjectPropertyAnnotation/object_property_annots/properties_to_synsets.json"
with open(properties_to_synsets_path, "r") as f:
    json_file = json.load(f)
liquid_synsets = set(json_file["liquid"])
substance_synsets = set(json_file["substance"])
synset_substances = sorted(liquid_synsets & substance_synsets)

In [6]:
# Report the substances that appear on only one side: in the BDDL definitions
# or in the synset annotations.
bddl_substance_set = set(found_substances)
annotated_substance_set = set(synset_substances)
substances_synset_bddl_mismatch = {
    "substances_in_bddl_not_in_synset": sorted(bddl_substance_set.difference(annotated_substance_set)),
    "substances_in_synset_not_in_bddl": sorted(annotated_substance_set.difference(bddl_substance_set)),
}
with open("output/substances_synset_bddl_mismatch.json", "w") as out_file:
    json.dump(substances_synset_bddl_mismatch, out_file, indent=4)

# Synsets Mismatch

In [7]:
# get current "ground truth" synsets
# G is passed to get_task_synset_mapping further down; its node collection is
# what the later cells treat as the set of valid ("legit") synsets.
# NOTE(review): G is presumably a networkx-style graph keyed by synset name — confirm.
G = get_all_synsets()
legit_synsets = set(G.nodes)

In [11]:
# get all synsets in bddl & category mapping
# Respect credentials already configured in the environment; only fall back to the
# original key-file location when GOOGLE_APPLICATION_CREDENTIALS is unset.
# NOTE(review): the fallback is a machine-specific absolute path; export
# GOOGLE_APPLICATION_CREDENTIALS yourself when running this anywhere else.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:\Users\capri28\Downloads\lucid-inquiry-205018-cd52e3c69473.json",
)
_, _, _, _, all_synsets_from_categorpy_mapping = get_category_synset_mapping([], [])
# Synsets used by the category mapping that are not nodes of the ground-truth graph.
category_mapping_to_illegal_synsets = all_synsets_from_categorpy_mapping - legit_synsets
task_to_synset, _, _, _, _, task_to_illegal_synsets, _ = get_task_synset_mapping(G, task_to_fn)
# Flatten the per-task illegal synsets and merge them with the category-mapping ones.
task_illegal_synsets = {synset for synsets in task_to_illegal_synsets.values() for synset in synsets}
all_illegal_synsets = sorted(task_illegal_synsets | category_mapping_to_illegal_synsets)
with open("output/all_illegal_synsets.json", "w") as f:
    json.dump(all_illegal_synsets, f, indent=4)

make_eggnog
make_gazpacho


In [None]:
# Union of every synset referenced by the category mapping or by any task definition,
# written out in sorted order.
synsets_from_tasks = {synset for synsets in task_to_synset.values() for synset in synsets}
all_synsets_in_bddl_and_category_mapping = sorted(all_synsets_from_categorpy_mapping | synsets_from_tasks)
with open("output/all_synsets_in_bddl_and_category_mapping.json", "w") as out_file:
    json.dump(all_synsets_in_bddl_and_category_mapping, out_file, indent=4)

In [None]:
# Sanity check: how many synsets the category mapping returned.
len(all_synsets_from_categorpy_mapping)

1332