In [36]:
import csv, json

existing_substances = set()
existing_substance_params = {}
with open("/home/cgokmen/Downloads/substances.csv") as f:
    for x in csv.reader(f):
        existing_substances.add(x[0])
        existing_substance_params[x[0]] = x[1]

In [37]:
existing_substances

{'acetone',
 'adhesive_material',
 'alga',
 'allspice',
 'almond',
 'ammonia_water',
 'apple_juice',
 'applesauce',
 'ash',
 'baby_oil',
 'baking_powder',
 'barbecue_sauce',
 'beaten_egg',
 'beef_broth',
 'beef_stew',
 'beer',
 'bird_feed',
 'black_bean',
 'black_pepper',
 'bleaching_agent',
 'blueberry',
 'blueberry_mousse',
 'borax',
 'bourbon',
 'breadcrumb',
 'brown_rice',
 'brown_sugar',
 'bunchgrass',
 'cafe_au_lait',
 'cake_mix',
 'calcium_carbonate',
 'cane_sugar',
 'cappuccino',
 'carrot_juice',
 'cat_food',
 'catsup',
 'cayenne',
 'champagne',
 'cheese_pastry_filling',
 'cheese_sauce',
 'cherry_filling',
 'chia_seed',
 'chicken_broth',
 'chicken_curry',
 'chicken_soup',
 'chickpea',
 'chlorine',
 'chocolate_kiss',
 'chocolate_milk',
 'chocolate_sauce',
 'chowder',
 'cinnamon',
 'cinnamon_sugar',
 'clove',
 'coal',
 'coca_cola',
 'cocoa',
 'cocoa_powder',
 'coconut',
 'coconut_milk',
 'coconut_oil',
 'coffee_bean',
 'coffee_grounds',
 'conditioner',
 'cooked__diced__antipasto'

In [38]:
from bddl.knowledge_base import *

In [39]:
table_substances = set()
with open("/home/cgokmen/Downloads/synsets.csv") as f:
    for x in csv.DictReader(f):
        if x["objectType"].strip() in ("microPhysicalSubstance", "macroPhysicalSubstance", "liquid", "visualSubstance"):
            table_substances.add(x["synset"])

In [40]:
substances = [x for x in Synset.all_objects() if x.state == STATE_SUBSTANCE and not x.children]
substance_names = {x.name for x in substances}

In [41]:
substance_names == table_substances

True

In [42]:
print("Extra in table", table_substances - substance_names)
print("Extra in kb", substance_names - table_substances)

Extra in table set()
Extra in kb set()


In [43]:
custom_mappings = {
    "isopropyl_alcohol.n.01": "isopropanol",
    "lemon-pepper_seasoning.n.01": "lemon_pepper_seasoning",
    "low-fat_milk.n.01": "low_fat_milk",
}

In [44]:
substance_to_synset = {
    x: ([y for y in substance_names if y.split(".n.")[0] == x] + [y for y in custom_mappings.values() if y.split(".n.")[0] == x])
    for x in existing_substances
}
assert all(len(v) == 1 for v in substance_to_synset.values())
substance_to_synset = {k: v[0] for k, v in substance_to_synset.items()}
synset_to_substance = {v: k for k, v in substance_to_synset.items()}
substance_params = {}

In [45]:
# Add the cooked stuff
cooked_synset_to_substance = {}
cooked_substance_params = {}
for x in substance_names:
    if x in synset_to_substance:
        continue

    cooked_substance = x.split(".n.")[0]
    uncooked_substance = cooked_substance.replace("cooked__", "")
    if uncooked_substance not in substance_to_synset:
        continue

    cooked_synset_to_substance[x] = cooked_substance
    cooked_substance_params[cooked_substance] = existing_substance_params[uncooked_substance]

In [46]:
# Add the missing stuff
missing_substances = [x for x in substance_names if x not in synset_to_substance and x not in cooked_synset_to_substance]
print(len(missing_substances))
missing_synset_to_substance = {k: k.split(".n.")[0] for k in missing_substances}

204


In [49]:
# Generate the csv
rows = []
for x, sub in synset_to_substance.items():
    rows.append((sub, x, "manual", existing_substance_params[sub]))
for x, sub in cooked_synset_to_substance.items():
    rows.append((sub, x, "fromcooked", cooked_substance_params[sub]))
for x, sub in missing_synset_to_substance.items():
    rows.append((sub, x, "gpt", ""))

rows.sort()

with open("/home/cgokmen/Downloads/substances-fixed.csv", "w") as f:
    w = csv.writer(f)
    for r in rows:
        w.writerow(r)