In [8]:
import pathlib, json
import pandas as pd
from tqdm.notebook import tqdm

In [12]:
# Build the HSSD source-filename -> (category, model) mapping and write it to
# <hssd_output_root>/object_name_mapping.json, reporting every model whose
# "import.success" marker is absent from the output tree.
hssd_output_root = pathlib.Path("/fsx-siro/cgokmen/behavior-data2/hssd")
hssd_root = pathlib.Path("/fsx-siro/cgokmen/habitat-data/scene_datasets/hssd-hab")
hssd_models_root = pathlib.Path("/fsx-siro/cgokmen/hssd-models")
hssd_metadata = pd.read_csv(hssd_root / "metadata/hssd_obj_semantics_condensed.csv")
# Every .glb under the models root, except files whose name contains
# "filteredSupportSurface" or "collider".
hssd_models = sorted(
    fn
    for fn in hssd_models_root.rglob("*.glb")
    if "filteredSupportSurface" not in fn.name and "collider" not in fn.name
)

# Header of the condensed-category column, exactly as it is spelled in the CSV.
hssd_category_column = (
    "Semantic Category:\nCONDENSED\n\nThis is an effort to condense the semantic categories by a couple hundred"
)

# Build the hash -> category lookup once instead of re-filtering the whole
# DataFrame for every model. setdefault keeps the FIRST row for a given hash,
# matching the previous `rows.iloc[0]` behaviour.
hssd_category_by_hash = {}
for object_hash, condensed_category in zip(hssd_metadata["Object Hash"], hssd_metadata[hssd_category_column]):
    hssd_category_by_hash.setdefault(object_hash, condensed_category)

hssd_mapping = {}
hssd_missing_count = 0
for obj_path in tqdm(hssd_models):
    parent_dir_name = obj_path.parts[-2]
    if parent_dir_name in ("openings", "stages"):
        # Files directly inside these two directories take the directory name as category.
        category = parent_dir_name
    elif obj_path.stem in hssd_category_by_hash:
        category = hssd_category_by_hash[obj_path.stem]
    else:
        print(f"Warning: {obj_path.stem} not found in metadata, defaulting to 'object'")
        category = "object"

    # Sanitize names: the category is lower-cased and every non-alphanumeric
    # character becomes "_"; the model name keeps only alphanumeric characters
    # (letters AND digits) and gets an "hssd" prefix.
    category = "".join(c if c.isalnum() or c == "_" else "_" for c in category.lower())
    model = "hssd" + "".join(c if c.isalnum() else "" for c in obj_path.stem)

    model_root = hssd_output_root / "objects" / category / model
    success_file = model_root / "import.success"
    if not success_file.exists():
        print(f"Missing success file for {obj_path}: {success_file}")
        hssd_missing_count += 1

    # The mapping is keyed by bare filename stem, so two source files sharing a stem would collide.
    assert obj_path.stem not in hssd_mapping, f"Filename overlap! {obj_path}"
    hssd_mapping[obj_path.stem] = (category, model)

(hssd_output_root / "object_name_mapping.json").write_text(json.dumps(hssd_mapping, indent=4))
print(hssd_missing_count, "missing hssd mappings")


  0%|          | 0/14267 [00:00<?, ?it/s]

Missing success file for /fsx-siro/cgokmen/hssd-models/objects/6/620aac904b075417a57a155ba2b43268ea5a3464.glb: /fsx-siro/cgokmen/behavior-data2/hssd/objects/lamp/hssd620aac904b075417a57a155ba2b43268ea5a3464/import.success
Missing success file for /fsx-siro/cgokmen/hssd-models/objects/9/917b764eeecca7c6224ed9dc3cf5ed13034b42f7.glb: /fsx-siro/cgokmen/behavior-data2/hssd/objects/table/hssd917b764eeecca7c6224ed9dc3cf5ed13034b42f7/import.success
Missing success file for /fsx-siro/cgokmen/hssd-models/objects/c/c4aca10e8652ed3704c50608a556afd9ff0a912a.glb: /fsx-siro/cgokmen/behavior-data2/hssd/objects/hanger/hssdc4aca10e8652ed3704c50608a556afd9ff0a912a/import.success
Missing success file for /fsx-siro/cgokmen/hssd-models/objects/f/f8c471802c4fd0622b2367a4f2ae7c58652aece7.glb: /fsx-siro/cgokmen/behavior-data2/hssd/objects/globe/hssdf8c471802c4fd0622b2367a4f2ae7c58652aece7/import.success
Missing success file for /fsx-siro/cgokmen/hssd-models/objects/openings/218-171.glb: /fsx-siro/cgokmen/behav

In [13]:
# Build the SPOC source-filename -> (category, model) mapping, write it to
# <spoc_output_root>/object_name_mapping.json, and count the models whose
# "import.success" marker is absent from the output tree.
spoc_output_root = pathlib.Path("/fsx-siro/cgokmen/behavior-data2/spoc")
spoc_root = pathlib.Path("/fsx-siro/cgokmen/procthor/assets/2023_07_28")
spoc_annots = json.loads((spoc_root / "annotations.json").read_text())
spoc_models = sorted(spoc_root.glob("assets/*/*.glb"))

spoc_mapping = {}
spoc_missing_count = 0
for obj_path in tqdm(spoc_models):
    asset_id = obj_path.stem
    if asset_id not in spoc_annots:
        # Assets without an annotation entry are left out of the mapping entirely.
        print(f"Skipping {asset_id} as it has no annotations")
        continue
    this_annots = spoc_annots[asset_id]

    # Category: lower-cased, every non-alphanumeric character replaced by "_"
    # (an existing "_" therefore stays "_").
    raw_category = this_annots["category"].lower()
    category = "".join(ch if ch.isalnum() else "_" for ch in raw_category)
    # Model: "spoc" prefix + the lower-cased stem reduced to its alphanumeric characters.
    model = "spoc" + "".join(filter(str.isalnum, asset_id.lower()))

    success_file = spoc_output_root / "objects" / category / model / "import.success"
    if not success_file.exists():
        print(f"Missing success file for {obj_path}: {success_file}")
        spoc_missing_count += 1

    # The mapping is keyed by bare filename stem, so duplicated stems would collide.
    assert asset_id not in spoc_mapping, f"Filename overlap! {obj_path}"
    spoc_mapping[asset_id] = (category, model)

spoc_mapping_json = json.dumps(spoc_mapping, indent=4)
(spoc_output_root / "object_name_mapping.json").write_text(spoc_mapping_json)
print(spoc_missing_count, "missing spoc mappings")

  0%|          | 0/38476 [00:00<?, ?it/s]

Missing success file for /fsx-siro/cgokmen/procthor/assets/2023_07_28/assets/75b9ea70272249dcbe812999297fd27f/75b9ea70272249dcbe812999297fd27f.glb: /fsx-siro/cgokmen/behavior-data2/spoc/objects/crate/spoc75b9ea70272249dcbe812999297fd27f/import.success
1 missing spoc mappings


In [15]:
# Build the AI2-THOR source-filename -> (category, model) mapping, write it to
# <ai2_output_root>/object_name_mapping.json, and count the models whose
# "import.success" marker is absent from the output tree.
ai2_output_root = pathlib.Path("/fsx-siro/cgokmen/behavior-data2/ai2thor")
ai2_hab_root = pathlib.Path("/fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab")
ai2_uc_root = pathlib.Path("/fsx-siro/cgokmen/procthor/ai2thor/ai2thorhab-uncompressed")
ai2_categories = pd.read_csv("/fsx-siro/cgokmen/procthor/ai2thor/ai2thor_categories.csv")
model2cat = dict(zip(ai2_categories["Model Name"], ai2_categories["Category"]))

ai2_main_models = set(ai2_uc_root.glob("assets/objects/*.glb"))
print(len(ai2_main_models), "main models")
ai2_stage_models = set(ai2_hab_root.glob("assets/stages/**/*.glb"))
print(len(ai2_stage_models), "stage models")
ai2_models = sorted(ai2_main_models | ai2_stage_models)
print(len(ai2_models), "models")

# Loop-invariant: the directory under which all stage GLBs were globbed.
stages_dir = ai2_hab_root / "assets" / "stages"

ai2_mapping = {}
ai2_missing_count = 0
for obj_path in tqdm(ai2_models):
    if obj_path in ai2_stage_models:
        # For stages, the file hierarchy is a bit different: the first directory
        # below stages_dir names the stage type.
        assert stages_dir in obj_path.parents, f"Stage GLB {obj_path} is not in stages directory"
        rel_parts = obj_path.relative_to(stages_dir).parts
        # A GLB sitting directly in stages_dir has no type directory; the empty
        # string makes the startswith() check below a no-op.
        stage_type = rel_parts[0] if len(rel_parts) > 1 else ""
        category = "stages"
        model = obj_path.stem
        # If the filename doesn't already start with the type, prefix it.
        if not model.startswith(stage_type):
            model = f"{stage_type}-{model}"
    else:
        model = obj_path.stem
        # Raises KeyError when the model is absent from the categories CSV.
        category = model2cat[model]

    # Sanitize names: the category is lower-cased and every non-alphanumeric
    # character becomes "_"; the model name keeps only alphanumeric characters
    # and gets an "ai2thor" prefix.
    category = "".join(c if c.isalnum() or c == "_" else "_" for c in category.lower())
    # Sanitize `model` (not obj_path.stem) so the stage-type prefix computed
    # above is actually carried into the output name instead of being discarded.
    # NOTE(review): presumably this matches the importer's naming - confirm.
    model = "ai2thor" + "".join(c if c.isalnum() else "" for c in model)

    model_root = ai2_output_root / "objects" / category / model
    success_file = model_root / "import.success"
    if not success_file.exists():
        print(f"Missing success file for {obj_path}: {success_file}")
        ai2_missing_count += 1

    # The mapping is keyed by bare filename stem, so duplicated stems would collide.
    assert obj_path.stem not in ai2_mapping, f"Filename overlap! {obj_path}"
    ai2_mapping[obj_path.stem] = (category, model)

(ai2_output_root / "object_name_mapping.json").write_text(json.dumps(ai2_mapping, indent=4))
print(ai2_missing_count, "missing ai2 mappings")

5320 main models
12235 stage models
17555 models


  0%|          | 0/17555 [00:00<?, ?it/s]

Missing success file for /fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab/assets/stages/ProcTHOR/1/ProcTHOR-Test-114.glb: /fsx-siro/cgokmen/behavior-data2/ai2thor/objects/stages/ai2thorProcTHORTest114/import.success
Missing success file for /fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab/assets/stages/ProcTHOR/2/ProcTHOR-Train-219.glb: /fsx-siro/cgokmen/behavior-data2/ai2thor/objects/stages/ai2thorProcTHORTrain219/import.success
Missing success file for /fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab/assets/stages/ProcTHOR/2/ProcTHOR-Train-448.glb: /fsx-siro/cgokmen/behavior-data2/ai2thor/objects/stages/ai2thorProcTHORTrain448/import.success
Missing success file for /fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab/assets/stages/ProcTHOR/2/ProcTHOR-Train-528.glb: /fsx-siro/cgokmen/behavior-data2/ai2thor/objects/stages/ai2thorProcTHORTrain528/import.success
Missing success file for /fsx-siro/cgokmen/procthor/ai2thor/ai2thor-hab/assets/stages/ProcTHOR/2/ProcTHOR-Train-533.glb: /fsx-siro/cgokmen