In [1]:
import os
import numpy as np
import pandas as pd
import csv
import gzip
import json
from tqdm import tqdm
import shutil

In [2]:
#MP3D to HM3D mapping
path = "/mnt/L3MVN/data/matterport_category_mappings.tsv"

df = pd.read_csv(path, sep="\t")
print(df.head())

items = []

with open(path, 'r') as f:
    text = f.read()
lines = text.split('\n')

for l in lines:
    items.append(l.split('    '))

items_filt = [r for r in items if len(r) > 3]

#Scene Info
# scene_info_path = "data/objectgoal_PersONAL/val/easy/content/4ok3usBNeis.json.gz"
# scene_info_path = "data/objectgoal_PersONAL/val/test_baselines/medium_filt/content/4ok3usBNeis.json.gz"
# scene_info_path = "data/objectgoal_PersONAL/val/test_baselines/hard_filt/content/4ok3usBNeis.json.gz"

# with gzip.open(scene_info_path, "r") as f:
#     info = json.load(f)

# info.keys()

  index    raw_category    category    count    nyuId    nyu40id    eigen13id    nyuClass    nyu40class    eigen13class    ModelNet40    ModelNet10    ShapeNetCore55    synsetoffset    wnsynsetid    wnsynsetkey    mpcat40index    mpcat40
0  1    wall    wall    7667    21    1    12    ...                                                                                                                                                                                         
1  2    door    door    2544    28    8    12    ...                                                                                                                                                                                         
2  3    ceiling    ceiling    2363    4    22    ...                                                                                                                                                                                         
3  4    floor    floor    2252    11    2    5  

In [3]:
mp_cat_ids = [4, 11, 15, 12, 19, 23, 26, 24, 28, 38, 21, 16, 14, 6, 16]

### PersONAL Category to HM3D id

In [7]:
def load_tsv(path):

    items = []
    with open(path, 'r') as f:
        text = f.read()
    lines = text.split('\n')

    for l in lines:
        items.append(l.split('    '))

    return items

In [8]:
def cat_to_id(cat_name, map_cat_to_mp40, map_ids_mp_to_hm3d):

    #Find row corresponding to the category. This is used to extract the MP3D category id
    mp_cat_row = None
    for row in map_cat_to_mp40:
        if len(row) < 4: continue

        if row[2] == cat_name: 
            mp_cat_row = row
            break

    assert mp_cat_row is not None
    
    #Get MP3D id
    obj_mp_id = int(mp_cat_row[-2]) + 1
    assert obj_mp_id in map_ids_mp_to_hm3d

    #Convert id from MP3D to HM3D
    obj_hm3d_id = map_ids_mp_to_hm3d.index(obj_mp_id)
    return obj_hm3d_id

In [9]:
path = "/mnt/L3MVN/data/matterport_category_mappings.tsv"
items = load_tsv(path)

from constants import mp_categories_mapping as mp_cat_ids

hm3d_cat_id = cat_to_id(
                        # cat_name = "chair",
                        cat_name = "microwave",
                        map_cat_to_mp40 = items,
                        map_ids_mp_to_hm3d = mp_cat_ids)


# Test if right
from constants import hm3d_category

print(hm3d_cat_id, hm3d_category[hm3d_cat_id])

9 appliances


### Filter PersONAL Episodes : L3MVN invalid categories

Invalid categories that ones that do not correspond to the 16 HM3D categories (see constants/hm3d_category). Episodes containing these categories are removed.

NOTE : Need to define items_filt, mp_cat_ids

In [73]:
def open_gz(path):

    with gzip.open(path, "r") as f:
        return json.load(f)

def save_gz(path, info):

    with gzip.open(path, "wt") as f:
        json.dump(info, f)

In [78]:
root_dir = "/mnt/PersONAL/data/new/test_baselines"
dest_dir =  "/mnt/PersONAL/data/new/l3mvn_baseline"
os.makedirs(dest_dir, exist_ok = True)

for data_mode in ["easy", "medium", "hard"]:

    #Define content dirs
    source_content_dir = os.path.join(root_dir, data_mode, "content")
    dest_content_dir = os.path.join(dest_dir, data_mode, "content")
    os.makedirs(dest_content_dir, exist_ok = True)

    #Copy easy.json.gz file (similarly for medium, hard)
    source_data_info_path = os.path.join(root_dir, data_mode, f"{data_mode}.json.gz")
    dest_data_info_path = os.path.join(dest_dir, data_mode, f"{data_mode}.json.gz")
    if not os.path.exists(dest_data_info_path):
        shutil.copy(source_data_info_path, dest_data_info_path)

    #Iterate through scene content files
    source_eps, dest_eps = 0, 0
    for f_name in os.listdir(source_content_dir):

        if not f_name.endswith(".json.gz"): continue

        source_path = os.path.join(source_content_dir, f_name)
        dest_path = os.path.join(dest_content_dir, f_name)

        source_info = open_gz(source_path)
        dest_info = source_info.copy()
        dest_info["episodes"] = []

        for ep in source_info["episodes"]:

            obj_cat = ep["object_category"]

            cat_row = next(r for r in items_filt if r[2] == obj_cat)
            obj_hm3d_id = int(cat_row[-2]) + 1

            if obj_hm3d_id in mp_cat_ids:
                dest_info["episodes"].append(ep)

        source_eps += len(source_info['episodes'])
        dest_eps += len(dest_info["episodes"])

        dest_path = os.path.join(dest_content_dir, f_name)
        save_gz(dest_path, dest_info)

    print(f"{data_mode} : {dest_eps} / {source_eps}")


easy : 436 / 600
medium : 427 / 684
hard : 325 / 684


### Filter PersONAL Episodes : Multiple object instances

In [75]:
import os
import json
import gzip
import shutil

In [76]:
def open_gz(path):

    with gzip.open(path, "r") as f:
        return json.load(f)

def save_gz(path, info):

    with gzip.open(path, "wt") as f:
        json.dump(info, f)

In [77]:
root_dir = "/mnt/PersONAL/data/new"
# root_dir = "/mnt/PersONAL/data/split"
dest_dir = "/mnt/PersONAL/data/new/test_baselines"
os.makedirs(dest_dir, exist_ok=True)

for data_mode in ["easy", "medium", "hard"]:

    #Define content dirs
    source_content_dir = os.path.join(root_dir, data_mode, "content")
    dest_content_dir = os.path.join(dest_dir, data_mode, "content")
    os.makedirs(dest_content_dir, exist_ok = True)

    #Copy easy.json.gz file (similarly for medium, hard)
    source_data_info_path = os.path.join(root_dir, data_mode, f"{data_mode}.json.gz")
    dest_data_info_path = os.path.join(dest_dir, data_mode, f"{data_mode}.json.gz")
    if not os.path.exists(dest_data_info_path):
        shutil.copy(source_data_info_path, dest_data_info_path)

    #Iterate through scene content files
    source_eps, dest_eps = 0, 0
    for f_name in os.listdir(source_content_dir):

        if not f_name.endswith(".json.gz"): continue

        source_path = os.path.join(source_content_dir, f_name)
        dest_path = os.path.join(dest_content_dir, f_name)

        source_info = open_gz(source_path)
        dest_info = source_info.copy()
        dest_info["episodes"] = []

        for ep in source_info["episodes"]:

            if (type(ep["object_id"]) is str) and (len(ep["description"]) == 3):
                dest_info["episodes"].append(ep)

        source_eps += len(source_info['episodes'])
        dest_eps += len(dest_info["episodes"])

        dest_path = os.path.join(dest_content_dir, f_name)
        save_gz(dest_path, dest_info)
    
    print(f"{data_mode} : {dest_eps} / {source_eps}")


easy : 600 / 600
medium : 684 / 700
hard : 684 / 700


In [82]:
## Testing if all episodes present
dest_dir = "/mnt/PersONAL/data/new/test_baselines"
dest_dir = "/mnt/zson/data/datasets/PersONAL/val/test_baselines"
# dest_dir = "/mnt/PersONAL/data/new/l3mvn_baselines"

for data_mode in ["easy", "medium", "hard"]:

    ep_num = 0
    content_dir = os.path.join(dest_dir, data_mode, "content")
    for f_name in os.listdir(content_dir):

        if not f_name.endswith(".json.gz"): continue

        f_path = os.path.join(content_dir, f_name)
        info = open_gz(f_path)

        ep_num += len(info["episodes"])

    print(f"{data_mode} : {ep_num}")


easy : 600
medium : 684
hard : 684


##### Rough : To Compare 

In [57]:
og_content = "/mnt/PersONAL/data/split/hard/content"
filt_content = "/mnt/PersONAL/data/split/test_baselines/hard_filt/content"

og_c = 0
filt_c = 0
for f_name in os.listdir(og_content):

    f_og_path = os.path.join(og_content, f_name)
    f_filt_path = os.path.join(filt_content, f_name)

    f_og = open_gz(f_og_path)
    f_filt = open_gz(f_filt_path)

    # print(f"{len(f_og['episodes'])}, {len(f_filt['episodes'])}")

    og_c += len(f_og["episodes"])
    filt_c += len(f_filt["episodes"])

print(filt_c, og_c)

680 700


In [34]:
len(f_og["episodes"])

38

In [35]:
len(f_filt["episodes"])

37

In [40]:
c = 0
for ep in f_og["episodes"]:

    if type(ep["object_id"]) is not str:
        c += 1
        print(ep)

print(c)

{'episode_id': '3599', 'scene_id': 'hm3d_v0.2/val/00891-cvZr5TUy5C5/cvZr5TUy5C5.basis.glb', 'scene_dataset_config': './data/scene_datasets/hm3d_v0.2/hm3d_annotated_basis.scene_dataset_config.json', 'object_category': 'mirror', 'object_id': ['mirror_372', 'mirror_228'], 'description': [['Mirror located over a bathroom wooden counter and sink and next to a bath tub', '', ''], ['Mirror inside wooden dresser located on the right side of the bed', '', '']], 'owner': 'Giovanni', 'floor_id': '2', 'summary': 'On the second floor, the master bedroom features an elegant old style grey bed with two red pillows, a bed table near the white door, and a dark red sofa on the opposite side of the king-sized bed. Cameron, Jasmine, and Maria all own the bed, with Cameron also sharing the dark red sofa set with Logan, who additionally owns a red pillow and a mirror above a bathroom counter in another room. The bed table is owned by both Beau and Emmett, the latter of whom also possesses the red pillow and

### Rough

In [4]:
ind = 0

print(items[ind][2], items[ind][-1])

category mpcat40


In [37]:
cats = np.unique([r[2] for r in items if len(r)>3])
cats_mp40 = np.unique([r[-1] for r in items if len(r)>3])

print(len(cats), len(cats_mp40))

1147 43


In [38]:
cats_mp40

array(['appliances', 'bathtub', 'beam', 'bed', 'blinds', 'board_panel',
       'cabinet', 'ceiling', 'chair', 'chest_of_drawers', 'clothes',
       'column', 'counter', 'curtain', 'cushion', 'door', 'fireplace',
       'floor', 'furniture', 'gym_equipment', 'lighting', 'mirror',
       'misc', 'mpcat40', 'objects', 'picture', 'plant', 'railing',
       'seating', 'shelving', 'shower', 'sink', 'sofa', 'stairs', 'stool',
       'table', 'toilet', 'towel', 'tv_monitor', 'unlabeled', 'void',
       'wall', 'window'], dtype='<U16')

dict_keys(['goals_by_category', 'episodes', 'category_to_task_category_id', 'category_to_scene_annotation_category_id'])

In [24]:
info["episodes"][0]

{'episode_id': '195',
 'scene_id': 'hm3d_v0.2/val/00877-4ok3usBNeis/4ok3usBNeis.basis.glb',
 'scene_dataset_config': './data/scene_datasets/hm3d_v0.2/hm3d_annotated_basis.scene_dataset_config.json',
 'object_category': 'couch',
 'object_id': 'couch_160',
 'description': ['plaid couch in the room. it is of a red-brown color.',
  '',
  ''],
 'owner': 'Sebastian',
 'floor_id': '0',
 'summary': "On the ground floor, there is a plaid couch in the living room, distinguished by its red-brown color. This couch belongs to Sebastian. In the bathroom on the same floor, a washbasin is situated near the toilet seat and the mirror, and it is owned by Wesley. Each person's belongings add character to their respective rooms.",
 'extracted_summary': ['Sebastian owns a plaid red-brown couch in the living room',
  'Wesley owns a washbasin near the toilet seat and the mirror in the bathroom'],
 'query': ["Find Sebastian's couch",
  "Where is Sebastian's couch?",
  "Locate Sebastian's couch",
  "Retrieve S

In [122]:
personal_to_mp3d = {}
personal_counts = {}

scene_info_dir = os.path.dirname(scene_info_path)

for scene_path in os.listdir(scene_info_dir):

    scene_path = os.path.join(scene_info_dir, scene_path)
    with gzip.open(scene_path, "r") as f:
        scene_info = json.load(f)

    for ep in scene_info["episodes"]:

        obj_cat = ep["object_category"]

        cat_row = next(r for r in items_filt if r[2] == obj_cat)
        obj_cat_map = cat_row[-1]
        obj_cat_id = int(cat_row[-2]) + 1

        obj_cat_map = f"{obj_cat_map}_{str(obj_cat_id)}"
        
        # cat_map = [r[-1] for r in items_filt if r[2] == obj_cat]
        # assert len(cat_map)>0 and len(np.unique(cat_map))==1

        # obj_cat_map = cat_map[0]

        personal_to_mp3d[obj_cat] = obj_cat_map

        if obj_cat_map in personal_counts:
            personal_counts[obj_cat_map] += 1
        else:
            personal_counts[obj_cat_map] = 1

In [123]:
personal_to_mp3d

{'kitchen cabinet': 'cabinet_8',
 'microwave': 'appliances_38',
 'sideboard': 'misc_41',
 'bathroom counter': 'counter_27',
 'bed': 'bed_12',
 'chair': 'chair_4',
 'lamp': 'lighting_29',
 'refrigerator': 'appliances_38',
 'cushion': 'cushion_9',
 'chest of drawers': 'chest_of_drawers_14',
 'nightstand': 'chest_of_drawers_14',
 'mirror': 'mirror_22',
 'blanket': 'objects_40',
 'armchair': 'chair_4',
 'washer-dryer': 'misc_41',
 'table': 'table_6',
 'tv': 'tv_monitor_23',
 'cooker': 'objects_40',
 'picture': 'picture_7',
 'couch': 'sofa_11',
 'dining chair': 'chair_4',
 'toilet': 'toilet_19',
 'pillow': 'cushion_9',
 'computer chair': 'chair_4',
 'washing machine': 'appliances_38',
 'computer desk': 'table_6',
 'kitchen shelf': 'shelving_32',
 'dishwasher': 'appliances_38',
 'sofa seat': 'misc_41',
 'oven': 'appliances_38',
 'shelf': 'shelving_32',
 'calendar': 'misc_41',
 'sofa set': 'sofa_11',
 'bed table': 'table_6',
 'railing': 'railing_31',
 'bathroom cabinet': 'cabinet_8',
 'plant'

In [124]:
print(len(personal_to_mp3d.keys()), len(np.unique(list(personal_to_mp3d.values()))))

114 31


In [125]:
personal_to_mp3d.values()

dict_values(['cabinet_8', 'appliances_38', 'misc_41', 'counter_27', 'bed_12', 'chair_4', 'lighting_29', 'appliances_38', 'cushion_9', 'chest_of_drawers_14', 'chest_of_drawers_14', 'mirror_22', 'objects_40', 'chair_4', 'misc_41', 'table_6', 'tv_monitor_23', 'objects_40', 'picture_7', 'sofa_11', 'chair_4', 'toilet_19', 'cushion_9', 'chair_4', 'appliances_38', 'table_6', 'shelving_32', 'appliances_38', 'misc_41', 'appliances_38', 'shelving_32', 'misc_41', 'sofa_11', 'table_6', 'railing_31', 'cabinet_8', 'plant_15', 'bathtub_26', 'cabinet_8', 'objects_40', 'stool_20', 'fireplace_28', 'objects_40', 'objects_40', 'misc_41', 'objects_40', 'chest_of_drawers_14', 'appliances_38', 'objects_40', 'objects_40', 'furniture_37', 'objects_40', 'misc_41', 'objects_40', 'clothes_39', 'misc_41', 'towel_21', 'chest_of_drawers_14', 'objects_40', 'appliances_38', 'clothes_39', 'furniture_37', 'counter_27', 'floor_3', 'objects_40', 'misc_41', 'seating_35', 'floor_3', 'railing_31', 'objects_40', 'objects_40',

In [126]:
np.unique(list(personal_to_mp3d.values()))

array(['appliances_38', 'bathtub_26', 'bed_12', 'board_panel_36',
       'cabinet_8', 'chair_4', 'chest_of_drawers_14', 'clothes_39',
       'counter_27', 'cushion_9', 'fireplace_28', 'floor_3',
       'furniture_37', 'lighting_29', 'mirror_22', 'misc_41',
       'objects_40', 'picture_7', 'plant_15', 'railing_31', 'seating_35',
       'shelving_32', 'shower_24', 'sink_16', 'sofa_11', 'stairs_17',
       'stool_20', 'table_6', 'toilet_19', 'towel_21', 'tv_monitor_23'],
      dtype='<U19')

In [127]:
personal_counts

{'cabinet_8': 31,
 'appliances_38': 61,
 'misc_41': 41,
 'counter_27': 11,
 'bed_12': 39,
 'chair_4': 74,
 'lighting_29': 14,
 'cushion_9': 41,
 'chest_of_drawers_14': 38,
 'mirror_22': 34,
 'objects_40': 64,
 'table_6': 44,
 'tv_monitor_23': 25,
 'picture_7': 27,
 'sofa_11': 14,
 'toilet_19': 12,
 'shelving_32': 13,
 'railing_31': 4,
 'plant_15': 3,
 'bathtub_26': 7,
 'stool_20': 3,
 'fireplace_28': 9,
 'furniture_37': 7,
 'clothes_39': 12,
 'towel_21': 13,
 'floor_3': 14,
 'seating_35': 4,
 'sink_16': 13,
 'shower_24': 2,
 'board_panel_36': 3,
 'stairs_17': 3}

In [128]:
sum(personal_counts.values())

680

In [129]:
items_filt[0]

['index',
 'raw_category',
 'category',
 'count',
 'nyuId',
 'nyu40id',
 'eigen13id',
 'nyuClass',
 'nyu40class',
 'eigen13class',
 'ModelNet40',
 'ModelNet10',
 'ShapeNetCore55',
 'synsetoffset',
 'wnsynsetid',
 'wnsynsetkey',
 'mpcat40index',
 'mpcat40']

In [130]:
mp_cat_ids = [4, 11, 15, 12, 19, 23, 26, 24, 28, 38, 21, 16, 14, 6, 16]
mp_cat_names = []

for id in mp_cat_ids:
    id_row = next(r for r in items_filt if r[16] == str(id-1))
    mp_cat_names.append(id_row[-1])

mp_cat_names

['chair',
 'sofa',
 'plant',
 'bed',
 'toilet',
 'tv_monitor',
 'bathtub',
 'shower',
 'fireplace',
 'appliances',
 'towel',
 'sink',
 'chest_of_drawers',
 'table',
 'sink']

In [131]:
filt_counts = {}

for k, v in personal_counts.items():

    id = int( k.split("_")[-1] )
    if id in mp_cat_ids:
        filt_counts[k] = v


print(f"{sum(filt_counts.values())}/{sum(personal_counts.values())}\n")

filt_counts

354/680



{'appliances_38': 61,
 'bed_12': 39,
 'chair_4': 74,
 'chest_of_drawers_14': 38,
 'table_6': 44,
 'tv_monitor_23': 25,
 'sofa_11': 14,
 'toilet_19': 12,
 'plant_15': 3,
 'bathtub_26': 7,
 'fireplace_28': 9,
 'towel_21': 13,
 'sink_16': 13,
 'shower_24': 2}

In [5]:
import os 
import numpy as np
import gzip
import json

root_dir = "data/objectgoal_PersONAL/active_new/val/l3mvn_baseline"

os.listdir(root_dir)

['easy', 'medium', 'hard']

In [10]:
from constants import mp_cat_to_hm3d_id, mp_categories_mapping, hm3d_category

In [13]:
content_dir = os.path.join(root_dir, "easy/content")
eps = 0

for f in os.listdir(content_dir):

    f_path = os.path.join(content_dir, f)

    with gzip.open(f_path, "r") as i:
        info = json.load(i)

    info = info["episodes"]
    eps += len(info)
    for ep_info in info:
        scene_id, ep_id = ep_info["scene_id"].split("/")[-1].split(".")[0], \
                            ep_info["episode_id"]

        obj_cat, obj_descr = ep_info["object_category"], ep_info["description"][0]
        hm3d_id = mp_cat_to_hm3d_id(obj_cat, 
                                        'data/matterport_category_mappings.tsv',
                                        mp_categories_mapping)

print(eps)

    

436


In [12]:
ep_info

{'episode_id': '216',
 'scene_id': 'hm3d_v0.2/val/00877-4ok3usBNeis/4ok3usBNeis.basis.glb',
 'scene_dataset_config': './data/scene_datasets/hm3d_v0.2/hm3d_annotated_basis.scene_dataset_config.json',
 'object_category': 'couch',
 'object_id': 'couch_269',
 'description': ['black leather couch in the room. the couch is located near a lamp table. it is also close to an old tv.',
  '',
  ''],
 'owner': 'Phoebe',
 'floor_id': '1',
 'summary': "On the first floor's bathroom, Harper owns a washbasin, featuring a double sink vanity with a mirror above it, situated next to the toilet seat and blue shower curtains. In the kitchen on the same floor, Alani is the owner of a double sink cabinet located beside the kitchen counter and dishwasher. Moving to the bedroom, Kingston possesses a white cloth placed near the dresser and the curtain, close to the bed in the room's corner. In the living room on the first floor, Phoebe owns a black leather couch positioned near a lamp table and close to an old 