In [1]:
import torch

from ai2thor.platform import CloudRendering
from env.environment import HomeServiceEnvironment

# Keyword arguments handed to HomeServiceEnvironment as `controller_kwargs`.
# No X display is configured; rendering goes through the CloudRendering
# platform on GPU device 0.
THOR_CONTROLLER_KWARGS = dict(
    snapToGrid=True,
    fastActionEmit=True,
    renderDepthImage=True,
    scene="Procedural",
    x_display=None,
    gpu_device=0,
    platform=CloudRendering,
)

# Single environment instance shared by the cells that follow.
env = HomeServiceEnvironment(
    controller_kwargs=THOR_CONTROLLER_KWARGS,
    force_cache_reset=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Loading train: 100%|██████████| 10000/10000 [00:01<00:00, 8559.89it/s]
Loading val: 100%|██████████| 1000/1000 [00:00<00:00, 8813.21it/s]
Loading test: 100%|██████████| 1000/1000 [00:00<00:00, 8615.97it/s]


In [3]:
from collections import defaultdict
from datagen.datagen_constants import GOOD_4_ROOM_HOUSES

# For every split, build:
#   * stage_to_scenes[split][scene]              -> per-scene room/object metadata
#   * stage_to_pickupable_type_to_scenes[split]  -> {objectType: [scene, ...]}
#   * stage_to_receptacle_type_to_scenes[split]  -> {objectType: [scene, ...]}
stage_to_pickupable_type_to_scenes = dict()
stage_to_receptacle_type_to_scenes = dict()
stage_to_scenes = dict()

# A house is kept only when its set of room types is exactly this one.
REQUIRED_ROOM_TYPES = {"Bathroom", "Bedroom", "Kitchen", "LivingRoom"}


def _group_ids_by_type(room_id_to_obj_ids, ids_to_objs):
    """Group object ids by object type within each room.

    Args:
        room_id_to_obj_ids: mapping ``room_id -> iterable of object ids``.
        ids_to_objs: mapping ``object id -> object metadata``; only the
            ``"objectType"`` entry of each object is read.

    Returns:
        ``{room_id: defaultdict(list)}`` mapping each object type found in
        the room to the ids of the objects of that type.
    """
    grouped = {room_id: defaultdict(list) for room_id in room_id_to_obj_ids}
    for room_id, obj_ids in room_id_to_obj_ids.items():
        for obj_id in obj_ids:
            grouped[room_id][ids_to_objs[obj_id]["objectType"]].append(obj_id)
    return grouped


for split, scene_idxs in GOOD_4_ROOM_HOUSES.items():
    pickupable_type_to_scenes = defaultdict(set)
    receptacle_type_to_scenes = defaultdict(set)
    stage_to_scenes[split] = dict()

    for scene_idx in scene_idxs:
        scene = f"{split}_{scene_idx}"
        env.procthor_reset(scene_name=scene)

        rooms = env.current_house["rooms"]
        if {room["roomType"] for room in rooms} != REQUIRED_ROOM_TYPES:
            continue

        room_type_to_room_id = {room["roomType"]: room["id"] for room in rooms}

        room_id_to_pickupable_ids = {
            k: list(v) for k, v in env.room_to_pickupable_ids().items()
        }
        room_id_to_static_receptacle_ids = {
            k: list(v) for k, v in env.room_to_static_receptacle_ids().items()
        }

        # Looked up once per scene instead of once (or twice) per object.
        # Assumes ids_to_objs() returns the same mapping while the scene is
        # not stepped -- TODO confirm against HomeServiceEnvironment.
        ids_to_objs = env.ids_to_objs()

        room_ids_to_pickupable_type_to_ids = _group_ids_by_type(
            room_id_to_pickupable_ids, ids_to_objs
        )
        room_ids_to_receptacle_type_to_ids = _group_ids_by_type(
            room_id_to_static_receptacle_ids, ids_to_objs
        )

        # Inverted index: every type present in any room marks this scene.
        for type_to_ids in room_ids_to_pickupable_type_to_ids.values():
            for ptype in type_to_ids:
                pickupable_type_to_scenes[ptype].add(scene)
        for type_to_ids in room_ids_to_receptacle_type_to_ids.values():
            for rtype in type_to_ids:
                receptacle_type_to_scenes[rtype].add(scene)

        scene_info = {
            "room_type_to_room_id": room_type_to_room_id,
            "pickupables": {
                "room_id_to_obj_ids": room_id_to_pickupable_ids,
                "room_id_to_obj_types_to_ids": room_ids_to_pickupable_type_to_ids,
            },
            "receptacles": {
                "room_id_to_obj_ids": room_id_to_static_receptacle_ids,
                "room_id_to_obj_types_to_ids": room_ids_to_receptacle_type_to_ids,
            },
        }
        stage_to_scenes[split][scene] = scene_info

    # sorted() rather than list(): set iteration order for strings changes
    # between interpreter runs, which would make the dumped metadata (and
    # anything derived from its ordering) non-reproducible.
    stage_to_pickupable_type_to_scenes[split] = {
        k: sorted(v) for k, v in pickupable_type_to_scenes.items()
    }
    stage_to_receptacle_type_to_scenes[split] = {
        k: sorted(v) for k, v in receptacle_type_to_scenes.items()
    }
        




In [9]:
import compress_pickle
import json
import os
from env.constants import STARTER_HOME_SERVICE_DATA_DIR

# Persist the per-stage metadata twice: as JSON and as gzip-compressed pickle.
metadata_dir = os.path.join(STARTER_HOME_SERVICE_DATA_DIR, "metadata")
os.makedirs(metadata_dir, exist_ok=True)

for stage in stage_to_scenes:
    # (file-name suffix, payload) pairs, in the order they are written.
    stage_payloads = (
        ("scenes", stage_to_scenes[stage]),
        ("pickupables", stage_to_pickupable_type_to_scenes[stage]),
        ("receptacles", stage_to_receptacle_type_to_scenes[stage]),
    )

    # All JSON files first ...
    for mdset, payload in stage_payloads:
        with open(os.path.join(metadata_dir, f"{stage}_metadata_{mdset}.json"), "w") as f:
            json.dump(payload, f)

    # ... then the compressed pickles.
    for mdset, payload in stage_payloads:
        compress_pickle.dump(
            obj=payload,
            path=os.path.join(metadata_dir, f"{stage}_metadata_{mdset}.pkl.gz"),
        )

In [1]:
from datagen.datagen_constants import STAGE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED

In [2]:
# Number of valid tasks for train, val and test (in that order).
[len(STAGE_TO_VALID_TASKS[split_name]) for split_name in ("train", "val", "test")]

[363, 419, 442]

In [3]:
from datagen.datagen_constants import STAGE_TO_MIN_SCENES, STAGE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED

# For each stage, walk the scenes in the order given by
# STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED and stop as soon as every valid task
# has been seen in at least STAGE_TO_MIN_SCENES[stage] scenes. Prints the index
# and name of the scene at which that happens.
___stage_to_task_to_scenes = {}
___stage_to_check_tasks = {}
for stage in ("train", "val", "test"):
    min_scenes = STAGE_TO_MIN_SCENES[stage]
    num_valid_tasks = len(STAGE_TO_VALID_TASKS[stage])

    task_to_scenes = {}
    # Tasks that already reached `min_scenes` scenes. Scene sets only grow, so
    # a task never leaves this set once added.
    check_tasks = set()
    done = False
    for it, scene in enumerate(STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED[stage]):
        for task in STAGE_TO_SCENE_TO_VALID_TASKS[stage][scene]:
            task_to_scenes.setdefault(task, set()).add(scene)
            if len(task_to_scenes[task]) >= min_scenes:
                check_tasks.add(task)

            # check_tasks holds exactly the seen tasks that meet the threshold,
            # so comparing sizes replaces re-scanning every task's scene set
            # after each insertion.
            if len(task_to_scenes) == num_valid_tasks == len(check_tasks):
                done = True
                break

        if done:
            print(it, scene)
            ___stage_to_task_to_scenes[stage] = task_to_scenes
            ___stage_to_check_tasks[stage] = check_tasks
            break

    if not done:
        # Previously a stage that never met the threshold was skipped silently.
        print(f"{stage}: threshold not reached for all valid tasks")

1380 train_5896
113 val_271
119 test_294


In [6]:
from datagen.datagen_constants import STAGE_TO_VALID_TASK_TO_SCENES

# Per stage: how many scenes each valid task can be run in.
STAGE_TO_VALID_TASK_TO_NUM_SCENES = {
    stage_name: {
        task_name: len(scene_list)
        for task_name, scene_list in per_task.items()
    }
    for stage_name, per_task in STAGE_TO_VALID_TASK_TO_SCENES.items()
}

# Same counts, with tasks ordered from fewest to most scenes.
STAGE_TO_VALID_TASK_TO_NUM_SCENES_SORTED = {
    stage_name: dict(sorted(counts.items(), key=lambda pair: pair[1]))
    for stage_name, counts in STAGE_TO_VALID_TASK_TO_NUM_SCENES.items()
}

In [28]:
from datagen.datagen_constants import (
    STAGE_TO_MIN_SCENES_COVERING_ALL_VALID_TASKS, 
    STAGE_TO_NUM_MIN_SCENES_COVERING_ALL_VALID_TASKS, 
    STAGE_TO_SCENE_TO_VALID_TASKS,
    STAGE_TO_VALID_TASK_TO_SCENES,
    STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED,
    STAGE_TO_DEST_NUM_SCENES
)
# Starting from the scenes in STAGE_TO_MIN_SCENES_COVERING_ALL_VALID_TASKS,
# repeatedly add one more scene containing the task that currently has the
# fewest scenes, until that task has STAGE_TO_DEST_NUM_SCENES[stage] scenes.
# Result per stage: tt[stage] = {"scenes": task -> [scene], "num_scenes": task -> int},
# both ordered from the least- to the most-covered task.
tt = {}
for stage in STAGE_TO_MIN_SCENES_COVERING_ALL_VALID_TASKS:
    task_to_scenes = {}
    valid_scenes = []
    for scene in STAGE_TO_MIN_SCENES_COVERING_ALL_VALID_TASKS[stage]:
        valid_scenes.append(scene)
        for task in STAGE_TO_SCENE_TO_VALID_TASKS[stage][scene]:
            task_to_scenes.setdefault(task, []).append(scene)
    # Set mirror of valid_scenes for O(1) "already selected?" checks.
    selected_scenes = set(valid_scenes)

    task_to_num_scenes = {
        task: len(scenes) for task, scenes in task_to_scenes.items()
    }
    task_to_scenes = dict(
        sorted(
            task_to_scenes.items(),
            key=lambda item: task_to_num_scenes[item[0]],
        )
    )

    dest_num_scenes = STAGE_TO_DEST_NUM_SCENES[stage]
    # `while task_to_scenes` also covers a stage with no seed scenes, which
    # used to raise IndexError on `list(...)[0]`.
    while task_to_scenes:
        # task_to_scenes is kept sorted by count, so its first key is the
        # least-covered task.
        min_num_task = next(iter(task_to_scenes))
        if task_to_num_scenes[min_num_task] >= dest_num_scenes:
            break

        possible_scenes = STAGE_TO_VALID_TASK_TO_SCENES[stage][min_num_task]
        possible_scene_set = set(possible_scenes)
        new_scene = next(
            (
                candidate
                for candidate in STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED[stage]
                if candidate in possible_scene_set
                and candidate not in selected_scenes
            ),
            None,
        )
        if new_scene is None:
            # No unused scene contains this task: without this exit the loop
            # would spin forever, because nothing could change its condition.
            print(
                f"{stage}: cannot reach {dest_num_scenes} scenes for "
                f"{min_num_task} (stuck at {task_to_num_scenes[min_num_task]})"
            )
            break

        valid_scenes.append(new_scene)
        selected_scenes.add(new_scene)
        for task in STAGE_TO_SCENE_TO_VALID_TASKS[stage][new_scene]:
            # setdefault/get: tolerate a task the seed scenes did not contain.
            task_to_scenes.setdefault(task, []).append(new_scene)
            task_to_num_scenes[task] = task_to_num_scenes.get(task, 0) + 1
        task_to_scenes = dict(
            sorted(
                task_to_scenes.items(),
                key=lambda item: task_to_num_scenes[item[0]],
            )
        )

    # Sort the counts last so both result dicts share the same ordering.
    task_to_num_scenes = dict(
        sorted(
            task_to_num_scenes.items(),
            key=lambda item: item[1],
        )
    )

    tt[stage] = {
        "scenes": task_to_scenes,
        "num_scenes": task_to_num_scenes,
    }

In [49]:
from datagen.datagen_constants import STAGE_TO_VALID_TASK_TO_SCENES, STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED
# One manual step on tt["train"]: take the task listed first in
# tt["train"]["scenes"], add the first not-yet-used scene that contains it,
# update the per-task lists and counts, and re-sort both dicts by count.
# NOTE: mutates `tt` in place, so re-running this cell performs another step.
min_num_task = list(tt["train"]["scenes"].keys())[0]
valid_scenes = {
    used_scene
    for used_scenes in tt["train"]["scenes"].values()
    for used_scene in used_scenes
}
possible_scenes = STAGE_TO_VALID_TASK_TO_SCENES["train"][min_num_task]

for scene in STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED["train"]:
    # Skip scenes that lack the task or are already in use.
    if scene not in possible_scenes or scene in valid_scenes:
        continue

    valid_scenes.add(scene)
    for task in STAGE_TO_SCENE_TO_VALID_TASKS["train"][scene]:
        tt["train"]["scenes"][task].append(scene)
        tt["train"]["num_scenes"][task] += 1

    # Re-order both views from least- to most-covered task.
    task_to_num_scenes = dict(
        sorted(tt["train"]["num_scenes"].items(), key=lambda pair: pair[1])
    )
    task_to_scenes = dict(
        sorted(
            tt["train"]["scenes"].items(),
            key=lambda pair: tt["train"]["num_scenes"][pair[0]],
        )
    )
    tt["train"]["scenes"] = task_to_scenes
    tt["train"]["num_scenes"] = task_to_num_scenes
    min_num_task = list(task_to_scenes.keys())[0]
    # Only one scene is added per run of this cell.
    break

In [1]:
from datagen.datagen_constants import (
    STAGE_TO_VALID_TASK_TO_NUM_SCENES_SORTED, 
    STAGE_TO_VALID_TASK_TO_SCENES, 
    STAGE_TO_SCENE_TO_VALID_TASKS,
    STAGE_TO_DEST_NUM_SCENES,
)

# For each stage, visit tasks in the order of
# STAGE_TO_VALID_TASK_TO_NUM_SCENES_SORTED and pull in every scene of the
# visited task, recording all valid tasks of each newly added scene. Stops once
# the least-covered recorded task has at least STAGE_TO_DEST_NUM_SCENES[stage]
# scenes.
stage_to_task_to_scenes_ = {}
stage_to_task_to_num_scenes_ = {}
stage_to_scene_set_ = {}
for stage in STAGE_TO_VALID_TASK_TO_NUM_SCENES_SORTED:
    scene_set = set()
    task_to_scenes = {}
    # Reset per stage so a stage without tasks stores an empty dict rather
    # than raising NameError or reusing the previous stage's counts.
    task_to_num_scenes = {}
    # `seed_task` is kept distinct from the inner `task`; the original reused
    # one name for both loops.
    for seed_task in STAGE_TO_VALID_TASK_TO_NUM_SCENES_SORTED[stage]:
        for scene in STAGE_TO_VALID_TASK_TO_SCENES[stage][seed_task]:
            if scene in scene_set:
                continue
            scene_set.add(scene)
            for task in STAGE_TO_SCENE_TO_VALID_TASKS[stage][scene]:
                task_to_scenes.setdefault(task, set()).add(scene)

        task_to_num_scenes = {
            task: len(scenes) for task, scenes in task_to_scenes.items()
        }
        task_to_scenes = dict(
            sorted(
                task_to_scenes.items(),
                key=lambda item: task_to_num_scenes[item[0]],
            )
        )
        task_to_num_scenes = dict(
            sorted(
                task_to_num_scenes.items(),
                key=lambda item: item[1],
            )
        )
        # The emptiness guard avoids IndexError when the first task(s)
        # contributed no scenes at all.
        if task_to_num_scenes and (
            min(task_to_num_scenes.values()) >= STAGE_TO_DEST_NUM_SCENES[stage]
        ):
            break

    stage_to_task_to_scenes_[stage] = task_to_scenes
    stage_to_task_to_num_scenes_[stage] = task_to_num_scenes
    stage_to_scene_set_[stage] = scene_set

In [10]:
# Number of distinct train scenes held in stage_to_scene_set_.
len(stage_to_scene_set_["train"])

849

In [8]:
# Train scenes recorded for SoapBar->SideTable but not for Pillow->Bed.
train_task_to_scenes_ = stage_to_task_to_scenes_["train"]
[
    candidate
    for candidate in train_task_to_scenes_["train_pick_and_place_SoapBar_SideTable"]
    if candidate not in train_task_to_scenes_["train_pick_and_place_Pillow_Bed"]
]

['train_5514', 'train_1788', 'train_1594']

In [23]:
from datagen.datagen_constants import STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED
# First train scene (in STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED order) that is
# in `possible_scenes`. `possible_scenes` must already exist in the kernel; it
# is not defined in this cell.
# Yields None when nothing matches; the for/break form printed the last scene
# in that case, which looked like a hit, and raised NameError on empty input.
scene = next(
    (
        candidate
        for candidate in STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED["train"]
        if candidate in possible_scenes
    ),
    None,
)
print(scene)

train_7967


In [25]:
# Train scene names in the order stored in STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED.
# NOTE(review): this displays the entire key list; slicing it (e.g. `[:20]`)
# would keep the saved output short.
list(STAGE_TO_SCENE_TO_NUM_VALID_TASKS_SORTED["train"].keys())

['train_7278',
 'train_6497',
 'train_3435',
 'train_8414',
 'train_7967',
 'train_9361',
 'train_8510',
 'train_1919',
 'train_8208',
 'train_188',
 'train_1095',
 'train_3944',
 'train_7544',
 'train_6838',
 'train_7620',
 'train_7661',
 'train_1324',
 'train_2206',
 'train_3545',
 'train_4264',
 'train_6953',
 'train_7031',
 'train_8638',
 'train_9060',
 'train_1788',
 'train_3830',
 'train_4706',
 'train_9268',
 'train_829',
 'train_3135',
 'train_9814',
 'train_3482',
 'train_5691',
 'train_7317',
 'train_7424',
 'train_6278',
 'train_1810',
 'train_6242',
 'train_4284',
 'train_8109',
 'train_3274',
 'train_5381',
 'train_9865',
 'train_6822',
 'train_7839',
 'train_4599',
 'train_7431',
 'train_8927',
 'train_9026',
 'train_8287',
 'train_8991',
 'train_291',
 'train_5123',
 'train_7137',
 'train_9953',
 'train_148',
 'train_4304',
 'train_3507',
 'train_1205',
 'train_6823',
 'train_9943',
 'train_2720',
 'train_5195',
 'train_5198',
 'train_8056',
 'train_8838',
 'train_3321',

In [7]:
import json
import compress_pickle

def _read_json(path):
    """Return the parsed JSON document stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left for the garbage collector.
    """
    with open(path, 'r') as f:
        return json.load(f)


# Load the three JSON metadata sets written for the train stage.
stage = "train"
mdsets = ("scenes", "pickupables", "receptacles")
md = {
    stage: {
        mdset: _read_json(f'data/home_service/metadata/{stage}_metadata_{mdset}.json')
        for mdset in mdsets
    }
}


In [11]:
# Compressed-pickle counterpart of `md`: same stage, same metadata sets.
# Uses `stage` and `mdsets` from the JSON-loading cell.
# NOTE(review): unpickling executes code from the file; only load files this
# project wrote itself.
mdc = {
    stage: dict(
        (
            mdset,
            compress_pickle.load(f'data/home_service/metadata/{stage}_metadata_{mdset}.pkl.gz'),
        )
        for mdset in mdsets
    )
}

In [51]:
# Number of house indices scanned per split.
num_houses = dict(train=10000, val=1000, test=1000)

# good_rooms[split] collects the indices of houses whose spec id is "4-room"
# and which still expose exactly four rooms after being loaded into `env`.
good_rooms = {}
for split in ["train", "val", "test"]:
    good_rooms[split] = []
    for i in range(num_houses[split]):
        # Reads the raw house record through private attributes of `env`.
        house = env._houses._data[split][i]
        if house["metadata"]["roomSpecId"] == "4-room":
            env.procthor_reset(scene_name=f"{split}_{i}", force_reset=True, place_agent=True)
            # Room count is re-checked on the loaded house, not the raw record.
            if len(env.current_house["rooms"]) == 4:
                good_rooms[split].append(i)


In [52]:
# Report how many houses were kept for each split.
for split, kept_indices in good_rooms.items():
    print(f'split: {split} | num: {len(kept_indices)}')

split: train | num: 1467
split: val | num: 143
split: test | num: 146


In [1]:
from datagen.datagen_constants import HOME_SERVICE_HOUSES_METADATA, STAGE_TO_TASKS

In [2]:
# Restrict STAGE_TO_TASKS to the single stage under inspection.
stage = "train"
stage_to_tasks = {
    name: tasks
    for name, tasks in STAGE_TO_TASKS.items()
    if name == stage
}

In [3]:
# Number of tasks listed for the selected stage.
len(stage_to_tasks[stage])

436

In [4]:
# For every task, list the scenes that contain both its pickupable type and
# its receptacle type. Task names end in "..._<Pickupable>_<Receptacle>".
stage_to_task_to_scenes = {
    stage: {}
}
for stage, tasks in stage_to_tasks.items():
    stage_md = HOME_SERVICE_HOUSES_METADATA[stage]
    task_to_scenes_for_stage = stage_to_task_to_scenes.setdefault(stage, {})

    # Scene collections converted to sets once per object type, so the
    # per-scene membership tests below are O(1) instead of a scan repeated
    # for every (task, scene) pair.
    pickupable_scene_sets = {}
    receptacle_scene_sets = {}

    for task in tasks:
        pick, recep = task.split("_")[-2:]
        if pick not in pickupable_scene_sets:
            # .get(): a type absent from this stage simply matches no scene.
            pickupable_scene_sets[pick] = set(stage_md["pickupables"].get(pick, ()))
        if recep not in receptacle_scene_sets:
            receptacle_scene_sets[recep] = set(stage_md["receptacles"].get(recep, ()))
        pick_scenes = pickupable_scene_sets[pick]
        recep_scenes = receptacle_scene_sets[recep]

        # Iterate stage_md["scenes"] so the result keeps that ordering.
        task_to_scenes_for_stage[task] = [
            scene
            for scene in stage_md["scenes"]
            if scene in pick_scenes and scene in recep_scenes
        ]

In [8]:
# Print "<task>: <number of scenes>" for every task of every selected stage.
for stage, tasks in stage_to_tasks.items():
    for task in tasks:
        print(f'{task}: {len(stage_to_task_to_scenes[stage][task])}')

train_pick_and_place_AlarmClock_Dresser: 721
train_pick_and_place_AlarmClock_Desk: 173
train_pick_and_place_AlarmClock_SideTable: 333
train_pick_and_place_AlarmClock_DiningTable: 713
train_pick_and_place_AlarmClock_TVStand: 483
train_pick_and_place_AlarmClock_CoffeeTable: 10
train_pick_and_place_AlarmClock_CounterTop: 719
train_pick_and_place_AlarmClock_Shelf: 530
train_pick_and_place_Apple_Microwave: 147
train_pick_and_place_Apple_Fridge: 942
train_pick_and_place_Apple_Sink: 725
train_pick_and_place_Apple_DiningTable: 966
train_pick_and_place_Apple_TVStand: 635
train_pick_and_place_Apple_CoffeeTable: 23
train_pick_and_place_Apple_SideTable: 419
train_pick_and_place_Apple_Desk: 205
train_pick_and_place_Apple_CounterTop: 975
train_pick_and_place_Apple_GarbageCan: 903
train_pick_and_place_Apple_Dresser: 972
train_pick_and_place_BaseballBat_Bed: 811
train_pick_and_place_BaseballBat_DiningTable: 804
train_pick_and_place_BaseballBat_TVStand: 536
train_pick_and_place_BaseballBat_CoffeeTable:

In [10]:
# Record the scene count of every task of `stage` and report the tasks that
# have fewer than 100 scenes.
stage_to_task_to_num_scenes = {
    stage: {}
}
for task in stage_to_tasks[stage]:
    num_task_scenes = len(stage_to_task_to_scenes[stage][task])
    stage_to_task_to_num_scenes[stage][task] = num_task_scenes
    if num_task_scenes < 100:
        print(f'num_scene for ask {task}: {len(stage_to_task_to_scenes[stage][task])}')


num_scene for ask train_pick_and_place_AlarmClock_CoffeeTable: 10
num_scene for ask train_pick_and_place_Apple_CoffeeTable: 23
num_scene for ask train_pick_and_place_BaseballBat_CoffeeTable: 17
num_scene for ask train_pick_and_place_BasketBall_CoffeeTable: 10
num_scene for ask train_pick_and_place_Book_Ottoman: 18
num_scene for ask train_pick_and_place_Book_Cabinet: 78
num_scene for ask train_pick_and_place_Book_CoffeeTable: 30
num_scene for ask train_pick_and_place_Bottle_Desk: 99
num_scene for ask train_pick_and_place_Bottle_Cabinet: 24
num_scene for ask train_pick_and_place_Bottle_CoffeeTable: 13
num_scene for ask train_pick_and_place_Bowl_Cabinet: 78
num_scene for ask train_pick_and_place_Bowl_CoffeeTable: 29
num_scene for ask train_pick_and_place_Box_Cabinet: 53
num_scene for ask train_pick_and_place_Box_CoffeeTable: 15
num_scene for ask train_pick_and_place_Box_Ottoman: 15
num_scene for ask train_pick_and_place_Bread_Microwave: 74
num_scene for ask train_pick_and_place_Bread_Coff

In [13]:
from datagen.datagen_constants import STAGE_TO_VALID_TASKS, STAGE_TO_TASK_TO_SCENES

# Number of valid tasks per stage.
for stage in STAGE_TO_VALID_TASKS:
    print(f"{stage}: {len(STAGE_TO_VALID_TASKS[stage])}")

# Scenes shared by ALL valid tasks of a stage, in the order of the first
# task's scene list. A stage with no valid tasks keeps the value None.
stage_to_common_scenes = {}
for stage in STAGE_TO_VALID_TASKS:
    scenes = None
    for task in STAGE_TO_VALID_TASKS[stage]:
        task_scenes = STAGE_TO_TASK_TO_SCENES[stage][task]
        if scenes is None:
            # The first task seeds the candidate list (copied, so the source
            # constant is never aliased).
            scenes = list(task_scenes)
        else:
            # Set lookup instead of a list scan per candidate scene.
            task_scene_set = set(task_scenes)
            scenes = [scene for scene in scenes if scene in task_scene_set]
        if not scenes:
            # The intersection is already empty; later tasks cannot change it.
            break
    stage_to_common_scenes[stage] = scenes
        

train: 346
val: 71
test: 70


In [14]:
# Display the scenes shared by every valid task, per stage.
stage_to_common_scenes

{'train': [],
 'val': ['val_11',
  'val_101',
  'val_399',
  'val_528',
  'val_700',
  'val_733',
  'val_751',
  'val_864',
  'val_889'],
 'test': ['test_119', 'test_132', 'test_211', 'test_504', 'test_522']}

In [29]:
from datagen.datagen_constants import PICKUPABLES_TO_RECEPTACLES

# Compare the number of pickupable keys with the number of distinct keys.
# NOTE(review): if PICKUPABLES_TO_RECEPTACLES is a plain dict, its keys are
# unique by construction and the two numbers always match -- confirm this
# check can detect what it is meant to.
picks = [*PICKUPABLES_TO_RECEPTACLES.keys()]
for key_count in (len(picks), len(set(picks))):
    print(key_count)


56
56


In [27]:
from datagen.datagen_constants import STAGE_TO_TASKS

# Print every task that occurs more than once in STAGE_TO_TASKS[stage].
for stage in STAGE_TO_TASKS:
    # Work on a copy: pop() on the original list emptied the imported
    # constant, leaving STAGE_TO_TASKS with empty lists for the rest of the
    # kernel session.
    tasks = list(STAGE_TO_TASKS[stage])
    while len(tasks) > 0:
        t = tasks.pop()
        # Still present among the remaining entries => duplicate.
        if t in tasks:
            print(t)

train_pick_and_place_CreditCard_Shelf
val_pick_and_place_Plate_Sink
val_pick_and_place_CreditCard_Shelf
test_pick_and_place_Plate_Sink
test_pick_and_place_CreditCard_Shelf


In [26]:
from datagen.datagen_constants import STAGE_TO_SCENE_TO_TASKS, STAGE_TO_SCENE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_NUM_TASKS, STAGE_TO_SCENE_TO_NUM_VALID_TASKS

# Print every task that occurs more than once in STAGE_TO_VALID_TASKS[stage].
# STAGE_TO_VALID_TASKS is not imported in this cell; it must already be in the
# kernel namespace.
for stage in STAGE_TO_VALID_TASKS:
    # Work on a copy: pop() on the original list emptied the shared constant,
    # so later cells saw no valid tasks at all.
    vtasks = list(STAGE_TO_VALID_TASKS[stage])
    while len(vtasks) > 0:
        t = vtasks.pop()
        # Still present among the remaining entries => duplicate.
        if t in vtasks:
            print(t)
            

train_pick_and_place_CreditCard_Shelf
val_pick_and_place_Plate_Sink
test_pick_and_place_Plate_Sink


In [10]:
from collections import defaultdict
from datagen.datagen_constants import STAGE_TO_TASKS

# Invert STAGE_TO_TASK_TO_SCENES (task -> scenes) into scene -> valid tasks,
# plus the number of valid tasks per scene. Tasks that are not in
# STAGE_TO_VALID_TASKS[stage] are ignored.
stage_to_scene_to_tasks = {stage: {} for stage in STAGE_TO_VALID_TASKS}
stage_to_scene_to_num_tasks = {stage: {} for stage in STAGE_TO_VALID_TASKS}
for stage in STAGE_TO_TASK_TO_SCENES:
    task_to_scenes = STAGE_TO_TASK_TO_SCENES[stage]
    # Built once per stage so the validity test is a set lookup.
    valid_tasks = set(STAGE_TO_VALID_TASKS[stage])

    scene_to_tasks = defaultdict(list)
    for task, scenes in task_to_scenes.items():
        if task not in valid_tasks:
            continue
        for scene in scenes:
            scene_to_tasks[scene].append(task)

    # Store a plain dict: with a defaultdict, looking up an unknown scene
    # later would silently insert an empty list instead of raising KeyError.
    stage_to_scene_to_tasks[stage] = dict(scene_to_tasks)
    stage_to_scene_to_num_tasks[stage] = {
        scene: len(tasks)
        for scene, tasks in scene_to_tasks.items()
    }

In [3]:
from datagen.datagen_constants import STAGE_TO_SCENE_TO_TASKS, STAGE_TO_SCENE_TO_VALID_TASKS, STAGE_TO_SCENE_TO_NUM_TASKS, STAGE_TO_SCENE_TO_NUM_VALID_TASKS

70

In [7]:
# Per stage: scenes ordered from most to fewest valid tasks.
{
    stage_name: dict(
        sorted(
            counts.items(),
            key=lambda scene_and_count: scene_and_count[1],
            reverse=True,
        )
    )
    for stage_name, counts in STAGE_TO_SCENE_TO_NUM_VALID_TASKS.items()
}

{'train': {'train_7278': 272,
  'train_6497': 259,
  'train_3435': 252,
  'train_7967': 249,
  'train_8414': 248,
  'train_9361': 248,
  'train_188': 241,
  'train_1919': 240,
  'train_8510': 240,
  'train_1095': 239,
  'train_8208': 239,
  'train_3944': 237,
  'train_7544': 237,
  'train_6838': 234,
  'train_7620': 234,
  'train_7661': 234,
  'train_4264': 233,
  'train_3830': 232,
  'train_7031': 232,
  'train_1324': 230,
  'train_2206': 230,
  'train_6953': 230,
  'train_8638': 230,
  'train_1788': 229,
  'train_9060': 229,
  'train_9268': 229,
  'train_829': 228,
  'train_3545': 228,
  'train_3274': 227,
  'train_3482': 227,
  'train_4706': 227,
  'train_7424': 227,
  'train_3135': 226,
  'train_7317': 225,
  'train_9814': 225,
  'train_1810': 225,
  'train_4284': 224,
  'train_5691': 224,
  'train_6278': 224,
  'train_9865': 223,
  'train_6242': 223,
  'train_5381': 222,
  'train_8109': 222,
  'train_4599': 219,
  'train_8991': 219,
  'train_6822': 219,
  'train_9026': 219,
  'tra