In [36]:
from termios import TIOCM_DSR
import warnings
import random
import json
import sys
import cv2
import tqdm
import os
import copy

from ai2thor.controller import Controller
import ai2thor
from allenact_plugins.ithor_plugin.ithor_util import (
    horizontal_to_vertical_fov,
)
from boolset.tasks_and_samplers import (
    AgentPose,
    HouseAugmenter,
    ProcTHORDataset,
    Vector3,
)

import prior
from allenact.embodiedai.sensors.vision_sensors import DepthSensor
from PIL import Image
from typing import Optional, Tuple

# ---------------------------------------------------------------------------
# Simulator configuration (consumed when the Controller is constructed below)
# ---------------------------------------------------------------------------
THOR_COMMIT_ID = "345a5fc046f25305c66367a484c9ae297107c877"  # pinned AI2-THOR build
CAMERA_WIDTH = 224  # passed to the controller as `width`
CAMERA_HEIGHT = 224  # passed to the controller as `height`
HORIZONTAL_FIELD_OF_VIEW = 100  # converted to a vertical FOV before use
STEP_SIZE = 0.2  # passed to the controller as `gridSize`
ROTATION_DEGREES = 45.0  # passed to the controller as `rotateStepDegrees`
VISIBILITY_DISTANCE = 1.5  # passed to the controller as `visibilityDistance`

# dataset = prior.load_dataset("procthor-10k")
# train_scenes = dataset["train"]
# val_scenes = dataset["val"]
# SCENES = ProcTHORDataset(
#     [i for i in train_scenes] + [i for i in val_scenes]
# )

# ---------------------------------------------------------------------------
# Dataset collection parameters
# ---------------------------------------------------------------------------
# The split is chosen by the first command-line argument: "test" selects the
# test split, anything else selects train. sys.argv may have no second element
# (for example when the code is run without arguments), so a missing argument
# falls back to the train split instead of raising IndexError.
TRAIN = not (len(sys.argv) > 1 and sys.argv[1] == "test")
NUM_ANCHORS = 1000 if TRAIN else 100
NUM_STEPS = 4
# NOTE(review): ROT_ANGLE (30) differs from ROTATION_DEGREES (45) above and is
# not referenced in the visible part of the notebook -- confirm which applies.
ROT_ANGLE = 30
ACTIONS = ["MoveAhead", "MoveBack", "RotateLeft", "RotateRight"]
# Output locations depend on the selected split.
IMG_ROOT = '../data/interactron/train' if TRAIN else '../data/interactron/test'
ANN_PATH = (
    '../data/interactron/annotations/interactron_v1_train.json'
    if TRAIN
    else '../data/interactron/annotations/interactron_v1_test.json'
)
# Start the AI2-THOR controller that the rest of the notebook drives.
# Keyword arguments are grouped by concern; the values are unchanged.
CTRL = Controller(
    # -- build selection and transport --
    commit_id=THOR_COMMIT_ID,
    branch="nanna-grasp-force",
    platform="CloudRendering",
    server_class=ai2thor.fifo_server.FifoServer,
    include_private_scenes=False,
    # -- initial scene --
    scene="ArchitecTHOR-Val-01",
    # -- camera / rendering --
    width=CAMERA_WIDTH,
    height=CAMERA_HEIGHT,
    # The configured FOV is horizontal; convert it for the `fieldOfView` argument.
    fieldOfView=horizontal_to_vertical_fov(
        horizontal_fov_in_degrees=HORIZONTAL_FIELD_OF_VIEW,
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
    ),
    renderSemanticSegmentation=True,
    makeAgentsVisible=True,
    # -- agent and movement --
    agentMode="arm",
    gridSize=STEP_SIZE,
    rotateStepDegrees=ROTATION_DEGREES,
    snapToGrid=False,
    # -- visibility --
    visibilityScheme="Distance",
    visibilityDistance=VISIBILITY_DISTANCE,
    # -- simulation options --
    fastActionEmit=True,
    autoSimulation=False,
    autoSyncTransforms=True,
    useMassThreshold=True,
    massThreshold=10,
)

In [59]:
def get_top_down_frame(
    controller: "Controller", resolution: Optional[Tuple[int, int]] = None
):
    """Render the current scene from a camera placed above it.

    The function asks the controller for its map-view camera properties,
    raises that pose by 1.1x the larger horizontal scene extent, switches it
    to a perspective projection and adds it as a third-party camera. The
    frame reported for the last third-party camera is returned.

    Args:
        controller: Controller to drive. Each call issues an
            ``AddThirdPartyCamera`` action on it.
        resolution: Desired render size, ordered like ``frame.shape[:2]``
            (index 0 is sent as ``y``, index 1 as ``x``). ``None`` keeps the
            controller's current resolution.

    Returns:
        The last entry of ``event.third_party_camera_frames`` after the camera
        was added (presumably an image array -- confirm against the
        controller API).

    Note:
        If the resolution was changed, it is switched back to the starting
        value before returning, including when one of the steps raises.
    """
    # Make sure there is a frame to read the current resolution from.
    if controller.last_event.frame is None:
        controller.step("Pass")

    start_res = tuple(controller.last_event.frame.shape[:2])
    # None means "render at the current size"; tuple() lets a list compare
    # equal to the tuple taken from frame.shape.
    target_res = start_res if resolution is None else tuple(resolution)
    needs_resize = target_res != start_res

    if needs_resize:
        controller.step("ChangeResolution", x=target_res[1], y=target_res[0])

    try:
        # Setup the top-down camera
        event = controller.step(
            action="GetMapViewCameraProperties", raise_for_failure=True
        )
        # Copy so the event's own metadata is left untouched by the edits below.
        pose = copy.deepcopy(event.metadata["actionReturn"])

        bounds = event.metadata["sceneBounds"]["size"]
        max_bound = max(bounds["x"], bounds["z"])

        # Use a perspective camera lifted above the scene.
        pose["fieldOfView"] = 50
        pose["position"]["y"] += 1.1 * max_bound
        pose["orthographic"] = False
        pose["farClippingPlane"] = 50
        pose["nearClippingPlane"] = 18.5
        # The orthographic size is not forwarded with the perspective pose.
        pose.pop("orthographicSize", None)

        # add the camera to the scene
        event = controller.step(
            action="AddThirdPartyCamera",
            **pose,
            skyboxColor="white",
            raise_for_failure=True,
        )
        return event.third_party_camera_frames[-1]
    finally:
        # Always hand the controller back at the resolution it started with.
        if needs_resize:
            controller.step("ChangeResolution", x=start_res[1], y=start_res[0])


In [60]:
# Render a top-down view of the scene currently loaded in CTRL. The resolution
# tuple is compared against frame.shape[:2] inside get_top_down_frame, so
# (1080, 1920) is sent as y=1080, x=1920.
A = get_top_down_frame(CTRL, resolution=(1080, 1920))
# Wrap the returned frame in a PIL image and write it to "frame.png"
# (relative path, so it is written relative to the current working directory).
im = Image.fromarray(A)
im.save("frame.png")