In [None]:
import os
import numpy as np
import spark_dsg as dsg
from spark_dsg.mp3d import load_mp3d_info, repartition_rooms, add_gt_room_label
from hydra_gnn.utils import plot_heterogeneous_graph
from hydra_gnn.preprocess_dsgs import convert_label_to_y, add_object_connectivity, get_room_object_dsg

In [None]:
# dataset file paths
# NOTE(review): the two directories below are absolute, machine-specific
# locations and are not referenced by any other cell in this section of the
# notebook — confirm whether they are still needed.
hydra_dataset_dir = "/mnt/sdb/dataset/Matterport3D-Hydra/tro_graphs_2022_09_24"
mp3d_housefile_dir = "/mnt/sdb/dataset/Matterport3D-Hydra/house_files"
# The remaining paths are relative, so they resolve against the kernel's
# current working directory.
colormap_data_path = "./data/colormap.csv"  # read with pd.read_csv below
mp3d_label_data_path = "./data/mpcat40.tsv"
word2vec_model_path = "./data/GoogleNews-vectors-negative300.bin"  # loaded as a binary word2vec file below

## Load the dynamic scene graph (DSG)

In [None]:
# test dataset file paths
# Both paths are relative, so they resolve against the kernel's current
# working directory.
test_json_file = "./tests/test_data/x8F5xyUWy9e_0_gt_partial_dsg_1447.json"
gt_house_file = "./tests/test_data/x8F5xyUWy9e.house"

# Fail fast before any loading is attempted. The message names the missing
# file and the directory it was resolved from, because a bare AssertionError
# gives no hint that the notebook was simply started from the wrong folder.
for required_path in (test_json_file, gt_house_file):
    assert os.path.exists(required_path), (
        "Missing test data file: {} (resolved from working directory {})".format(
            required_path, os.getcwd()))


In [None]:
# Load hydra scene graph
G = dsg.DynamicSceneGraph.load(test_json_file)

# Report the per-layer node counts next to the graph-wide total.
nodes_per_layer = [layer.num_nodes() for layer in G.layers]
total_nodes = G.num_nodes()
print("Number of nodes separated by layer: {} ({} total).".format(nodes_per_layer, total_nodes))

# Parse the matching Matterport3D house file; it is used for GT room labels.
gt_house_info = load_mp3d_info(gt_house_file)


In [None]:
# data = G.to_torch(use_heterogeneous=True)
# fig = plot_heterogeneous_graph(data)
# fig.update_layout(
#     scene = dict(
#         xaxis = dict(visible=False),
#         yaxis = dict(visible=False),
#         zaxis =dict(visible=False)
#         )
#     )
    
# fig.show()

## Add room labels from ground truth (GT)

In [None]:
# room labels are based on maximum IoU between hydra room bbox and mp3d room segmentation
# Bounding boxes are attached to the ROOMS layer before labelling; presumably
# add_gt_room_label reads them for the IoU matching — TODO confirm.
# Both calls are used only for their effect on G (return values are ignored).
dsg.add_bounding_boxes_to_layer(G, dsg.DsgLayers.ROOMS)
# NOTE(review): angle_deg=-90 looks like a fixed rotation between the Hydra
# and MP3D coordinate frames — confirm against add_gt_room_label.
add_gt_room_label(G, gt_house_info, angle_deg=-90)

## Get room-object graph and convert to torch_data

In [None]:
import gensim
import pandas as pd
from hydra_gnn.preprocess_dsgs import hydra_object_feature_converter, hydra_node_converter

In [None]:
# extract room-object dsg and add object connectivity
# G_ro is the graph returned by get_room_object_dsg(G); every later cell
# converts and inspects G_ro rather than G.
G_ro = get_room_object_dsg(G, verbose=True)
# Called only for its effect on G_ro (the return value is ignored).
# NOTE(review): the thresholds are presumably distances in metres, and the
# meaning of max_near is not visible here — confirm against
# add_object_connectivity.
add_object_connectivity(G_ro, threshold_near=2.0, threshold_on=1.0, max_near=2.0)

In [None]:
# Inputs for the object feature converter: the label colormap table and the
# pretrained word2vec vectors (binary format).
colormap_data = pd.read_csv(colormap_data_path, delimiter=',')
word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)

# data conversion
# The object feature converter is built once and passed to both conversions
# below, instead of being constructed twice from the same arguments.
object_feature_converter = hydra_object_feature_converter(colormap_data, word2vec_model)

# Room nodes get placeholder features: length 0 for the heterogeneous graph
# and length 300 for the homogeneous one.
# NOTE(review): 300 presumably matches the object feature width (the model
# file is named "...negative300") — confirm.
# The placeholders use np.zeros, not np.empty: np.empty returns uninitialised
# memory, so a 300-element placeholder would hold arbitrary values that
# change from run to run (and may include NaN/inf).
data_heterogeneous = G_ro.to_torch(
    use_heterogeneous=True,
    node_converter=hydra_node_converter(
        object_feature_converter=object_feature_converter,
        room_feature_converter=lambda i: np.zeros(0)))
data_homogeneous = G_ro.to_torch(
    use_heterogeneous=False,
    node_converter=hydra_node_converter(
        object_feature_converter=object_feature_converter,
        room_feature_converter=lambda i: np.zeros(300)))

In [None]:
# get label index data_*.y from hydra node labels
synonym_objects = []
synonym_rooms = [('a', 't'), ('z', 'Z', 'x', 'p', '\x15')]

# Run the same conversion on both graph representations, homogeneous first.
# The dictionaries left bound after the loop are the ones returned for
# data_heterogeneous (the last iteration).
for torch_data in (data_homogeneous, data_heterogeneous):
    object_label_dict, room_label_dict = convert_label_to_y(
        torch_data,
        object_synonyms=synonym_objects,
        room_synonyms=synonym_rooms,
    )

In [None]:
# Show the object mapping, then the room mapping, one per line.
print(object_label_dict, room_label_dict, sep="\n")

In [None]:
# Visualise the heterogeneous room-object graph built above.
# NOTE(review): the figure object presumably comes from plotly (the
# commented-out cell earlier calls fig.update_layout) — confirm.
fig = plot_heterogeneous_graph(data_heterogeneous)
fig.show()

## Debug

In [None]:
# hydra labels
# `labels` keeps one entry per node type, in data_heterogeneous.node_types
# order, so anything that already relies on it continues to work.
labels = [data_heterogeneous[node_type].label for node_type in data_heterogeneous.node_types]

# The printed values are looked up by node-type name, not by position in
# `labels`. If node_types were ever ordered rooms-first, positional indexing
# would print the two lines swapped and apply chr() to object labels.
object_labels = data_heterogeneous['objects'].label
room_labels = data_heterogeneous['rooms'].label
print('object mp3d labels:', object_labels.tolist())
# Room labels are integer character codes; chr() turns each into its character.
print('room mp3d labels:', [chr(code) for code in room_labels.tolist()])

In [None]:
# training labels
# Print the y attribute of the object nodes first, then of the room nodes.
for node_type in ('objects', 'rooms'):
    print(data_heterogeneous[node_type].y)

In [None]:
# Feature-matrix shapes: objects on the first line, rooms on the second.
object_x_shape = data_heterogeneous['objects'].x.shape
room_x_shape = data_heterogeneous['rooms'].x.shape
print(object_x_shape, room_x_shape, sep="\n")