# Make dataset for full vertical view prediction

#### Using the same scan splits as seed 42 in the original current-room prediction experiment

In [1]:
import sys
import os
import multiprocessing
import random
import numpy as np
import math
import torch
from torch.utils.data import Dataset, DataLoader
# from tensorboardX import SummaryWriter

import utils

In [2]:
# Import the MatterSim bindings, trying the docker/Philly build directory
# first and falling back to the local conda build directory.
try:
    sys.path.append('/opt/MatterSim/build/')  # local docker or Philly
    import MatterSim
except ImportError:
    # local conda env only
    # Only a failed import should trigger the fallback; any other error
    # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
    sys.path.append('/home/hoyeung/Documents/vnla/code/build')
    import MatterSim

In [17]:
# Set the PT_DATA_DIR environment variable to the local Matterport3D data root.
# NOTE(review): presumably read by the project's `utils` loaders — confirm.
os.environ['PT_DATA_DIR'] ='/home/hoyeung/blob_matterport3d/'

In [3]:
# Load the room-type vocabulary, one entry per line of the text file.
room_types_path = '/home/hoyeung/blob_matterport3d/semantics/asknav_tr_room_types.txt'
with open(room_types_path, "r") as f:
    room_types = f.read().split('\n')
# Discard the final split element (the empty string when the file ends
# with a newline).
room_types.pop()
len(room_types)

30

In [4]:
# Load the list of scan ids, one entry per line of the text file.
asknav_tr_scans_path = '/home/hoyeung/blob_matterport3d/semantics/asknav_tr_scans.txt'
with open(asknav_tr_scans_path, "r") as f:
    asknav_tr_scans = f.read().split('\n')
# Discard the final split element (the empty string when the file ends
# with a newline).
asknav_tr_scans.pop()
len(asknav_tr_scans)

56

## Extract semantic classifier train, val seen and val unseen scans

In [5]:
def read_indices_and_labels(path):
    """Read a tab-separated index file into feature ids and labels.

    Every non-empty line must hold exactly three tab-separated fields:
    ``long_id``, ``viewix`` and ``label``. All fields are returned as
    strings; no numeric conversion is done here.

    Args:
        path: Path of the index file to read.

    Returns:
        A ``(feature_ids, labels)`` pair, where ``feature_ids`` is a list of
        ``(long_id, viewix)`` tuples and ``labels`` is the parallel list of
        label strings.

    Raises:
        ValueError: If a non-empty line does not split into three fields.
    """
    with open(path, 'r') as f:
        # Drop empty lines rather than blindly discarding the last split
        # element, so the final record survives when the file does not end
        # with a newline.
        lines = [line for line in f.read().split('\n') if line]
    print ("Number of lines = {}".format(len(lines)))

    feature_ids = []
    labels = []
    for line in lines:
        long_id, viewix, label = line.split('\t')
        feature_ids.append((long_id, viewix))
        labels.append(label)
    return feature_ids, labels

In [6]:
# Load the (long_id, viewix) ids and labels recorded in train_indices.txt
# of the classifier_seed_42 experiment directory.
exp_dir = '/home/hoyeung/blob_experiments/output_philly/20200202_philly_debug_semantics/classifier_seed_42_batch_size_100_lr_0.001_layers_2/'
filename = 'train_indices.txt'
train_indices_path = os.path.join(exp_dir, filename)
train_indices, train_curr_rm_labels = read_indices_and_labels(train_indices_path)
train_indices[100:105]

Number of lines = 224316


[('5LpN3gDmAk7_c75f71faa8b74a8eb875731b3dbe6183', '3'),
 ('5LpN3gDmAk7_c75f71faa8b74a8eb875731b3dbe6183', '4'),
 ('5LpN3gDmAk7_c75f71faa8b74a8eb875731b3dbe6183', '5'),
 ('5LpN3gDmAk7_c75f71faa8b74a8eb875731b3dbe6183', '6'),
 ('5LpN3gDmAk7_c75f71faa8b74a8eb875731b3dbe6183', '7')]

In [7]:
# Load the (long_id, viewix) ids and labels recorded in val_seen_indices.txt
# of the classifier_seed_42 experiment directory.
exp_dir = '/home/hoyeung/blob_experiments/output_philly/20200202_philly_debug_semantics/classifier_seed_42_batch_size_100_lr_0.001_layers_2/'
filename = 'val_seen_indices.txt'
val_seen_indices_path = os.path.join(exp_dir, filename)
val_seen_indices, val_seen_curr_rm_labels = read_indices_and_labels(val_seen_indices_path)
val_seen_indices[100:105]

Number of lines = 18540


[('5LpN3gDmAk7_58351a02ce7742bd9a6674db0131a534', '6'),
 ('5LpN3gDmAk7_58351a02ce7742bd9a6674db0131a534', '8'),
 ('5LpN3gDmAk7_58351a02ce7742bd9a6674db0131a534', '21'),
 ('5LpN3gDmAk7_58351a02ce7742bd9a6674db0131a534', '33'),
 ('5LpN3gDmAk7_bc48486938fb46c1b18c3a30767283c5', '4')]

In [8]:
# Load the (long_id, viewix) ids and labels recorded in val_unseen_indices.txt
# of the classifier_seed_42 experiment directory.
exp_dir = '/home/hoyeung/blob_experiments/output_philly/20200202_philly_debug_semantics/classifier_seed_42_batch_size_100_lr_0.001_layers_2/'
filename = 'val_unseen_indices.txt'
val_unseen_indices_path = os.path.join(exp_dir, filename)
val_unseen_indices, val_unseen_curr_rm_labels = read_indices_and_labels(val_unseen_indices_path)
val_unseen_indices[100:105]

Number of lines = 18540


[('rPc6DW4iMge_2d20ebd11b9d4855bac237d15181e126', '28'),
 ('rPc6DW4iMge_2d20ebd11b9d4855bac237d15181e126', '29'),
 ('rPc6DW4iMge_2d20ebd11b9d4855bac237d15181e126', '30'),
 ('rPc6DW4iMge_2d20ebd11b9d4855bac237d15181e126', '31'),
 ('rPc6DW4iMge_2d20ebd11b9d4855bac237d15181e126', '32')]

In [11]:
# Collect the distinct scan ids of the val-unseen split: the scan id is the
# part of each long_id that precedes the first underscore.
val_unseen_scans = {
    entry[0].partition('_')[0]
    for entry in val_unseen_indices
}
val_unseen_scans

{'SN83YJsR3w2', 'VVfe2KiqLaN', 'aayBHfsNo7d', 'rPc6DW4iMge'}

In [12]:
# Every scan that is not part of the val-unseen split counts as "seen".
seen_scans = [
    scan_id
    for scan_id in asknav_tr_scans
    if scan_id not in val_unseen_scans
]
len(seen_scans)

52

## Construct datasets

In [14]:
def retrieve_rm_labels_and_feature_ids_full_vertical(scans, room_types):
    """Build room labels and vertical-triple feature ids for every scan node.

    For each node of each scan's navigation graph, 12 data points are
    produced, one per ``bottom_viewix`` in ``range(12)``. Each data point
    pairs the node's long id (``scan + '_' + node``) with the view-index
    triple ``(bottom_viewix, bottom_viewix + 12, bottom_viewix + 24)``.
    NOTE(review): presumably these are the three elevation rows of a 36-view
    panorama sharing one heading — confirm.

    Args:
        scans: Iterable of scan id strings.
        room_types: List of room-type strings; a node's label is the index
            of its region string in this list.

    Returns:
        A ``(rm_labels, feature_ids)`` pair of parallel lists. Note the
        labels come first, unlike the file readers in this notebook.

    Raises:
        ValueError: If a node's region string is not in ``room_types``.
    """
    views_per_row = 12  # number of bottom-row view indices per node
    rm_labels = []
    feature_ids = []
    for scan in scans:
        scan_G = utils.load_nav_graphs(scan)
        scan_panos_to_region = utils.load_panos_to_region(scan, "")
        for n in scan_G.nodes:
            room_label_str = scan_panos_to_region[n]
            long_id = scan + '_' + n
            # The label depends only on the node, so look it up once
            # instead of once per view.
            rm_label = room_types.index(room_label_str)
            for bottom_viewix in range(views_per_row):
                rm_labels.append(rm_label)
                feature_ids.append((
                    long_id,
                    (
                        bottom_viewix,
                        bottom_viewix + views_per_row,
                        bottom_viewix + 2 * views_per_row,
                    ),
                ))
    return rm_labels, feature_ids

In [18]:
# Build labels and vertical-triple feature ids for every seen scan.
seen_rm_labels, seen_feature_ids = retrieve_rm_labels_and_feature_ids_full_vertical(
    scans=seen_scans,
    room_types=room_types,
)

In [21]:
# Sanity check: the id and label lists should have the same length.
num_seen_feature_ids = len(seen_feature_ids)
num_seen_rm_labels = len(seen_rm_labels)
print('seen_feature_ids len = {}'.format(num_seen_feature_ids))
print('seen_rm_labels len = {}'.format(num_seen_rm_labels))

seen_feature_ids len = 80952
seen_rm_labels len = 80952


In [22]:
# Build labels and vertical-triple feature ids for the val-unseen scans.
# val_unseen_scans is a set, whose iteration order is not stable between
# interpreter runs; sorting makes the generated row order reproducible.
val_unseen_rm_labels, val_unseen_feature_ids = retrieve_rm_labels_and_feature_ids_full_vertical(sorted(val_unseen_scans), room_types)
print ('val_unseen_feature_ids len = {}'.format(len(val_unseen_feature_ids)))
print ('val_unseen_rm_labels len = {}'.format(len(val_unseen_rm_labels)))

val_unseen_feature_ids len = 6180
val_unseen_rm_labels len = 6180


In [25]:
# Draw the val-seen subset (same size as val-unseen, without replacement)
# from a dedicated fixed-seed generator, so the train / val-seen split can
# be regenerated exactly and the global numpy RNG state is left untouched.
# The seed value 42 mirrors the seed named in this notebook's heading.
split_rng = np.random.RandomState(42)
val_seen_idx = split_rng.choice(len(seen_feature_ids), size=len(val_unseen_feature_ids), replace=False)
print('val_seen_idx length = {}'.format(len(val_seen_idx)))

val_seen_idx length = 6180


In [26]:
# Get tr and val seen datapts
tr_seen_rm_labels = []
tr_seen_feature_ids = []
val_seen_rm_labels = []
val_seen_feature_ids = []
for i in range(len(seen_feature_ids)):
    if i in val_seen_idx:
        val_seen_rm_labels.append(seen_rm_labels[i])
        val_seen_feature_ids.append(seen_feature_ids[i])
    else:
        tr_seen_rm_labels.append(seen_rm_labels[i])
        tr_seen_feature_ids.append(seen_feature_ids[i])
        
print('tr_seen_rm_labels length = {}'.format(len(tr_seen_rm_labels)))
print('tr_seen_feature_ids length = {}'.format(len(tr_seen_feature_ids)))
print('val_seen_rm_labels length = {}'.format(len(val_seen_rm_labels)))
print('val_seen_feature_ids length = {}'.format(len(val_seen_feature_ids)))

tr_seen_rm_labels length = 74772
tr_seen_feature_ids length = 74772
val_seen_rm_labels length = 6180
val_seen_feature_ids length = 6180


In [29]:
# Peek at the first five val-seen feature ids.
val_seen_feature_ids[:5]

[('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (3, 15, 27)),
 ('ac26ZMwG7aT_7804a496622640ce9d5d79ecdc992254', (7, 19, 31)),
 ('ac26ZMwG7aT_0ffa6fd0a65a4462b2023f28b14741e5', (8, 20, 32)),
 ('ac26ZMwG7aT_771f6afffb0c4e6d9fe9cca06b6a9cbb', (6, 18, 30)),
 ('ac26ZMwG7aT_4382fba86a6a4956b7007f079a990268', (8, 20, 32))]

In [30]:
# Peek at the first five val-seen room labels (indices into room_types).
val_seen_rm_labels[:5]

[29, 7, 16, 18, 8]

## Write the indices out

In [31]:
# Write the training split, one "<long_id>\t<view ids>\t<room label>" row
# per data point.
with open('/home/hoyeung/blob_matterport3d/semantics/asknav_train_full_vertical_indices.txt', 'w') as fh:
    fh.writelines(
        "{}\t{}\t{}\n".format(entry[0], entry[1], room_label)
        for entry, room_label in zip(tr_seen_feature_ids, tr_seen_rm_labels)
    )

In [32]:
# Write the val-seen split, one "<long_id>\t<view ids>\t<room label>" row
# per data point.
with open('/home/hoyeung/blob_matterport3d/semantics/asknav_val_seen_full_vertical_indices.txt', 'w') as fh:
    fh.writelines(
        "{}\t{}\t{}\n".format(entry[0], entry[1], room_label)
        for entry, room_label in zip(val_seen_feature_ids, val_seen_rm_labels)
    )

In [33]:
# Write the val-unseen split, one "<long_id>\t<view ids>\t<room label>" row
# per data point.
with open('/home/hoyeung/blob_matterport3d/semantics/asknav_val_unseen_full_vertical_indices.txt', 'w') as fh:
    fh.writelines(
        "{}\t{}\t{}\n".format(entry[0], entry[1], room_label)
        for entry, room_label in zip(val_unseen_feature_ids, val_unseen_rm_labels)
    )

## Test parsing the written index files

In [39]:
def read_rm_labels_and_feature_ids(idx_save_path, image_extent):
    """Parse a tab-separated index file into feature ids and room labels.

    Every non-empty line must hold three tab-separated fields: ``long_id``,
    ``viewix`` and ``room_label``. ``room_label`` is converted to ``int``.
    ``viewix`` is converted to ``int`` when ``image_extent == 'single'``;
    for any other ``image_extent`` it is parsed as a Python literal
    (e.g. ``"(0, 12, 24)"`` becomes the tuple ``(0, 12, 24)``).

    Args:
        idx_save_path: Path of the index file to read.
        image_extent: ``'single'`` for one integer view index per line; any
            other value for a literal (such as a tuple of view indices).

    Returns:
        A ``(feature_ids, room_labels)`` pair, where ``feature_ids`` is a
        list of ``(long_id, viewix)`` tuples and ``room_labels`` is the
        parallel list of integer labels.

    Raises:
        ValueError: If a line does not have three fields, or a field cannot
            be converted.
        SyntaxError: If a non-single ``viewix`` is not a valid literal.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    single_view = image_extent == 'single'
    feature_ids = []
    room_labels = []
    with open(idx_save_path, 'r') as fh:
        # Drop empty lines rather than blindly discarding the last split
        # element, so the final record survives when the file does not end
        # with a newline.
        lines = [line for line in fh.read().split('\n') if line]
    for line in lines:
        long_id, viewix, room_label = line.split('\t')
        room_labels.append(int(room_label))
        if single_view:
            feature_ids.append((long_id, int(viewix)))
        else:
            # literal_eval only accepts Python literals, so file content
            # can never execute arbitrary code the way eval() would.
            feature_ids.append((long_id, ast.literal_eval(viewix)))
    return feature_ids, room_labels

In [41]:
# Round-trip check: parse the training index file written above.
train_full_vertical_path = '/home/hoyeung/blob_matterport3d/semantics/asknav_train_full_vertical_indices.txt'
ids, labels = read_rm_labels_and_feature_ids(train_full_vertical_path, 'vertical')

In [37]:
# Scratch check: evaluating the string form of a tuple yields a tuple.
dum = eval('(1,2,3)')

In [38]:
# Display the evaluated value.
dum

(1, 2, 3)

In [42]:
# Peek at the first five parsed feature ids; the view ids are tuples again.
ids[:5]

[('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (0, 12, 24)),
 ('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (1, 13, 25)),
 ('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (2, 14, 26)),
 ('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (4, 16, 28)),
 ('ac26ZMwG7aT_828b8f02ffd145ac8b351cbb70682dc2', (5, 17, 29))]

In [43]:
# Confirm the evaluated value is a tuple.
isinstance(dum, tuple)

True

In [46]:
# Scratch: torch.empty allocates without initialising, so the displayed
# values are arbitrary.
torch.empty(4, dtype=torch.float32)

tensor([-5.4842e-08,  4.5799e-41,  1.9043e+21,  3.0873e-41])

In [49]:
# Scratch: concatenating three length-3 float tensors gives one length-9 tensor.
torch.cat([torch.tensor([1,2,3], dtype=torch.float) for _ in range(3)])

tensor([1., 2., 3., 1., 2., 3., 1., 2., 3.])

In [50]:
# Scratch: bind b to the current value of a.
a = 4
b = a


In [51]:
# Scratch: rebinding a leaves b unchanged (b is still 4).
a = 6
b

4