In [1]:
import json
import numpy as np
import pickle
import utils
import sys
import os
import math
from collections import defaultdict
from collections import Counter

In [2]:
# Make the MatterSim bindings importable. Two build locations are tried in
# order; the second one is only added to sys.path when the first import fails.
# Only ImportError triggers the fallback, so Ctrl-C and unrelated errors are
# no longer silently swallowed.
try:
    sys.path.append('/opt/MatterSim/build/')  # local docker or Philly
    import MatterSim
except ImportError:
    # local conda env only
    sys.path.append('/home/hoyeung/Documents/vnla/code/build')
    import MatterSim

In [3]:
# Root folders: dataset blob and experiment outputs.
PT_DATA_DIR = "/home/hoyeung/blob_matterport3d/"
EXP_DIR = "/home/hoyeung/blob_experiments/"

# Expose the data root through the process environment as well.
os.environ['PT_DATA_DIR'] = PT_DATA_DIR

In [4]:
# Parse asknav_val_seen.json into `seen_data`, then show its item count.
seen_json_path = PT_DATA_DIR + "asknav/asknav_val_seen.json"
with open(seen_json_path, "r") as seen_file:
    seen_data = json.load(seen_file)

len(seen_data)

4874

In [5]:
# Parse asknav_train.json into `train_data`, then show its item count.
train_json_path = PT_DATA_DIR + "asknav/asknav_train.json"
with open(train_json_path, "r") as train_file:
    train_data = json.load(train_file)

len(train_data)

94798

In [6]:
# Path (relative to EXP_DIR) of the JSON file holding the evaluation results.
# Note: despite its name, `res_dir` points at a file, not a directory.
res_dir = (
    "output_philly/20200207_philly_rerun_vnla/dagger_no_ask/"
    "20200207_philly_rerun_vnla_nav_sample_ask_teacher_val_seen_for_eval.json"
)
results_json_path = EXP_DIR + res_dir
with open(results_json_path, "r") as results_file:
    seen_results = json.load(results_file)

len(seen_results)

4874

In [7]:
# Scratch cell: echoes the directory that contains the results file (EXP_DIR + the folder part of res_dir).
"/home/hoyeung/blob_experiments/output_philly/20200207_philly_rerun_vnla/dagger_no_ask/"

'/home/hoyeung/blob_experiments/output_philly/20200207_philly_rerun_vnla/dagger_no_ask/'

In [8]:
# Read the scan names, one per line. Iterating the file and skipping empty
# lines keeps the last entry even when the file has no trailing newline
# (the previous `split('\n')[:-1]` would drop it) and ignores blank lines.
# The path is derived from PT_DATA_DIR instead of repeating the absolute path.
scans_list_path = os.path.join(PT_DATA_DIR, 'semantics', 'asknav_tr_scans.txt')
with open(scans_list_path, "r") as f:
    asknav_tr_scans = [line.strip() for line in f if line.strip()]
len(asknav_tr_scans)

56

In [9]:
# For every listed scan, store whatever utils.load_panos_to_region returns
# for it (called with an empty string as the second argument).
all_panos_to_region = {
    scan_name: utils.load_panos_to_region(scan_name, "")
    for scan_name in asknav_tr_scans
}

In [10]:
# Map each of the 36 view indices to a (heading, elevation) pair in radians.
# Indices are laid out as three rows of 12 headings spaced pi/6 apart:
#   0-11  -> elevation -pi/6
#   12-23 -> elevation 0
#   24-35 -> elevation +pi/6
# The heading wraps every 12 indices so it always lies in [0, 2*pi).
# Previously indices 12-35 received headings of 2*pi and above, which denote
# the same directions but are not normalized.
viewix_heading_elevation_map = {}
for i in range(36):
    heading = (i % 12) * math.pi / 6
    elevation = (i // 12 - 1) * math.pi / 6
    viewix_heading_elevation_map[i] = (heading, elevation)

In [11]:
# Create and configure a MatterSim simulator instance.
# NOTE(review): PT_DATA_DIR is re-assigned here with the same value it was
# given near the top of the notebook; the duplicate could be dropped.
PT_DATA_DIR = '/home/hoyeung/blob_matterport3d/'
sim = MatterSim.Simulator()
# Rendering is switched off.
sim.setRenderingEnabled(False)
sim.setDiscretizedViewingAngles(True)
sim.setCameraResolution(640, 480)
# 60 degrees, converted to radians (presumably the vertical field of view — confirm).
sim.setCameraVFOV(math.radians(60))
# The navigation graph is read from <PT_DATA_DIR>/connectivity.
sim.setNavGraphPath(
    os.path.join(PT_DATA_DIR, 'connectivity'))
# All settings above are applied before init() is called.
sim.init()

### Analysis

In [12]:
# Angular step obtained by splitting a full turn into 12 equal parts (30 degrees).
increment_30 = (2 * math.pi) / 12.0
# Number of such steps in half a turn.
math.pi / increment_30

6.0

In [13]:
# Goal: for each starting point (scan, viewpoint, view index), find the most
# common gold trajectories.
# Mapping built below: (scan, viewpoint, viewix) -> [traj1, traj2, traj3, ...]

In [14]:
# Group every path in seen_data by its episode's starting state.
# Key: (scan, first viewpoint of the first path, heading // 12).
# NOTE(review): if 'heading' is stored in radians (values below 2*pi), then
# `// 12` is always 0 — confirm the intended heading -> view-index conversion.
full_trajectories_seen = defaultdict(list)

for dat in seen_data:
    start_key = (dat['scan'], dat['paths'][0][0], dat['heading'] // 12)
    full_trajectories_seen[start_key].extend(dat['paths'])

len(full_trajectories_seen)

3012

In [15]:
# Group every path in train_data by its episode's starting state.
# Key: (scan, first viewpoint of the first path, heading // 12).
# NOTE(review): if 'heading' is stored in radians (values below 2*pi), then
# `// 12` is always 0 — confirm the intended heading -> view-index conversion.
full_trajectories_train = defaultdict(list)

for dat in train_data:
    start_key = (dat['scan'], dat['paths'][0][0], dat['heading'] // 12)
    full_trajectories_train[start_key].extend(dat['paths'])

len(full_trajectories_train)

6774

In [16]:
# Group the agent's paths by starting state, using the same key layout as the
# data-side dictionaries: (scan, first viewpoint, first heading // 12).
full_trajectories_seen_res = defaultdict(list)

for res in seen_results:
    first_step = res['trajectory'][0]
    start_key = (res['scan'], first_step[0], first_step[1] // 12)

    # Element 0 of every trajectory step is the viewpoint id.
    visited = [step[0] for step in res['trajectory']]

    # Collapse runs of identical consecutive viewpoints into a single entry.
    path = [visited[0]]
    for viewpoint in visited[1:]:
        if viewpoint != path[-1]:
            path.append(viewpoint)

    full_trajectories_seen_res[start_key].append(path)

len(full_trajectories_seen_res)

3012

In [17]:
# Snapshot of the start-state keys, in the dictionary's insertion order.
seen_results_keys = list(full_trajectories_seen_res)

### Trajectories trimmed to their first N steps (N = 1, 5, 8, 10)

In [19]:
def trim_trajectories(traj_set, length):
    """Truncate every path to its first `length` entries and tally the results.

    Args:
        traj_set: mapping from a key to a list of paths (sliceable sequences).
        length: number of leading entries to keep from each path.

    Returns:
        A dict with the same keys as `traj_set`. Each value is the
        `Counter.most_common()` list of (truncated_path_tuple, count) pairs,
        ordered from most to least frequent.
    """
    return {
        key: Counter(tuple(path[:length]) for path in paths).most_common()
        for key, paths in traj_set.items()
    }

In [27]:
# Prefix length 1 for the three trajectory collections (seen data, train data, agent results).
(full_trajectories_seen_trimmed_1,
 full_trajectories_train_trimmed_1,
 full_trajectories_seen_res_trimmed_1) = [
    trim_trajectories(traj_set, length=1)
    for traj_set in (full_trajectories_seen,
                     full_trajectories_train,
                     full_trajectories_seen_res)
]

In [20]:
# Prefix length 5 for the three trajectory collections (seen data, train data, agent results).
(full_trajectories_seen_trimmed_5,
 full_trajectories_train_trimmed_5,
 full_trajectories_seen_res_trimmed_5) = [
    trim_trajectories(traj_set, length=5)
    for traj_set in (full_trajectories_seen,
                     full_trajectories_train,
                     full_trajectories_seen_res)
]

In [21]:
# Prefix length 8 for the three trajectory collections (seen data, train data, agent results).
(full_trajectories_seen_trimmed_8,
 full_trajectories_train_trimmed_8,
 full_trajectories_seen_res_trimmed_8) = [
    trim_trajectories(traj_set, length=8)
    for traj_set in (full_trajectories_seen,
                     full_trajectories_train,
                     full_trajectories_seen_res)
]

In [22]:
# Prefix length 10 for the three trajectory collections (seen data, train data, agent results).
(full_trajectories_seen_trimmed_10,
 full_trajectories_train_trimmed_10,
 full_trajectories_seen_res_trimmed_10) = [
    trim_trajectories(traj_set, length=10)
    for traj_set in (full_trajectories_seen,
                     full_trajectories_train,
                     full_trajectories_seen_res)
]

### find divergence points

In [None]:
# per task, at which time step does the agent tend to diverge from the gold traj?

In [77]:
# Align each val-seen data item with the agent result produced for it.
# aligned_data_results[path_id] holds:
#   'start'     : [scan, start viewpoint, dat['heading'] // 12]
#   'tar'       : the item's paths from the data file
#   'pred_fine' : viewpoint id of every step in the agent's trajectory
#   'pred'      : 'pred_fine' with consecutive repeats collapsed, starting
#                 from the data item's start viewpoint
aligned_data_results = defaultdict(lambda: defaultdict(list))

# Index the results once by instr_id so that exact matches are O(1) lookups
# instead of a full scan per data item. setdefault keeps the first occurrence
# when an id repeats, matching the "first hit wins" behaviour of a scan.
_results_by_instr_id = {}
for _result in seen_results:
    _results_by_instr_id.setdefault(_result['instr_id'], _result)


def _find_result(path_id):
    """Return the entry of `seen_results` that belongs to `path_id`.

    An instr_id equal to str(path_id) is preferred; this prevents a shorter
    id from matching a longer one that merely contains its digits. If there
    is no exact match, fall back to the first result whose instr_id contains
    str(path_id).

    Raises:
        IndexError: if no result matches.
    """
    pid = str(path_id)
    if pid in _results_by_instr_id:
        return _results_by_instr_id[pid]
    for candidate in seen_results:
        if pid in candidate['instr_id']:
            return candidate
    raise IndexError("no entry in seen_results matches path_id %s" % pid)


for dat in seen_data:
    idx = dat['path_id']
    scan = dat['scan']
    start_viewpt = dat['paths'][0][0]
    start_viewix = dat['heading'] // 12
    res = _find_result(idx)

    # Element 0 of every trajectory step is the viewpoint id.
    pred_fine = [step[0] for step in res['trajectory']]

    # Collapse runs of identical consecutive viewpoints, seeded with the
    # start viewpoint taken from the data item.
    pred = [start_viewpt]
    for viewpoint in pred_fine:
        if viewpoint != pred[-1]:
            pred.append(viewpoint)

    entry = aligned_data_results[idx]
    entry['start'] = [scan, start_viewpt, start_viewix]
    entry['tar'] = dat['paths']
    entry['pred_fine'] = pred_fine
    entry['pred'] = pred

    # TODO: compute the divergence point (first step where 'pred' leaves 'tar')
    

In [82]:
# Inspect the aligned record stored under path_id 175670.
# Note: indexing this defaultdict with a missing key inserts an empty record.
aligned_data_results[175670]

defaultdict(list,
            {'pred': ['2594cedd9da64c338c1ba98abed9efb3',
              '1f8ee74dbb254dff85771776279eae94',
              '192fb2a16486430a96b34dffc2e8cbbc',
              '2e52341d21b940b5b38a9e26c4cf4200',
              'd72137c926c94532a63bd98193273e22',
              'dc189857232643f9a17d5760ebb07b02',
              '5117df51d5b64847980813ed0519f031',
              '5d5aa3fb8039496b9930c0ca09d277c2'],
             'pred_fine': ['2594cedd9da64c338c1ba98abed9efb3',
              '1f8ee74dbb254dff85771776279eae94',
              '192fb2a16486430a96b34dffc2e8cbbc',
              '2e52341d21b940b5b38a9e26c4cf4200',
              'd72137c926c94532a63bd98193273e22',
              'dc189857232643f9a17d5760ebb07b02',
              'dc189857232643f9a17d5760ebb07b02',
              'dc189857232643f9a17d5760ebb07b02',
              'dc189857232643f9a17d5760ebb07b02',
              '5117df51d5b64847980813ed0519f031',
              '5d5aa3fb8039496b9930c0ca09d277c2',
          

In [78]:
# NOTE(review): this echoes the entire dictionary (one record per data item);
# consider displaying a single record or a small sample instead.
aligned_data_results

defaultdict(<function __main__.<lambda>()>,
            {163843: defaultdict(list,
                         {'pred': ['b04418ba1f3c437f9006e16dfab20c2f',
                           'c305c2e9304047b2a87bd1e56ce01b55',
                           '2f9f9138696447ce8cf3d64be4339a4f',
                           '357424fc423a4e978fa88a8e4d3bed78',
                           '3187b9296abe47c68fc0e8bab7edfb36',
                           '6268c4082bdf4f029898a13ca335a8a1'],
                          'pred_fine': ['b04418ba1f3c437f9006e16dfab20c2f',
                           'b04418ba1f3c437f9006e16dfab20c2f',
                           'b04418ba1f3c437f9006e16dfab20c2f',
                           'b04418ba1f3c437f9006e16dfab20c2f',
                           'c305c2e9304047b2a87bd1e56ce01b55',
                           '2f9f9138696447ce8cf3d64be4339a4f',
                           '2f9f9138696447ce8cf3d64be4339a4f',
                           '2f9f9138696447ce8cf3d64be4339a4f',
             

In [80]:
import nltk

In [86]:
# NOTE(review): `references` is only assigned in a later cell of this notebook,
# so on a fresh top-to-bottom run this cell raises NameError.
references

[['b04418ba1f3c437f9006e16dfab20c2f',
  'c305c2e9304047b2a87bd1e56ce01b55',
  '357424fc423a4e978fa88a8e4d3bed78',
  '3187b9296abe47c68fc0e8bab7edfb36',
  '6268c4082bdf4f029898a13ca335a8a1'],
 ['b04418ba1f3c437f9006e16dfab20c2f',
  'c305c2e9304047b2a87bd1e56ce01b55',
  '357424fc423a4e978fa88a8e4d3bed78',
  '3187b9296abe47c68fc0e8bab7edfb36']]

In [87]:
# NOTE(review): `hypothesis` is only assigned in a later cell of this notebook,
# so on a fresh top-to-bottom run this cell raises NameError.
hypothesis

['b04418ba1f3c437f9006e16dfab20c2f',
 'c305c2e9304047b2a87bd1e56ce01b55',
 '2f9f9138696447ce8cf3d64be4339a4f',
 '357424fc423a4e978fa88a8e4d3bed78',
 '3187b9296abe47c68fc0e8bab7edfb36',
 '6268c4082bdf4f029898a13ca335a8a1']

In [88]:
# BLEU of the predicted viewpoint sequence for path_id 163843 against its
# target paths, with weights (0.5, 0.5) — i.e. two n-gram orders, equally weighted.
record_163843 = aligned_data_results[163843]
references, hypothesis = record_163843['tar'], record_163843['pred']

BLEUscore = nltk.translate.bleu_score.sentence_bleu(
    references, hypothesis, weights=(0.5, 0.5)
)
print(BLEUscore)

0.7071067811865476


In [95]:
# Same comparison as the previous BLEU cell, but with three n-gram orders
# weighted (0.25, 0.25, 0.50).
record_163843 = aligned_data_results[163843]
references, hypothesis = record_163843['tar'], record_163843['pred']

BLEUscore = nltk.translate.bleu_score.sentence_bleu(
    references, hypothesis, weights=(0.25, 0.25, 0.50)
)
print(BLEUscore)

0.42044820762685725


In [None]:
# compute some n-gram overlap

In [None]:
# NOTE(review): stray literal; this cell does nothing except echo 6.
6

### how much does an agent look around?