In [1]:
import json
import numpy as np
from functools import partial
from collections import Counter

In [35]:
# Load the original AskNav training split; the trailing expression shows its size.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_original.json") as train_file:
    full_tr_data = json.load(train_file)
len(full_tr_data)

94798

In [37]:
# Load the original AskNav val-seen split; the trailing expression shows its size.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_original.json") as val_seen_file:
    full_val_seen_data = json.load(val_seen_file)
len(full_val_seen_data)

4874

In [38]:
# Load the original AskNav val-unseen split; the trailing expression shows its size.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_original.json") as val_unseen_file:
    full_val_unseen_data = json.load(val_unseen_file)
len(full_val_unseen_data)

5005

In [103]:
# Figure 9 number of actions 5-9
def filter_action_lens(low, high, d):
    """Return True when the mean length of `d['paths']` lies in [low, high].

    Both bounds are inclusive. The mean is taken over `len(path)` for every
    entry of `d['paths']`.
    """
    path_lengths = [len(path) for path in d['paths']]
    avg_len = np.mean(path_lengths)
    # bool() keeps the return value a plain Python bool rather than numpy.bool_.
    return bool(low <= avg_len <= high)

# Predicate for records whose mean path length is between 5 and 9 inclusive.
filter_5_9 = partial(filter_action_lens, 5, 9)

# -------------------------------------------------------
# Figure 5 start rooms
def filter_start_room(start_rm_list, d):
    """Return True when the record's `start_region_name` is in `start_rm_list`."""
    return d['start_region_name'] in start_rm_list

# Predicate for records that start in a hallway.
filter_start_hallway = partial(filter_start_room, ['hallway'])

# -------------------------------------------------------
# Figure 5 goal rooms
def filter_goal_room(goal_rm_list, d):
    """Return True when the record's `end_region_name` is in `goal_rm_list`."""
    return d['end_region_name'] in goal_rm_list

# Predicates for single goal rooms and for the two rooms combined.
filter_end_bedroom = partial(filter_goal_room, ['bedroom'])
filter_end_bathroom = partial(filter_goal_room, ['bathroom'])
filter_end_common = partial(filter_goal_room, ['bedroom', 'bathroom'])
# -------------------------------------------------------
# Figure 4 objects
def filter_object(object_list, d):
    """Return True when the record's `object_name` is in `object_list`."""
    return d['object_name'] in object_list

# Predicates for individual target objects and for the three combined.
filter_picture = partial(filter_object, ['picture'])
filter_mirror = partial(filter_object, ['mirror'])
filter_pillow = partial(filter_object, ['pillow'])
filter_picture_mirror_pillow = partial(filter_object, ['mirror', 'picture', 'pillow'])
# -------------------------------------------------------
# Language instruction
def filter_instruction(instr_str_list, d):
    """Return True if any string of `instr_str_list` occurs in the first instruction.

    Matching is substring containment (`in`), not equality, and only
    `d['instructions'][0]` is inspected.
    """
    return any(instr_str in d['instructions'][0] for instr_str in instr_str_list)

# Three most common phrases. They are written WITHOUT the trailing ' .', so
# they match instructions by substring.
_top_3_instructions = [
    'find a sink in the kitchen',
    'find a refrigerator in the kitchen',
    'find a picture in the living room',
]

filter_instruction_most_common = partial(filter_instruction, _top_3_instructions[:1])

filter_instruction_most_common_sets = partial(filter_instruction, list(_top_3_instructions))

# top 10 most common instructions with traj length between 5-9 steps
# (same phrase style as above: no trailing ' .')
_top_10_instructions = _top_3_instructions + [
    'find a couch in the living room',
    'find a table in the living room',
    'find a mirror in one of the bedrooms',
    'find a nightstand in one of the bedrooms',
    'find a stove in the kitchen',
    'find a chair in one of the bedrooms',
    'find a kitchen counter in the kitchen',
]
filter_instruction_most_common_sets_top_10 = partial(filter_instruction, _top_10_instructions)

# top 30 most common instructions with traj length between 5-9 steps
# NOTE: unlike the lists above, these phrases include the trailing ' .'.
_top_30_instructions = [
    'find a sink in the kitchen .',
    'find a refrigerator in the kitchen .',
    'find a picture in the living room .',
    'find a couch in the living room .',
    'find a table in the living room .',
    'find a mirror in one of the bedrooms .',
    'find a nightstand in one of the bedrooms .',
    'find a stove in the kitchen .',
    'find a chair in one of the bedrooms .',
    'find a kitchen counter in the kitchen .',
    'find a plant in the living room .',
    'find a pillow in the living room .',
    'find a picture in one of the bedrooms .',
    'find a coffee table in the living room .',
    'find a lamp in one of the bedrooms .',
    'find an oven in the kitchen .',
    'find a toilet in one of the bathrooms .',
    'find a picture in the office .',
    'find a sink in one of the bathrooms .',
    'find a washing machine in the laundryroom or mudroom .',
    'find a chair in the office .',
    'find a bathtub in one of the bathrooms .',
    'find a table in one of the hallways .',
    'find a chair in the kitchen .',
    'find a curtain in the living room .',
    'find a trash can in one of the bathrooms .',
    'find a wash basin in one of the bathrooms .',
    'find a tv in the familyroom .',
    'find a fireplace in the living room .',
    'find a table in the dining room .',
]
filter_instruction_most_common_sets_top_30 = partial(filter_instruction, _top_30_instructions)
# -------------------------------------------------------

## What are the instructions for short trajectories?

In [85]:
# Keep only training records whose mean path length is in [5, 9].
data_short = [record for record in full_tr_data if filter_5_9(record)]
print(len(data_short))

# First instruction of every short record: once with duplicates, once unique.
instr_list_short = [record['instructions'][0] for record in data_short]
instr_set_short = set(instr_list_short)
print(len(instr_set_short))

# `counter` is a list of the 30 most frequent (instruction, count) pairs.
counter = Counter(instr_list_short).most_common(30)
counter

49503
1435


[('find a sink in the kitchen .', 372),
 ('find a refrigerator in the kitchen .', 369),
 ('find a picture in the living room .', 337),
 ('find a couch in the living room .', 324),
 ('find a table in the living room .', 317),
 ('find a mirror in one of the bedrooms .', 310),
 ('find a nightstand in one of the bedrooms .', 293),
 ('find a stove in the kitchen .', 281),
 ('find a chair in one of the bedrooms .', 279),
 ('find a kitchen counter in the kitchen .', 278),
 ('find a plant in the living room .', 276),
 ('find a pillow in the living room .', 270),
 ('find a picture in one of the bedrooms .', 267),
 ('find a coffee table in the living room .', 266),
 ('find a lamp in one of the bedrooms .', 265),
 ('find an oven in the kitchen .', 263),
 ('find a toilet in one of the bathrooms .', 258),
 ('find a picture in the office .', 251),
 ('find a sink in one of the bathrooms .', 239),
 ('find a washing machine in the laundryroom or mudroom .', 239),
 ('find a chair in the office .', 235),

In [87]:
# Total number of short records covered by the 30 most common instructions.
sum(count for _instr, count in counter)

7954

## data 100 - small_three_houses_single_goal_short (training same as val)

In [21]:
def filter_data_100(d):
    """Return True for records matching the single most common instruction with path length 5-9."""
    return bool(filter_instruction_most_common(d) and filter_5_9(d))

In [28]:
# Training records with path length 5-9 and instruction 'find a sink in the kitchen '
data_100 = list(filter(filter_data_100, full_tr_data))
len(data_100)

372

In [29]:
# Restrict to 3 houses (still 5-9, 'find a sink in the kitchen ').
# The numbers below are kept from the original cell's notes:
#  ('ac26ZMwG7aT', 38),
#  ('8WUmhLawc2A', 37),
#  ('cV4RVeZvu5T', 26)
data_100 = [
    record
    for record in data_100
    if record['scan'] in ['ac26ZMwG7aT', '8WUmhLawc2A', 'cV4RVeZvu5T']
]
len(data_100)

101

In [30]:
# Drop the record with path_id 156450 (the recorded outputs show this trims 101 records to 100).
data_100 = [record for record in data_100 if record['path_id'] != 156450]
len(data_100)

100

In [51]:
# Count the distinct houses (scans) left in the 100-record subset.
data_100_scans = [record['scan'] for record in data_100]
len(set(data_100_scans))

3

In [None]:
# Write the 100-record subset to disk (this cell has no execution count in the saved notebook).
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_three_houses_single_goal_short.json", "w") as out_file:
    json.dump(data_100, out_file)

## data 1078 - small_three_goals_short -- training

In [31]:
def filter_data_1078(d):
    """Return True for records matching one of the three most common instructions with path length 5-9."""
    return bool(filter_instruction_most_common_sets(d) and filter_5_9(d))

In [59]:
# Training subset: any scan, path length 5-9, instruction containing one of
# 'find a sink in the kitchen', 'find a refrigerator in the kitchen', 'find a picture in the living room'
data_1078 = list(filter(filter_data_1078, full_tr_data))
print("Data length = ", len(data_1078))

# Scan id of every selected record (duplicates kept); reused by later cells.
data_1078_scans = [record['scan'] for record in data_1078]
print("Number of unique houses = ", len(set(data_1078_scans)))

Data length =  1078
Number of unique houses =  31


In [156]:
# Write the three-goal, short-path training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_three_goals_short.json", "w") as out_file:
    json.dump(data_1078, out_file)

In [157]:
# NOTE(review): this writes the TRAINING list `data_1078` into the val_seen file.
# A filtered `data_1078_val_seen` is built in a later cell but no cell visible
# here writes it to disk - confirm that validating on the training data is intended.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_three_goals_short.json", "w") as out_file:
    json.dump(data_1078, out_file)

In [158]:
# NOTE(review): this writes the TRAINING list `data_1078` into the val_unseen file.
# A filtered `data_1078_val_unseen` is built in a later cell but no cell visible
# here writes it to disk - confirm that validating on the training data is intended.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_three_goals_short.json", "w") as out_file:
    json.dump(data_1078, out_file)

## data 1078 - small_three_goals_short -- val (less than 1078)

In [115]:
def filter_data_1078_val(d, scans):
    """Same test as the three-goal, length 5-9 training filter, plus `d['scan']` must be in `scans`."""
    keep = filter_instruction_most_common_sets(d) and filter_5_9(d) and (d['scan'] in scans)
    return bool(keep)

In [73]:
# Val-seen subset: same filter as training, restricted to scans present in data_1078.
data_1078_val_seen = [
    record for record in full_val_seen_data
    if filter_data_1078_val(record, data_1078_scans)
]
print("Data length = ", len(data_1078_val_seen))

data_1078_val_seen_scans = [record['scan'] for record in data_1078_val_seen]
print("Number of unique houses = ", len(set(data_1078_val_seen_scans)))

Data length =  42
Number of unique houses =  7


In [76]:
# Val-unseen subset: same instruction/length filter as training, with no scan restriction.
data_1078_val_unseen = list(filter(filter_data_1078, full_val_unseen_data))
print("Data length = ", len(data_1078_val_unseen))

data_1078_val_unseen_scans = [record['scan'] for record in data_1078_val_unseen]
print("Number of unique houses = ", len(set(data_1078_val_unseen_scans)))

Data length =  60
Number of unique houses =  5


## data 1862 - small_three_goals_anylength -- training

In [114]:
# Training subset: any path length, instruction containing one of
# 'find a sink in the kitchen', 'find a refrigerator in the kitchen', 'find a picture in the living room'
data_1862 = list(filter(filter_instruction_most_common_sets, full_tr_data))
print("Data length = ", len(data_1862))

# Scan id of every selected record (duplicates kept); reused by later cells.
data_1862_scans = [record['scan'] for record in data_1862]
print("Number of unique houses = ", len(set(data_1862_scans)))

Data length =  1862
Number of unique houses =  31


In [132]:
# Write the three-goal, any-length training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_three_goals_anylength.json", "w") as out_file:
    json.dump(data_1862, out_file)

## data 1862 - small_three_goals_anylength -- val (less than 1862)

In [119]:
def filter_data_1862_val(d, scans):
    """Return True for records matching one of the three most common instructions whose scan is in `scans`."""
    return bool(filter_instruction_most_common_sets(d) and (d['scan'] in scans))

In [120]:
# Val-seen subset: three-goal instructions, restricted to scans present in data_1862.
data_1862_val_seen = [
    record for record in full_val_seen_data
    if filter_data_1862_val(record, data_1862_scans)
]
print("Data length = ", len(data_1862_val_seen))

data_1862_val_seen_scans = [record['scan'] for record in data_1862_val_seen]
print("Number of unique houses = ", len(set(data_1862_val_seen_scans)))

Data length =  70
Number of unique houses =  7


In [133]:
# Write the three-goal, any-length val-seen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_three_goals_anylength.json", "w") as out_file:
    json.dump(data_1862_val_seen, out_file)

In [121]:
# Val-unseen subset: three-goal instructions, any length, no scan restriction.
data_1862_val_unseen = list(filter(filter_instruction_most_common_sets, full_val_unseen_data))
print("Data length = ", len(data_1862_val_unseen))

data_1862_val_unseen_scans = [record['scan'] for record in data_1862_val_unseen]
print("Number of unique houses = ", len(set(data_1862_val_unseen_scans)))

Data length =  90
Number of unique houses =  5


In [134]:
# Write the three-goal, any-length val-unseen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_three_goals_anylength.json", "w") as out_file:
    json.dump(data_1862_val_unseen, out_file)

## data 3160 - small_ten_goals_short -- training

In [64]:
def filter_data_3160(d):
    """Return True for records matching one of the top-10 instructions with path length 5-9."""
    return bool(filter_instruction_most_common_sets_top_10(d) and filter_5_9(d))

In [65]:
# Training subset: any scan, path length 5-9,
# top 10 language instructions
data_3160 = list(filter(filter_data_3160, full_tr_data))
print("Data length = ", len(data_3160))

# Scan id of every selected record (duplicates kept); reused by later cells.
data_3160_scans = [record['scan'] for record in data_3160]
print("Number of unique houses = ", len(set(data_3160_scans)))

Data length =  3160
Number of unique houses =  47


In [135]:
# Write the ten-goal, short-path training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_ten_goals_short.json", "w") as out_file:
    json.dump(data_3160, out_file)

## data 3160 - small_ten_goals_short -- val (less than 3160)

In [66]:
def filter_data_3160_val(d, scans):
    """Same test as the top-10, length 5-9 training filter, plus `d['scan']` must be in `scans`."""
    keep = filter_instruction_most_common_sets_top_10(d) and filter_5_9(d) and (d['scan'] in scans)
    return bool(keep)

In [78]:
# Val-seen subset: same filter as training, restricted to scans present in data_3160.
data_3160_val_seen = [
    record for record in full_val_seen_data
    if filter_data_3160_val(record, data_3160_scans)
]
print("Data length = ", len(data_3160_val_seen))

data_3160_val_seen_scans = [record['scan'] for record in data_3160_val_seen]
print("Number of unique houses = ", len(set(data_3160_val_seen_scans)))

Data length =  149
Number of unique houses =  20


In [136]:
# Write the ten-goal, short-path val-seen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_ten_goals_short.json", "w") as out_file:
    json.dump(data_3160_val_seen, out_file)

In [80]:
# Val-unseen subset: same instruction/length filter as training, with no scan restriction.
data_3160_val_unseen = list(filter(filter_data_3160, full_val_unseen_data))
print("Data length = ", len(data_3160_val_unseen))

data_3160_val_unseen_scans = [record['scan'] for record in data_3160_val_unseen]
print("Number of unique houses = ", len(set(data_3160_val_unseen_scans)))

Data length =  137
Number of unique houses =  8


In [137]:
# Write the ten-goal, short-path val-unseen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_ten_goals_short.json", "w") as out_file:
    json.dump(data_3160_val_unseen, out_file)

## data 5775 - small_ten_goals_anylength -- training

In [123]:
# Training subset: any path length,
# top 10 language instructions
data_5775 = list(filter(filter_instruction_most_common_sets_top_10, full_tr_data))
print("Data length = ", len(data_5775))

# Scan id of every selected record (duplicates kept); reused by later cells.
data_5775_scans = [record['scan'] for record in data_5775]
print("Number of unique houses = ", len(set(data_5775_scans)))

Data length =  5775
Number of unique houses =  48


In [138]:
# Write the ten-goal, any-length training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_ten_goals_anylength.json", "w") as out_file:
    json.dump(data_5775, out_file)

## data 5775 - small_ten_goals_anylength -- val (less than 5775)

In [124]:
def filter_data_5775_val(d, scans):
    """Return True for records matching one of the top-10 instructions whose scan is in `scans`."""
    return bool(filter_instruction_most_common_sets_top_10(d) and (d['scan'] in scans))

In [125]:
# Val-seen subset: top-10 instructions, restricted to scans present in data_5775.
data_5775_val_seen = [
    record for record in full_val_seen_data
    if filter_data_5775_val(record, data_5775_scans)
]
print("Data length = ", len(data_5775_val_seen))

data_5775_val_seen_scans = [record['scan'] for record in data_5775_val_seen]
print("Number of unique houses = ", len(set(data_5775_val_seen_scans)))

Data length =  258
Number of unique houses =  21


In [139]:
# Write the ten-goal, any-length val-seen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_ten_goals_anylength.json", "w") as out_file:
    json.dump(data_5775_val_seen, out_file)

In [126]:
# Val-unseen subset: top-10 instructions, any length, no scan restriction.
data_5775_val_unseen = list(filter(filter_instruction_most_common_sets_top_10, full_val_unseen_data))
print("Data length = ", len(data_5775_val_unseen))

data_5775_val_unseen_scans = [record['scan'] for record in data_5775_val_unseen]
print("Number of unique houses = ", len(set(data_5775_val_unseen_scans)))

Data length =  244
Number of unique houses =  8


In [140]:
# Write the ten-goal, any-length val-unseen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_ten_goals_anylength.json", "w") as out_file:
    json.dump(data_5775_val_unseen, out_file)

## data 7954 - small_thirty_goals_short -- training

In [106]:
def filter_data_7954(d):
    """Return True for records matching one of the top-30 instructions with path length 5-9."""
    return bool(filter_instruction_most_common_sets_top_30(d) and filter_5_9(d))

In [107]:
# Training subset: any scan, path length 5-9,
# top 30 language instructions
# The original cell called `filter_data_dummy`, which is not defined in the
# visible notebook and raises NameError on a fresh kernel. `filter_data_7954`
# is the predicate defined for this subset, and the recorded length (7954)
# equals the sum of the top-30 instruction counts shown earlier.
data_7954 = [record for record in full_tr_data if filter_data_7954(record)]
print("Data length = ", len(data_7954))

# Scan id of every selected record (duplicates kept); reused by later cells.
data_7954_scans = [record['scan'] for record in data_7954]
print("Number of unique houses = ", len(set(data_7954_scans)))

Data length =  7954
Number of unique houses =  53


In [141]:
# Write the thirty-goal, short-path training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_thirty_goals_short.json", "w") as out_file:
    json.dump(data_7954, out_file)

## data 7954 - small_thirty_goals_short -- val (less than 7954)

In [109]:
def filter_data_7954_val(d, scans):
    """Same test as the top-30, length 5-9 training filter, plus `d['scan']` must be in `scans`."""
    keep = filter_instruction_most_common_sets_top_30(d) and filter_5_9(d) and (d['scan'] in scans)
    return bool(keep)

In [110]:
# Val-seen subset: same filter as training, restricted to scans present in data_7954.
data_7954_val_seen = [
    record for record in full_val_seen_data
    if filter_data_7954_val(record, data_7954_scans)
]
print("Data length = ", len(data_7954_val_seen))

data_7954_val_seen_scans = [record['scan'] for record in data_7954_val_seen]
print("Number of unique houses = ", len(set(data_7954_val_seen_scans)))

Data length =  380
Number of unique houses =  36


In [142]:
# Write the thirty-goal, short-path val-seen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_thirty_goals_short.json", "w") as out_file:
    json.dump(data_7954_val_seen, out_file)

In [111]:
# Val-unseen subset: same instruction/length filter as training, with no scan restriction.
data_7954_val_unseen = list(filter(filter_data_7954, full_val_unseen_data))
print("Data length = ", len(data_7954_val_unseen))

data_7954_val_unseen_scans = [record['scan'] for record in data_7954_val_unseen]
print("Number of unique houses = ", len(set(data_7954_val_unseen_scans)))

Data length =  365
Number of unique houses =  9


In [143]:
# Write the thirty-goal, short-path val-unseen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_thirty_goals_short.json", "w") as out_file:
    json.dump(data_7954_val_unseen, out_file)

## data 14413 - small_thirty_goals_anylength -- training

In [128]:
# anylength
# top 30 language instructions
data_14413 = [full_tr_data[idx] for idx in range(len(full_tr_data)) if filter_instruction_most_common_sets_top_30(full_tr_data[idx])]
print ("Data length = ", len(data_14413))

data_14413_scans = [d['scan'] for d in data_14413]
print ("Number of unique houses = ", len(set(data_14413_scans)))

Data length =  14413
Number of unique houses =  53


In [144]:
# Write the thirty-goal, any-length training subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_train_small_thirty_goals_anylength.json", "w") as out_file:
    json.dump(data_14413, out_file)

## data 14413 - small_thirty_goals_anylength -- val (less than 14413)

In [129]:
def filter_data_14413_val(d, scans):
    """Return True for records matching one of the top-30 instructions whose scan is in `scans`."""
    return bool(filter_instruction_most_common_sets_top_30(d) and (d['scan'] in scans))

In [130]:
# Val-seen subset: top-30 instructions, restricted to scans present in data_14413.
data_14413_val_seen = [
    record for record in full_val_seen_data
    if filter_data_14413_val(record, data_14413_scans)
]
print("Data length = ", len(data_14413_val_seen))

data_14413_val_seen_scans = [record['scan'] for record in data_14413_val_seen]
print("Number of unique houses = ", len(set(data_14413_val_seen_scans)))

Data length =  688
Number of unique houses =  36


In [145]:
# Write the thirty-goal, any-length val-seen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_seen_small_thirty_goals_anylength.json", "w") as out_file:
    json.dump(data_14413_val_seen, out_file)

In [131]:
# Val-unseen subset: top-30 instructions, any length, no scan restriction.
data_14413_val_unseen = list(filter(filter_instruction_most_common_sets_top_30, full_val_unseen_data))
print("Data length = ", len(data_14413_val_unseen))

data_14413_val_unseen_scans = [record['scan'] for record in data_14413_val_unseen]
print("Number of unique houses = ", len(set(data_14413_val_unseen_scans)))

Data length =  631
Number of unique houses =  9


In [146]:
# Write the thirty-goal, any-length val-unseen subset to disk.
with open("/home/hoyeung/blob_matterport3d/asknav/asknav_val_unseen_small_thirty_goals_anylength.json", "w") as out_file:
    json.dump(data_14413_val_unseen, out_file)

## Philly run yaml template

In [155]:
# Prints one YAML job entry per seed for the selected dataset configuration.
# The emitted text is whitespace-sensitive, so the template string is left untouched.
seeds_10 = [42, 677, 848, 163, 620, 511, 102, 158, 2, 204]
# NOTE(review): despite its name this list holds 19 values (seeds_10 without 42,
# plus ten more) and is not used by the code below - confirm the intended contents.
seeds_20 = [677, 848, 163, 620, 511, 102, 158, 2, 204, 471, 154, 220, 748, 623,  47, 882, 136, 315,  48]

# Alternative configuration (ten goals, short paths), currently disabled:
# exper_name = "20200126_philly_aggrevate_seeds"
# size = 3160
# data_suffix = "small_ten_goals_short"
# seeds = seeds_10

# Active configuration: three goals, short paths, ten seeds.
exper_name = "20200126_philly_aggrevate_seeds"
size = 1078
data_suffix = "small_three_goals_short"
seeds = seeds_10

# `size` and `num` form the job name; `data_suffix` and `num` are passed to
# train_experiments.sh as -data_suffix and -seed.
for num in seeds:
    text = """
- name: data_{size}_seed_{num}
  sku: G1
  command:
  - cd tasks/VNLA/scripts/ && bash train_experiments.sh "{exper_name}" "data_{size}_seed_{num}" "configs/experiment.json" -batch_size 100 -start_beta_decay 100 -decay_beta_every 100 -min_history_to_learn 1000 -save_every 1000 -log_every 1000 -plot_to_philly 0 -data_suffix {data_suffix} -dropout_ratio 0.5 -agent_end_criteria 2.5 -n_iters 50000 -seed {num}
  """.format(size=size, num=num, data_suffix=data_suffix, exper_name=exper_name)
    print(text)


- name: data_1078_seed_42
  sku: G1
  command:
  - cd tasks/VNLA/scripts/ && bash train_experiments.sh "20200126_philly_aggrevate_seeds" "data_1078_seed_42" "configs/experiment.json" -batch_size 100 -start_beta_decay 100 -decay_beta_every 100 -min_history_to_learn 1000 -save_every 1000 -log_every 1000 -plot_to_philly 0 -data_suffix small_three_goals_short -dropout_ratio 0.5 -agent_end_criteria 2.5 -n_iters 50000 -seed 42
  

- name: data_1078_seed_677
  sku: G1
  command:
  - cd tasks/VNLA/scripts/ && bash train_experiments.sh "20200126_philly_aggrevate_seeds" "data_1078_seed_677" "configs/experiment.json" -batch_size 100 -start_beta_decay 100 -decay_beta_every 100 -min_history_to_learn 1000 -save_every 1000 -log_every 1000 -plot_to_philly 0 -data_suffix small_three_goals_short -dropout_ratio 0.5 -agent_end_criteria 2.5 -n_iters 50000 -seed 677
  

- name: data_1078_seed_848
  sku: G1
  command:
  - cd tasks/VNLA/scripts/ && bash train_experiments.sh "20200126_philly_aggrevate_seeds" 