In [7]:
import json
import os
from collections import Counter
import numpy as np
import progressbar

### Read Data and Splits

In [8]:
# Root data directory; per-task files are looked up as os.path.join(data_p, task['task'], ...).
data_p = '/root/media/legg/data-850-evo/json_data_augmentation_20200820'
# JSON splits file; its 'augmentation' entry is the list of task records processed in this notebook.
splits_p = '/root/home/legg/data_alfred/splits/sample_failed_20200820.json'

In [9]:
# Load the splits definition from disk; splits['augmentation'] is the task list used below.
with open(splits_p, 'r') as f:
    splits = json.load(f)

In [10]:
len(splits['augmentation'])

7649

In [11]:
# Example split entry, echoed for reference.
# Original note: "missing an entire subgoal" — presumably this trial's extracted
# features are one subgoal short; TODO confirm which artifact is incomplete.
{'task': 'pick_and_place_simple-AlarmClock-None-Dresser-317/trial_T20200815_024233_857912', 'repeat_idx': 0, 'full_traj_success': True, 'collected_subgoals': 4}

{'task': 'pick_and_place_simple-AlarmClock-None-Dresser-317/trial_T20200815_024233_857912',
 'repeat_idx': 0,
 'full_traj_success': True,
 'collected_subgoals': 4}

### (Pre Explainer/Baseline Prediction) Filter down to only the tasks whose per-subgoal lengths match between the low-level actions and the extracted object-state features

In [6]:
# Keep only the tasks whose per-subgoal low-level action counts agree with the
# per-subgoal lengths of the extracted features; collect the rest for inspection.
fail = 0                # total number of mismatching tasks
fail_full_success = []  # mismatching tasks with full_traj_success == True
fail_full_fail = []     # mismatching tasks with full_traj_success == False
error_free_split = []   # tasks that pass the check

for task in progressbar.progressbar(splits['augmentation']):

    traj_data_p = os.path.join(data_p, task['task'], 'traj_data.json')
    with open(traj_data_p, 'r') as f:
        traj_data = json.load(f)

    extracted_features_p = os.path.join(
        data_p,
        task['task'],
        'pp_model:seq2seq_per_subgoal,name:v2_epoch_40_obj_instance_enc_max_pool_dec_aux_loss_weighted_bce_1to2',
        'extracted_feature_states.json',
    )
    with open(extracted_features_p, 'r') as f:
        extracted_features = json.load(f)

    # (subgoal index, number of entries in that subgoal) taken from the extracted features.
    # 'instance_visibile' (sic) is the key as it is spelled in the JSON file.
    subgoal_len_features = [(i, len(subgoal)) for i, subgoal in enumerate(extracted_features['instance_visibile'])]
    # (high_idx, number of low-level actions with that high_idx), in order of first appearance.
    counter_items = list(Counter(low_a['high_idx'] for low_a in traj_data['plan']['low_actions']).items())

    # For fully successful trajectories the last feature entry is ignored.
    # NOTE(review): presumably it belongs to the terminal/NoOp subgoal, which has no low-level actions — confirm.
    if task['full_traj_success']:
        subgoal_len_features = subgoal_len_features[:-1]

    # Explicit comparison instead of `assert` inside a bare `except:`: asserts are
    # stripped under `python -O`, and a bare except can hide unrelated errors.
    if counter_items == subgoal_len_features:
        error_free_split.append(task)
        continue

    if task['full_traj_success']:
        fail_full_success.append(task)
    else:
        fail_full_fail.append(task)
    fail += 1

100% (7649 of 7649) |####################| Elapsed Time: 0:00:20 Time:  0:00:20


In [7]:
len(error_free_split)

7570

In [39]:
error_free_split[0]

{'task': 'look_at_obj_in_light-BaseballBat-None-DeskLamp-301/trial_T20200814_164125_595727',
 'repeat_idx': 0,
 'full_traj_success': False,
 'collected_subgoals': 2}

In [38]:
Counter([t['collected_subgoals'] for t in error_free_split]).most_common()

[(3, 3290),
 (6, 1170),
 (2, 1149),
 (5, 727),
 (4, 313),
 (7, 310),
 (9, 270),
 (8, 142),
 (12, 83),
 (11, 43),
 (10, 33),
 (13, 31),
 (14, 8),
 (16, 1)]

In [40]:
Counter([t['full_traj_success'] for t in error_free_split]).most_common()

[(False, 6963), (True, 607)]

### Save the Filtered Splits

In [35]:
# save splits out!
# NOTE(review): this writes under /root/data_alfred/..., whereas the cell that reloads the
# filtered split reads from /root/home/legg/data_alfred/... — confirm both resolve to the same file.
error_free_splits_p = '/root/data_alfred/splits/sample_failed_20200820_filtered.json'

with open(error_free_splits_p, 'w') as f:
    # The task list is stored under a single 'augmentation' key, the same key read from the input splits.
    json.dump({'augmentation':error_free_split}, f)

In [13]:
# Load the filtered split back from disk (this cell reads; it does not save).
# NOTE(review): this path is under /root/home/legg/..., while the save cell writes to
# /root/data_alfred/... — confirm they point to the same file.
error_free_splits_p = '/root/home/legg/data_alfred/splits/sample_failed_20200820_filtered.json'

with open(error_free_splits_p, 'r') as f:
    error_free_split = json.load(f)['augmentation']

In [14]:
len(error_free_split)

7570

### Create Toy Split for Testing

In [49]:
# Fixed list of 40 indices into error_free_split used to build the toy split.
# Presumably produced once by the commented-out np.random.choice call below and then
# pasted in so the toy split stays the same across runs — TODO confirm.
# random_indices = np.random.choice(np.arange(len(error_free_split)), size=40)
random_indices = [5598, 4992, 4753, 6209, 7330, 3377, 1974, 3502, 5065,  772, 5522,
       1035, 4816,  732,   85, 3585, 5463, 5424, 5812, 1934, 7250, 1836,
       5260, 3332, 5095, 4063, 5257, 7358,  581, 2470, 1164, 6440,  979,
       6713, 2440,  714, 4139, 5027, 3355, 6355]

In [46]:
Counter([error_free_split[t]['collected_subgoals'] for t in random_indices]).most_common()

[(3, 14), (6, 9), (2, 8), (5, 3), (11, 2), (10, 1), (9, 1), (12, 1), (4, 1)]

In [53]:
Counter([error_free_split[t]['full_traj_success'] for t in random_indices]).most_common()

[(False, 38), (True, 2)]

In [51]:
# Pick the task entries at the fixed indices to form the toy split, then echo the first one.
toy_split = [error_free_split[t] for t in random_indices]
toy_split[0]

{'task': 'pick_cool_then_place_in_recep-Bread-None-Microwave-18/trial_T20200817_101547_073180',
 'repeat_idx': 0,
 'full_traj_success': False,
 'collected_subgoals': 3}

In [55]:
# Write the toy split to disk under the 'augmentation' key, the same key used by the full splits file.
toy_splits_p = '/root/data_alfred/splits/sample_failed_toy.json'
with open(toy_splits_p, 'w') as f:
    json.dump({'augmentation':toy_split}, f)

In [8]:
# Reload the toy split from disk so it is available without rebuilding it from error_free_split.
toy_splits_p = '/root/data_alfred/splits/sample_failed_toy.json'
with open(toy_splits_p, 'r') as f:
    toy_split = json.load(f)['augmentation']

### (Post Explainer/Baseline Prediction) Truncate predicted instruction subgoals to match the true number of subgoals

In [24]:
def truncate_extra_subgoals(traj_data, num_subgoals, key='explainer_annotations'):
    '''
    Trim the predicted high-level descriptions of the first annotation to num_subgoals entries.

    traj_data: dict holding traj_data[key]['anns'][0]['high_descs'] (a list); modified in place
    num_subgoals: number of descriptions to keep
    key: which annotation set inside traj_data to process

    Returns True if entries were dropped, False if the list already had exactly num_subgoals entries.
    Raises AssertionError if the list has fewer than num_subgoals entries.
    '''
    first_ann = traj_data[key]['anns'][0]
    predicted = first_ann['high_descs']
    assert len(predicted) >= num_subgoals

    # Nothing to drop when the prediction already has the requested length.
    if len(predicted) == num_subgoals:
        return False

    first_ann['high_descs'] = predicted[:num_subgoals]
    return True

def match_post_prediction_subgoal_lengths(split, data_p, overwrite_traj=False, debug=False):
    '''
    Trim each task's predicted instruction lists down to its true number of subgoals.

    split: a list of tasks {'task':<task name>/<trial id>, 'repeat_idx':int, 'full_traj_success':boolean, 'collected_subgoals':int}
    data_p: root directory; each task's file is read from <data_p>/<task['task']>/traj_data.json
    overwrite_traj: when True, the (possibly trimmed) traj data is written back over that file
    debug: when True, the annotation sets are printed before and after trimming

    Returns a 4-tuple with the number of tasks whose list was trimmed, in the order
    (explainer, baseline, explainer enc-only, explainer aux-only).
    Raises AssertionError if any prediction has fewer entries than the true number of subgoals.
    '''
    # Annotation sets in the order they are checked and trimmed.
    ann_keys = (
        'explainer_annotations',
        'explainer_enconly_annotations',
        'explainer_auxonly_annotations',
        'baseline_annotations',
    )
    # (label, key) pairs in the order each debug dump prints them.
    dump_before = (
        ('Explainer', 'explainer_annotations'),
        ('Explainer Enc only', 'explainer_enconly_annotations'),
        ('Explainer Aux only', 'explainer_auxonly_annotations'),
        ('Baseline', 'baseline_annotations'),
    )
    dump_after = (
        ('Explainer', 'explainer_annotations'),
        ('Explainer Aux only', 'explainer_auxonly_annotations'),
        ('Explainer Enc only', 'explainer_enconly_annotations'),
        ('Baseline', 'baseline_annotations'),
    )
    num_adjusted = dict.fromkeys(ann_keys, 0)

    for task in split:
        traj_data_p = os.path.join(data_p, task['task'], 'traj_data.json')
        with open(traj_data_p, 'r') as traj_file:
            traj_data = json.load(traj_file)

        if debug:
            print(task)
            for label, ann_key in dump_before:
                print(label)
                print(traj_data[ann_key])
            print('----------------------------------------')

        true_num_subgoals = len(traj_data['num']['action_high']) - 1

        # Every prediction must cover at least the gold number of subgoals; when decoding
        # a batch of different tasks the model can produce more than necessary.
        # All four sets are checked before any of them is trimmed.
        for ann_key in ann_keys:
            assert len(traj_data[ann_key]['anns'][0]['high_descs']) >= true_num_subgoals

        for ann_key in ann_keys:
            num_adjusted[ann_key] += int(truncate_extra_subgoals(traj_data, true_num_subgoals, key=ann_key))

        if debug:
            print(num_adjusted['explainer_annotations'], num_adjusted['baseline_annotations'])
            print(task)
            for label, ann_key in dump_after:
                print(label)
                print(traj_data[ann_key])
            print('\n\n\n\n\n\n\n\n')

        # be careful about overwriting!
        if overwrite_traj:
            with open(traj_data_p, 'w') as traj_file:
                json.dump(traj_data, traj_file)

    return (
        num_adjusted['explainer_annotations'],
        num_adjusted['baseline_annotations'],
        num_adjusted['explainer_enconly_annotations'],
        num_adjusted['explainer_auxonly_annotations'],
    )
        

In [None]:
# Verify with toy split and toy dataset
# NOTE: overwrite_traj=True rewrites each toy task's traj_data.json in place.
print (len(toy_split))
# match_post_prediction_subgoal_lengths returns four counters, in the order
# (explainer, baseline, explainer enc-only, explainer aux-only); unpacking into
# only two names raises ValueError.
(adjusts_explainer,
 adjusts_baseline,
 adjusts_explainer_enconly,
 adjusts_explainer_auxonly) = match_post_prediction_subgoal_lengths(toy_split, data_p, overwrite_traj=True, debug=True)

In [25]:
# Full run on explainer/baseline processed traj data
# NOTE: overwrite_traj=True writes every task's traj_data.json back to disk, trimmed or not.
# The echoed result is the tuple of per-model adjustment counts returned by the function.
print (len(error_free_split))
match_post_prediction_subgoal_lengths(error_free_split, data_p, overwrite_traj=True, debug=False)

7570


(0, 0, 0, 0)

### (Post Explainer/Baseline Prediction) Sanity Checks (Safe to Ignore)

In [48]:
error_free_split[0]

{'task': 'look_at_obj_in_light-BaseballBat-None-DeskLamp-301/trial_T20200814_164125_595727',
 'repeat_idx': 0,
 'full_traj_success': False,
 'collected_subgoals': 2}

In [115]:
error_free_split[6000]

{'task': 'pick_cool_then_place_in_recep-Pot-None-Cabinet-21/trial_T20200818_085037_265666',
 'repeat_idx': 0,
 'full_traj_success': False,
 'collected_subgoals': 5}

In [65]:
# Load traj_data.json for one hand-picked trial so its contents can be inspected manually.
traj_data_p = os.path.join(data_p, 'look_at_obj_in_light-BasketBall-None-DeskLamp-319/trial_T20200814_165040_882757', 'traj_data.json')
with open(traj_data_p, 'r') as f:
    traj_data = json.load(f)

In [116]:
# Load traj_data.json for the trial shown as error_free_split[6000]; this rebinds traj_data.
traj_data_p = os.path.join(data_p, 'pick_cool_then_place_in_recep-Pot-None-Cabinet-21/trial_T20200818_085037_265666', 'traj_data.json')
with open(traj_data_p, 'r') as f:
    traj_data = json.load(f)

In [117]:
traj_data['explainer_annotations']

{'anns': [{'task_desc': 'put a cold pan on the table .',
   'high_descs': ['turn to the left and face the white table .',
    'pick up the pot on the stove .',
    'turn around and walk to the fridge .',
    'open the fridge and place the pot on the top shelf , close the door , wait a moment , and take it out again .',
    'turn around and walk to the table .']}]}

In [118]:
traj_data['baseline_annotations']

{'anns': [{'task_desc': 'put a cold pan on the table .',
   'high_descs': ['turn left and walk to the stove .',
    'pick up the pot with the spoon in it .',
    'turn around and go to the white table on the left .',
    'open the fridge , put the pan inside , close the fridge , wait a few seconds , open the fridge , take the pan out , and close the refrigerator .',
    'turn around , go to the white table on the right .']}]}

In [114]:
traj_data['plan']['high_pddl']

[{'discrete_action': {'action': 'GotoLocation', 'args': ['diningtable']},
  'high_idx': 0,
  'planner_action': {'action': 'GotoLocation', 'location': 'loc|-8|-3|3|60'}},
 {'discrete_action': {'action': 'PickupObject', 'args': ['lettuce']},
  'high_idx': 1,
  'planner_action': {'action': 'PickupObject',
   'forceVisible': True,
   'objectId': 'Lettuce|-02.58|+00.80|-00.61',
   'receptacleObjectId': 'DiningTable|-02.87|+00.68|-00.51'}},
 {'discrete_action': {'action': 'GotoLocation', 'args': ['sink']},
  'high_idx': 2,
  'planner_action': {'action': 'GotoLocation', 'location': 'loc|-8|-13|2|60'}},
 {'discrete_action': {'action': 'CleanObject', 'args': ['lettuce']},
  'high_idx': 3,
  'planner_action': {'action': 'CleanObject',
   'cleanObjectId': 'Sink|-01.79|+00.90|-03.75|SinkBasin',
   'forceVisible': True,
   'objectId': 'Sink|-01.79|+00.90|-03.75|SinkBasin',
   'receptacleObjectId': 'Sink|-01.79|+00.90|-03.75|SinkBasin'}},
 {'discrete_action': {'action': 'NoOp', 'args': []},
  'high_