In [81]:
import numpy as np
import json
import os
from datetime import datetime
from collections import defaultdict
from math import ceil
import random

In [7]:
dout = '/root/data/home/hoyeung/blob_alfred_data/exp_all/model:seq2seq_per_subgoal,name:v2_epoch_40_obj_instance_enc_max_pool_dec_aux_loss_weighted_bce_1to2_sample_sentences/'
data = '/root/data_alfred/json_feat_2.1.0/'
# instr_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M")
instr_TIMESTAMP = '20200810_1829'

In [30]:
instr_TIMESTAMP

'20200810_1829'

## DATA

In [9]:
def perplexity(probs):
    return 2**(-1*np.sum(np.log2(probs))/len(probs))

In [133]:
def augment_one_task(pred, key, num_best_candidates=3, num_sample_instructions=6, choices=[0, 1, 2, 0, 1, 2]):

    num_subgoals = len(pred[key]['action_high'])-1
    top_candidates = {}
    
    # Filter to top 3, unique temperature sampled instructions by perplexity
    for subgoal_i in range(num_subgoals):
        candidates = [can[str(subgoal_i)] for can in pred[key]['p_lang_instr']]
        candidates_scores = [perplexity(score[str(subgoal_i)]) for score in pred[key]['p_lang_probs']]
        instr_score_pairs = [(round(perplex,3), instr) for instr, perplex in zip(candidates, candidates_scores) if instr != '']
        sorted_candidates = sorted(instr_score_pairs, key=lambda x:x[0], reverse=False)
        
        top_candidates[subgoal_i] = []
        last = ''
        ct = 0
        repeat = 0
        for score, can in sorted_candidates:
            if can != last and can != last+' .' and can != last+'.':
                last = can
                top_candidates[subgoal_i].append(can[0].upper() + can[1:].replace(' .', '.'))
                ct += 1
            if ct == num_best_candidates:
                break
        while ct < num_best_candidates:
            # if temperature doesn't generate 3 unique sentences
            top_candidates[subgoal_i].append(top_candidates[subgoal_i][-1])
            repeat += 1
            ct += 1
    
    # Sample from the filtered
    choices_per_subgoal = {subgoal_i:random.sample(choices, num_sample_instructions) for subgoal_i in range(num_subgoals)}
    
    new_instructions = []
    for sampled_instr_i in range(num_sample_instructions):
        high_descs = [] 
        for subgoal_i in range(num_subgoals):
            try:
                high_descs.append((top_candidates[subgoal_i][choices_per_subgoal[subgoal_i].pop()]))
            except:
                import pdb; pdb.set_trace()
        out = {'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S_%f"), 'high_descs':high_descs}
        new_instructions.append(out)
    # ---------------------

    return new_instructions, repeat


def augment_split(pred, num_best_candidates, num_sample_instructions, data, instr_timestamp):
    '''
    pred: loaded from {}_sampled.temperature_{}.preds.json.
    num_best_candidates: int. number of top generated instructions per subgoal to keep.
    num_sample_instructions: int. sample 3-6 new instructions
    instr_timestamp: str. timestamp for the whole instruction set
    '''
    out = {}
    outpaths = {}
    repeats = defaultdict(int)
    pred_keys = list(pred.keys())
    ct = 0
    total = len(pred_keys)
    
    # [0, 1, 2, 0, 1, 2] for num_best_candidates= 3, num_sample_instructions=6
    # [0, 1, 2, 3, 0, 1] for num_best_candidates=4, num_sample_instructions=6
    choices = (list(range(num_best_candidates)) * ceil(num_sample_instructions / num_best_candidates))[:num_sample_instructions]
    
    for k in pred_keys:
        new_instructions, sampling_repeat = augment_one_task(pred, k, num_best_candidates, num_sample_instructions, choices)
        out[k] = new_instructions
        
        #pred[k]['root'] is '/root/data_alfred/json_feat_2.1.0/pick_cool_then_place_in_recep-LettuceSliced-None-DiningTable-17/trial_T20190909_070538_437648'
        save_dir = pred[k]['root'].replace(data, os.path.join(data,split+'/'))
        save_p = os.path.join(save_dir, 'explained_instructions_temperature_0.75_T{}.json'.format(instr_timestamp))
        with open(save_p, 'w') as f:
            json.dump(new_instructions,f)
        outpaths[k] = save_p
        repeats[k] += sampling_repeat
        ct += 1
        
        if ct%1000 == 0:
            print(f'completed {ct}/{total}')
        
    return out, outpaths, repeats

## Run Augment

In [11]:
split = 'train'
temperature = '0.75'
pred_f = os.path.join(dout, '{}_sampled.temperature_{}.preds.json'.format(split, temperature))
with open(pred_f, 'r') as f:
    pred = json.load(f)

In [143]:
out, outpaths, repeats = augment_split(pred, num_best_candidates=4, num_sample_instructions=6, data=data, instr_timestamp=instr_TIMESTAMP)

  


completed 1000/20806
completed 2000/20806
completed 3000/20806
completed 4000/20806
completed 5000/20806
completed 6000/20806
completed 7000/20806
completed 8000/20806
completed 9000/20806
completed 10000/20806
completed 11000/20806
completed 12000/20806
completed 13000/20806
completed 14000/20806
completed 15000/20806
completed 16000/20806
completed 17000/20806
completed 18000/20806
completed 19000/20806
completed 20000/20806


In [132]:
out['trial_T20190908_205050_000947_1']

[{'timestamp': '20200810_195806_041141',
  'high_descs': ['Turn around and go to the sink.',
   'Pick up the egg from the sink.',
   'Turn right and walk to the microwave.',
   'Put the egg in the microwave and shut the door and then open the door and take the egg out again and shut the door.',
   'Turn to the left to face the counter to the left of the sink.',
   'Put the egg in the sink']},
 {'timestamp': '20200810_195806_041159',
  'high_descs': ['Move to the sink to the right of you',
   "Pick up the egg that ' s in front of you to the right of the sink.",
   'Turn right and walk to the microwave.',
   'Heat the egg in the microwave.',
   'Turn to the left to face the sink.',
   'Put the egg in the sink.']},
 {'timestamp': '20200810_195806_041167',
  'high_descs': ['Turn around and walk to the sink on your right.',
   'Pick up the egg on the counter.',
   'Turn right to face the microwave.',
   'Put the egg in the microwave and shut the door and then open the door and take the egg 

In [139]:
sum(repeats.values())

669

## ToDo Preprocess Explained Instructions

In [149]:
split_p = os.path.join(data, '..', 'splits', 'may17.json')

In [150]:
with open(split_p, 'r') as f:
    splits = json.load(f)

In [152]:
splits['train']

[{'repeat_idx': 0,
  'task': 'pick_cool_then_place_in_recep-LettuceSliced-None-DiningTable-17/trial_T20190909_070538_437648'},
 {'repeat_idx': 1,
  'task': 'pick_cool_then_place_in_recep-LettuceSliced-None-DiningTable-17/trial_T20190909_070538_437648'},
 {'repeat_idx': 2,
  'task': 'pick_cool_then_place_in_recep-LettuceSliced-None-DiningTable-17/trial_T20190909_070538_437648'},
 {'repeat_idx': 0,
  'task': 'pick_two_obj_and_place-Candle-None-Cabinet-414/trial_T20190908_190650_163902'},
 {'repeat_idx': 1,
  'task': 'pick_two_obj_and_place-Candle-None-Cabinet-414/trial_T20190908_190650_163902'},
 {'repeat_idx': 2,
  'task': 'pick_two_obj_and_place-Candle-None-Cabinet-414/trial_T20190908_190650_163902'},
 {'repeat_idx': 0,
  'task': 'pick_heat_then_place_in_recep-Egg-None-SinkBasin-20/trial_T20190908_205050_000947'},
 {'repeat_idx': 1,
  'task': 'pick_heat_then_place_in_recep-Egg-None-SinkBasin-20/trial_T20190908_205050_000947'},
 {'repeat_idx': 2,
  'task': 'pick_heat_then_place_in_recep

In [153]:
splits['train'][0]['task']

'pick_cool_then_place_in_recep-LettuceSliced-None-DiningTable-17/trial_T20190909_070538_437648'

In [155]:
instr_timestamp = '20200810_1829'
k = 'train'

for task in splits[k]:
    if task['repeat_idx'] == 0:
        # change to self.args TODO
        json_path = os.path.join(data, k, task['task'], 'explained_instructions_temperature_0.75_T{}.json'.format(instr_timestamp))
        with open(json_path, 'r') as f:
            explained_instructions = json.load(f)
            
        

In [156]:
json_path

'/root/data_alfred/json_feat_2.1.0/train/pick_clean_then_place_in_recep-Cloth-None-Drawer-411/trial_T20190907_202223_569116/explained_instructions_temperature_0.75_T20200810_1829.json'

## Reference

In [105]:
# Check how turk_annotation is structured
traj['turk_annotations']

{'anns': [{'assignment_id': 'A2BLQ1GVEHJR8T_32Q90QCQ1V2QNJEA05FKN0E2M8WKEU',
   'high_descs': ['Turn left and go around the end of the white table on your left and turn to face back toward the stove.',
    'Pick up the tomato from the corner of the table.',
    'Carry the tomato around the table to the microwave above the stove.',
    'Heat the tomato in the microwave.',
    'Go to the refrigerator that is behind you to the left.',
    'Put the tomato on the center of the shelf that is second from the top in the refrigerator.'],
   'task_desc': 'Put a cooked tomato into the refrigerator.',
   'votes': [1, 1]},
  {'assignment_id': 'A1ELPYAFO7MANS_3BDCF01OG0BZDUDJL63F93JXTKPLYW',
   'high_descs': ['Turn left and walk to the far end of the table.',
    'Pick up the tomato on the table.',
    'Turn left and walk to the microwave.',
    'Heat the tomato in the microwave.',
    'Turn around and veer right to the fridge.',
    'Place the tomato in the fridge on the fourth shelf to the bottom.

In [172]:
new_instructions

[{'timestamp': '20200809_212959_377798',
  'high_descs': ['Turn left and walk to the white table.',
   'Pick up the knife from the table',
   'Cut the lettuce on the table into slices.',
   'Turn left and walk to the fridge',
   'Put the knife in the fridge',
   'Turn right , go forward , turn left at the white table.',
   'Pick up the head of lettuce on the table.',
   'Turn around and go to the fridge.',
   'Place the lettuce in the fridge and close the door and then open the door and remove the lettuce again.',
   'Turn around and walk to the end of the table , then turn right and face the white table.',
   'Put the lettuce on the table in front of the loaf of bread.']},
 {'timestamp': '20200809_212959_377947',
  'high_descs': ['Turn left and take a step steps , then turn left and walk up to the white table.',
   'Pick up the knife on the table',
   'Slice the lettuce on the table.',
   'Turn to your left and go to the fridge',
   'Put the knife in the fridge',
   'Turn right and wa