# Training Sample generation

We are going to generate raw pairs of trajectories ({action, observation} sequences) and their instructions.

Expected output:

- action_dict = {key = data_no., value = [action, ...]}
- observation_dict = {key = data_no., value = [local_img_path, ...]}, e.g., 1458/4002.jpeg
- instruction_dict = {key = data_no., value = raw_text}


## Prerequisite

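We need the following data:

- the human trajectory files for Montezuma's Revenge (`atari_v1/trajectories/revenge`, one `<clip_folder>.txt` file per trajectory), from which the actions are read
- the Goyal annotation dataset with its associated images, laid out as follows: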

```
└── atari-lang
    ├── 1199
    ├── 1256
    ├── 1289
    ├── 1340
    ├── 1381
    ├── 1383
    ├── 1431
    ├── 1458
    ├── 1473
    ├── 249
    ├── 275
    ├── 281
    ├── 283
    ├── 300
    ├── 324
    ├── 546
    ├── 559
    ├── 563
    ├── 658
    ├── 958
    └── annotations.txt

```

In [1]:
# import useful package 
from tqdm import tqdm  # show progress bar
import time # time 
import glob # parse folder 
import io 
import pickle
import copy
from pathlib import Path
from random import shuffle
from random import sample
import random
import cv2
import numpy as np
import pandas as pd
import os

In [2]:
# --- Configuration -----------------------------------------------------------
# Dataset locations. The defaults are the original author's local paths; set the
# ATARI_LANG_FOLDER / ATARI_ACTION_FOLDER environment variables to point at your
# own copies instead of editing this cell.
atari_lang_folder = os.environ.get(
    "ATARI_LANG_FOLDER",
    "/home/sukai/Downloads/MontezumaRevenge_Goyal/atari-lang",
)
# NOTE: the variable name is misspelled ("aciton"); it is kept because later
# cells reference it under this exact name.
atari_aciton_folder = os.environ.get(
    "ATARI_ACTION_FOLDER",
    "/home/sukai/Downloads/MontezumaRevenge_human/atari_v1/trajectories/revenge",
)
is_jpeg_generated = True  # True skips the PNG -> JPEG conversion cell
resize = 84  # side length (pixels) of the square resized frames
clip_length = 150  # number of consecutive frames / actions taken per annotation
annotation_file_size = 6870  # number of lines in annotations.txt; only used as the tqdm total

def fromfile(file, dtype, count, *args, **kwargs):
    """Read from any file-like object into a 1-D numpy array.

    Only ``file.readinto`` is required, so in-memory streams such as
    ``io.BytesIO`` work.

    Parameters
    ----------
    file : binary file-like object providing ``readinto``.
    dtype : anything accepted by ``np.dtype``; element type of the result.
    count : maximum number of elements to read.
    *args, **kwargs : accepted for call compatibility and ignored.

    Returns
    -------
    numpy.ndarray
        The elements actually read. Shorter than ``count`` when the stream
        ends early; trailing bytes that do not fill a whole element are
        dropped.

    Example
    -------
        im = io.BytesIO(f.read())
        m = fromfile(im, dtype=np.uint8, count=im.getbuffer().nbytes)
    """
    itemsize = np.dtype(dtype).itemsize
    buffer = np.zeros(count * itemsize, np.uint8)
    offset = 0
    # Stop as soon as the buffer is full or the stream has nothing more to give.
    while offset < buffer.size:
        bytes_read = file.readinto(buffer[offset:])
        if not bytes_read:  # 0 -> end of stream; None -> no data available
            break
        offset += bytes_read
    rounded_bytes = (offset // itemsize) * itemsize
    # Reinterpret the raw bytes through a view; assigning to ``ndarray.dtype``
    # in place is discouraged by NumPy.
    return buffer[:rounded_bytes].view(dtype)

## Preprocessing
We convert the PNG frames to JPEG files (to speed up loading with NVIDIA DALI)  
and resize each image to `resize` × `resize` pixels.

In [3]:
# One-off conversion of every PNG frame into a resized JPEG stored next to it.
# Skipped entirely when `is_jpeg_generated` says the JPEGs already exist.
if not is_jpeg_generated:
    png_files = glob.glob(atari_lang_folder + "/**/*.png", recursive=True)
    for path in tqdm(png_files):
        with open(path, 'rb') as f:
            raw = f.read()
        # decode from an in-memory byte buffer; len(raw) is the exact byte count
        im = fromfile(io.BytesIO(raw), dtype=np.uint8, count=len(raw))
        im = cv2.imdecode(im, cv2.IMREAD_COLOR)
        if im is None:
            # imdecode reports undecodable data by returning None, not by raising
            raise Exception("Could not decode image: " + path)
        # rescale
        im = cv2.resize(im, (resize, resize), interpolation=cv2.INTER_AREA)
        # splitext strips only the final extension, so a dot elsewhere in the
        # path (e.g. in a directory name) does not truncate the output path
        store_pathname = os.path.splitext(path)[0] + '.jpeg'
        if not cv2.imwrite(store_pathname, im):
            raise Exception("Could not write image")

100%|█████████████████████████████████| 182896/182896 [02:20<00:00, 1301.57it/s]


## Parsing the annotations.txt file

In [48]:
# Build the three dictionaries, all keyed by the running sample number `count`:
#   instruction_dict[count] -> lower-cased instruction text
#   observation_dict[count] -> list of `clip_length` relative JPEG paths
#   action_dict[count]      -> list of actions for the same frame window
annotation_file = os.path.join(atari_lang_folder, "annotations.txt")
instruction_dict = dict()
action_dict = dict()
observation_dict = dict()

# Many annotations point into the same trajectory, so each trajectory's action
# column is loaded once and reused instead of re-reading the CSV on every line.
trajectory_actions = dict()

count = 0

with open(annotation_file, 'rt') as f:
    for line in tqdm(f, total=annotation_file_size):
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        # line layout: "<clip_folder>/<frame_start>-<...>.mp4<instruction text>";
        # split once so an instruction that itself mentions ".mp4" stays intact
        clip_id, text = line.split('.mp4', 1)
        instruction_dict[count] = text.strip().lower()

        clip_folder, frame_range = clip_id.split('/')
        frame_start = int(frame_range.split('-')[0])
        frame_stop = frame_start + clip_length
        observation_dict[count] = [
            os.path.join(clip_folder, str(frame_id) + ".jpeg")
            for frame_id in range(frame_start, frame_stop)
        ]

        # get action dataset
        if clip_folder not in trajectory_actions:
            traj_info_path = os.path.join(atari_aciton_folder, clip_folder + '.txt')
            traj_df = pd.read_csv(traj_info_path, skiprows=1)
            trajectory_actions[clip_folder] = traj_df[' action'].tolist()
        # NOTE(review): frame id N is paired with CSV row N (0-based) - confirm
        # that the frame numbering matches. If the trajectory has fewer rows
        # than frame_stop, this list is shorter than clip_length.
        action_dict[count] = trajectory_actions[clip_folder][frame_start:frame_stop]
        count += 1
    

100%|██████████████████████████████████████| 6870/6870 [00:15<00:00, 430.48it/s]


## Split into training and testing data

In [71]:
random.seed(12)  # fixed seed so the split is reproducible

# hold out 17.7% of all samples for testing
testing_data_size = int(len(observation_dict) * 0.177)
# random.sample requires a sequence: a dict view raises TypeError on
# Python >= 3.11. list() preserves the dict's insertion order.
testing_data_ids = sample(list(observation_dict), testing_data_size)

testing_positive_instruction_dict = dict()
testing_positive_observation_dict = dict()
testing_positive_action_dict = dict()

# NOTE: pop() moves the entries out of instruction_dict / observation_dict /
# action_dict in place, leaving only the training samples behind. Re-running
# this cell without re-running the parsing cell removes a further 17.7%.
for test_id in tqdm(testing_data_ids):
    testing_positive_instruction_dict[test_id] = instruction_dict.pop(test_id)
    testing_positive_action_dict[test_id] = action_dict.pop(test_id)
    testing_positive_observation_dict[test_id] = observation_dict.pop(test_id)

100%|███████████████████████████████████| 1215/1215 [00:00<00:00, 971421.91it/s]


In [78]:
# now generate negative testing samples from positive dicts:
# half of the held-out samples are moved out of the positive dicts and their
# instructions are re-assigned among themselves so each clip gets a wrong one.
testing_negative_data_size = testing_data_size // 2
# random.sample requires a sequence (a dict view raises TypeError on Python >= 3.11)
testing_negative_data_ids = sample(list(testing_positive_instruction_dict), testing_negative_data_size)

testing_negative_instruction_list = []
testing_negative_observation_dict = dict()
testing_negative_action_dict = dict()

for test_id in tqdm(testing_negative_data_ids):
    instruction_ele = testing_positive_instruction_dict.pop(test_id)
    observation_ele = testing_positive_observation_dict.pop(test_id)
    action_ele = testing_positive_action_dict.pop(test_id)
    # store the data
    testing_negative_instruction_list.append(instruction_ele)
    testing_negative_action_dict[test_id] = action_ele
    testing_negative_observation_dict[test_id] = observation_ele

# true instruction of each moved sample, in the same order as the dict keys
original_instruction_list = list(testing_negative_instruction_list)
# shuffle
shuffle(testing_negative_instruction_list)

# A plain shuffle can hand a clip its own instruction back (same position, or a
# different sample with identical text), which would be a mislabeled negative.
# Repair each such position by swapping with a partner so that both positions
# end up with a text different from their own.
num_negative = len(testing_negative_instruction_list)
for i in range(num_negative):
    if testing_negative_instruction_list[i] != original_instruction_list[i]:
        continue
    for j in range(num_negative):
        if (testing_negative_instruction_list[j] != original_instruction_list[i]
                and testing_negative_instruction_list[i] != original_instruction_list[j]):
            testing_negative_instruction_list[i], testing_negative_instruction_list[j] = (
                testing_negative_instruction_list[j],
                testing_negative_instruction_list[i],
            )
            break
    else:
        print('warning: could not find a mismatching instruction for negative sample', i)

# dict keys iterate in insertion order, i.e. the order the list was built in
testing_negative_instruction_dict = dict(
    zip(testing_negative_action_dict, testing_negative_instruction_list)
)

100%|█████████████████████████████████████| 607/607 [00:00<00:00, 917456.77it/s]
607it [00:00, 2331449.20it/s]


## Store the dictionaries

In [80]:
# sanity check: report how many samples ended up in each split
for size_label, split_dict in (
    ('negative testing sample size', testing_negative_instruction_dict),
    ('positive testing sample size', testing_positive_instruction_dict),
    ('training sample size', instruction_dict),
):
    print(size_label, len(split_dict))

negative testing sample size 607
positive testing sample size 608
training sample size 5655


In [81]:
# peek at the first (sample id, instruction) pair of the negative test split
tuple(testing_negative_instruction_dict.items())[0]

(4103, 'run left and climb up the ladder.')

In [82]:
# peek at the first (sample id, instruction) pair of the positive test split
tuple(testing_positive_instruction_dict.items())[0]

(3887, 'move slightly towards right')

In [83]:
# peek at the first (sample id, instruction) pair left in the training split
tuple(instruction_dict.items())[0]

(0, 'climb up the ladder until you reach the top of the purple room')

In [84]:
# Persist the training split: one pickle per dictionary, written to the
# current working directory.
training_outputs = (
    ('training_instruction_dict_raw.pkl', instruction_dict),
    ('training_action_dict_raw.pkl', action_dict),
    ('training_observation_dict_raw.pkl', observation_dict),
)
for pickle_name, payload in training_outputs:
    with open(pickle_name, 'wb') as f:
        pickle.dump(payload, f)

In [85]:
# store testing positive dataset
# open(..., 'wb') does not create missing directories, so make sure the
# output folder exists before writing into it
os.makedirs('test_data', exist_ok=True)

with open('test_data/testing_positive_instruction_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_positive_instruction_dict, f)

with open('test_data/testing_positive_action_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_positive_action_dict, f)

with open('test_data/testing_positive_observation_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_positive_observation_dict, f)

In [86]:
# store testing negative dataset
# open(..., 'wb') does not create missing directories, so make sure the
# output folder exists before writing into it
os.makedirs('test_data', exist_ok=True)

with open('test_data/testing_negative_instruction_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_negative_instruction_dict, f)

with open('test_data/testing_negative_action_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_negative_action_dict, f)

with open('test_data/testing_negative_observation_dict_raw.pkl', 'wb') as f:
    pickle.dump(testing_negative_observation_dict, f)