# Data Preprocessing Notebook

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
import imageio
import PySimpleGUI as sg
from datetime import datetime

saveFileIndex = 0 # prefix of the CSV file name written by save_to_csv(); check annotation folder to see the current index

In [None]:
# Id of the trajectory inspected by the exploration cells.
FILE_ID = '1'

# Integer action code -> action name; the position in the tuple is the code.
action_meaning = dict(enumerate((
    'NOOP',
    'FIRE',
    'UP',
    'RIGHT',
    'LEFT',
    'DOWN',
    'UPRIGHT',
    'UPLEFT',
    'DOWNRIGHT',
    'DOWNLEFT',
    'UPFIRE',
    'RIGHTFIRE',
    'LEFTFIRE',
    'DOWNFIRE',
    'UPRIGHTFIRE',
    'UPLEFTFIRE',
    'DOWNRIGHTFIRE',
    'DOWNLEFTFIRE',
)))

In [None]:
# Trajectory log (<FILE_ID>.txt) and the folder holding its screenshots.
file_path = os.path.join(os.getcwd(), "atari_v1/trajectories/revenge", f'{FILE_ID}.txt')
observation_path = os.path.join(os.getcwd(), "atari_v1/screens/revenge", str(FILE_ID))

# The first line of the file is skipped; the header names carry stray
# whitespace, and `terminal` is read as text, so both are normalised here.
df = pd.read_csv(file_path, skiprows=1)
df.columns = [column_name.strip() for column_name in df.columns]
df["terminal"] = df["terminal"].str.strip().eq('True')

In [None]:
# Show the column names after the whitespace stripping above.
df.columns

In [None]:
# Preview the first rows of the trajectory table.
df.head()

## Annotating trajectories

In [None]:
# Refer to the walkthrough web pages:
# https://gamefaqs.gamespot.com/sms/588107-montezumas-revenge/faqs/39122
# https://strategywiki.org/wiki/Montezuma%27s_Revenge/Walkthrough

# Our plan: first split each trajectory by the room the agent is in,
# then further segment those clips into smaller ones that each match a single sentence in the walkthrough.

In [None]:
# define utils function 
def display_annotation(ind):
    """Print every stored field of annotation entry ``ind``.

    Each field is read from its module-level dict; a missing entry raises
    ``KeyError``.
    """
    field_stores = (
        ("trajectory_id", trajectory_id_dict),
        ("start_frame", start_frame_dict),
        ("end_frame", end_frame_dict),
        ("category", category_dict),
        ("description", description_dict),
        ("replay_type", replay_type_dict),
        ("room_number", room_number_dict),
        ("dataset_name", dataset_name_dict),
    )
    # One indented "name: value" line per field, separated by ';'.
    listing = ";\n".join(f"    {label}: {store[ind]}" for label, store in field_stores)
    print(f"\n{listing}\n    ")
    
def convert_to_pandas():
    """Combine the module-level annotation dicts into a single DataFrame.

    Each dict becomes one column; rows are aligned on the dict keys
    (the entry indices).
    """
    column_sources = {
        "trajectory_id": trajectory_id_dict,
        "start_frame": start_frame_dict,
        "end_frame": end_frame_dict,
        "category": category_dict,
        "description": description_dict,
        "replay_type": replay_type_dict,
        "room_number": room_number_dict,
        "dataset_name": dataset_name_dict,
    }
    as_series = {name: pd.Series(store) for name, store in column_sources.items()}
    return pd.concat(as_series, axis=1)
    
def save_to_csv():
    """Write all recorded annotations to a timestamped CSV under ``annotations/``.

    The file is named ``<saveFileIndex>_timestamp_<HH_MM_SS-dd-mm-YYYY>.csv``
    and is written without the index column.  The output folder is created
    when missing, so annotations held in memory are not lost to a
    missing-directory error at save time.
    """
    out_dir = "annotations"
    os.makedirs(out_dir, exist_ok=True)

    df = convert_to_pandas()
    current_time = datetime.now().strftime("%H_%M_%S-%d-%m-%Y")
    df.to_csv(f"{out_dir}/{saveFileIndex}_timestamp_{current_time}.csv", index=False)
    

In [None]:
# Table design -- one row per annotated segment:
# | trajectory id | start frame | end frame | category | description | replay type | room number | dataset name |
# category is one of {room, sentence}.
# Every column is kept in its own dict, keyed by the entry index.

trajectory_id_dict = {}  # <num>
start_frame_dict = {}  # <num>
end_frame_dict = {}  # <num>
category_dict = {}  # {room, sentence}; "room" means the segment is cut by the room the agent stays in
description_dict = {}  # <text>
replay_type_dict = {}  # good trajectory or bad
room_number_dict = {}  # A1 / B1 etc.
dataset_name_dict = {}  # "human" / "dqn"


In [None]:
# Get the trajectory ids: each numerically named entry of the screens folder
# is one trajectory.
traj_dir = os.path.join(os.getcwd(), "atari_v1/screens/revenge/")
# Entries whose name is not a plain number (e.g. .DS_Store or
# .ipynb_checkpoints) are skipped; int() would raise ValueError on them.
traj_id_arr = sorted(
    int(entry_name) for entry_name in os.listdir(traj_dir) if entry_name.isdecimal()
)
# later run traj_id_arr[traj_index] to get trajectory id

In [None]:
# Initial annotation state
# ---------- Trajectory Selection ------------------
traj_index = 0  # advances only once the end of the current trajectory is reached
traj_id = traj_id_arr[traj_index]  # int; does not change often
traj_image_path = os.path.join(traj_dir, str(traj_id))
traj_info_path = os.path.join(os.getcwd(), "atari_v1/trajectories/revenge/", f'{traj_id}.txt')

# Load the trajectory log: skip its first line, strip whitespace from the
# header names and turn the textual `terminal` column into booleans.
traj_df = pd.read_csv(traj_info_path, skiprows=1)
traj_df.columns = [column_name.strip() for column_name in traj_df.columns]
traj_df["terminal"] = traj_df["terminal"].str.strip().eq('True')
# ---------- END ----------------------------------

# Frame cursors -- all three start on the same frame.
start_frame = 0  # not edited in the GUI; the previous end_frame is copied here automatically
end_frame = current_frame = start_frame  # end_frame is what the GUI edits

# Fields of the entry being annotated.
category = "room"
replay_type = "good"  # good or bad
description = ""  # changes quite often
room_number = None  # changes quite often
dataset_name = "human"

entry_ind = 0  # index under which the current entry is stored in the dicts

In [None]:
# GUI (PySimpleGUI) that captures the annotation events.
# First render the initial screenshot so the Image element has data to show:
# the frame is resized to 256x256 and labelled with the action name and frame number.
imagepath = os.path.join(traj_image_path, f"{start_frame}.png")
im = cv2.imread(imagepath)
imS = cv2.resize(im, (256, 256))
actiontext = action_meaning[int(traj_df['action'][current_frame])]
for overlay_text, overlay_origin in ((actiontext, (0, 185)), (f"f:{current_frame}", (0, 25))):
    imS = cv2.putText(imS, overlay_text, org=overlay_origin,
                      fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255))
imBytes = cv2.imencode('.png', imS)[1].tobytes()

# Define the window's contents: build the composite pieces first, then stack
# all rows in display order.

# Category selector (room / sentence).
category_row = [
    sg.Radio('room', "CatRad", key="k_catrad_room", default=(category == 'room')),
    sg.Radio('sentence', "CatRad", default=(category == 'sentence')),
]

# Replay-type selector; the selected option gets a coloured background.
type_row = [
    sg.Radio('good', "typeRad", enable_events=True, font="_ 30",
             background_color="green" if replay_type == 'good' else "white",
             key="k_typerad_good", default=(replay_type == 'good')),
    sg.Radio('bad', "typeRad", enable_events=True, font="_ 30",
             background_color="red" if replay_type == 'bad' else "white",
             key="k_typerad_bad", default=(replay_type == 'bad')),
]

# Screenshot frame: the rendered image plus the current-frame / entry counters.
screenshot_frame = sg.Frame("Screenshot", [
    [sg.Image(data=imBytes, key="k_image", size=(256, 256))],
    [sg.Text(f"Current Frame: {current_frame}", key="k_currentframetxt"),
     sg.Text(f"entry_ind: {entry_ind}", key="k_entry_ind")],
])

# Start/end selectors: the checked radio decides which frame the cursor edits.
start_frame_box = sg.Frame("", [
    [sg.Radio('StartFrame', "FrameRad", enable_events=True, key="k_startrad")],
    [sg.Text(f"Start Frame: {start_frame}", key="k_startframetxt")],
])
end_frame_box = sg.Frame("", [
    [sg.Radio("EndFrame", "FrameRad", enable_events=True, key="k_endrad", default=True)],
    [sg.Text(f"End Frame: {end_frame}", key="k_endframetxt")],
])

layout = [
    [sg.Text("Trajectory ID")],
    [sg.Text(str(traj_id), size=(6, 1), key='k_traj_id')],

    [sg.Text("Dataset Name")],
    [sg.Text(dataset_name, size=(6, 1), key='k_dataset_name')],

    [sg.Text("Category")],
    category_row,

    [sg.Text("Type")],
    type_row,

    [sg.Text("Parsing Speed")],
    [sg.Slider(range=(1, 100), orientation='h', size=(20, 20), default_value=1, key="k_parsing_speed")],

    [sg.Text("Room Number")],
    [sg.Input(room_number, size=(6, 1), key='k_room_number', font="_ 30")],

    [sg.Text("Description")],
    [sg.Multiline("", size=(40, 10), key="k_description")],

    [screenshot_frame],
    [start_frame_box, end_frame_box],
    [sg.Text("", key="k_warning")],
    [sg.Button('Record'), sg.Checkbox("NextTraj", default=False, key="k_nexttraj_check"), sg.Button('Quit')],
]

def _render_frame(image_dir, frame, action_label):
    """Return annotated PNG bytes for ``frame``, or None if its image cannot be read.

    Loads ``<image_dir>/<frame>.png``, resizes it to 256x256, draws
    ``action_label`` at (0, 185) and ``f:<frame>`` at (0, 25), and PNG-encodes
    the result.
    """
    frame_img = cv2.imread(os.path.join(image_dir, f"{frame}.png"))
    if frame_img is None:  # cv2.imread reports a missing/unreadable file this way
        return None
    frame_img = cv2.resize(frame_img, (256, 256))
    for overlay_text, overlay_origin in ((action_label, (0, 185)), (f"f:{frame}", (0, 25))):
        frame_img = cv2.putText(frame_img, overlay_text, org=overlay_origin,
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255))
    return cv2.imencode('.png', frame_img)[1].tobytes()


# Create the window
window = sg.Window('Trajectory Annotation', layout, return_keyboard_events=True, use_default_focus= False)

# Set when the display must be redrawn although the selected frame variable
# already equals current_frame: after a StartFrame/EndFrame swap and after
# Record (which also covers switching to the next trajectory).
force_refresh = False

# Display and interact with the Window using an Event Loop.
# try/finally guarantees the window is closed even if the loop raises.
try:
    while True:
        # ------------------ Event Handle ---------------
        event, values = window.read(timeout = 20) # set timeout so as to refresh the image
        # See if user wants to quit or window was closed
        if event == sg.WINDOW_CLOSED or event == 'Quit':
            break

        # Change current frame by keypress event. Keyboard events arrive as
        # "<KeyName>:<keycode>" and the keycode differs between platforms
        # (e.g. "Left:113" vs "Left:37"), so only the key name is matched.
        is_text_event = isinstance(event, str)
        if is_text_event and event.startswith("Left:"):
            current_frame = max(current_frame - int(values['k_parsing_speed']),
                                int(traj_df.frame.min()))
        if is_text_event and event.startswith("Right:"):
            current_frame = min(current_frame + int(values['k_parsing_speed']),
                                int(traj_df.frame.max()))

        # when swap focus, change the current_frame to target frame
        if event == "k_startrad":
            current_frame = start_frame
            force_refresh = True

        if event == "k_endrad":
            current_frame = end_frame
            force_refresh = True

        if event == "k_typerad_good":
            window['k_typerad_good'].update(background_color="green")
            window['k_typerad_bad'].update(background_color="white")
        if event == "k_typerad_bad":
            window['k_typerad_good'].update(background_color="white")
            window['k_typerad_bad'].update(background_color="red")

        # ------------- Constant Updates ----------------
        # The frame variable selected by the radio buttons follows current_frame.
        refreshflag = force_refresh
        force_refresh = False
        if values['k_startrad']:
            if start_frame != current_frame:
                refreshflag = True
                start_frame = current_frame
        elif end_frame != current_frame:
            refreshflag = True
            end_frame = current_frame

        if refreshflag:
            imagepath = os.path.join(traj_image_path, f"{current_frame}.png")
            actiontext = action_meaning[int(traj_df['action'][current_frame])]
            imBytes = _render_frame(traj_image_path, current_frame, actiontext)
            if imBytes is not None:
                window['k_image'].update(data=imBytes)

            # update Start Frame End Frame Current Frame Element
            window["k_startframetxt"].update(value="Start Frame: "+ str(start_frame))
            window["k_endframetxt"].update(value="End Frame: "+str(end_frame))
            window["k_currentframetxt"].update(value="Current Frame: "+str(current_frame))

            if imBytes is None:
                # Keep the GUI alive instead of crashing in cv2.resize.
                window["k_warning"].update(f"Warning: cannot read {imagepath}", text_color="red")
            elif start_frame > end_frame:
                window["k_warning"].update("Warning: start frame > end frame", text_color="red")
            else:
                window["k_warning"].update("")

        # ---------- Press Record -------------
        if event == "Record":
            # Update Variable and Store data into dict
            category = "room" if window['k_catrad_room'].get() else "sentence"
            replay_type = "good" if window['k_typerad_good'].get() else "bad"
            description = window['k_description'].get()
            room_number = window['k_room_number'].get()

            trajectory_id_dict[entry_ind] = traj_id
            start_frame_dict[entry_ind] = start_frame
            end_frame_dict[entry_ind] = end_frame
            category_dict[entry_ind] = category
            description_dict[entry_ind] = description
            room_number_dict[entry_ind] = room_number
            dataset_name_dict[entry_ind] = dataset_name
            replay_type_dict[entry_ind] = replay_type
            entry_ind += 1

            # Reset Variables and Refresh GUI: the next segment starts right
            # after the recorded one, capped at the last frame.
            start_frame = min(end_frame + 1, int(traj_df.frame.max()))
            end_frame = start_frame
            current_frame = end_frame
            # Move to new Trajectory if we reach end
            if window['k_nexttraj_check'].get():
                if traj_index < len(traj_id_arr) - 1:
                    traj_index += 1
                    traj_id = traj_id_arr[traj_index]
                    traj_info_path = os.path.join(os.getcwd(), "atari_v1/trajectories/revenge/", f'{traj_id}.txt')
                    traj_image_path = os.path.join(traj_dir, f'{traj_id}')

                    traj_df = pd.read_csv(traj_info_path, skiprows=1)
                    traj_df.columns = traj_df.columns.str.strip()
                    traj_df.terminal = traj_df.terminal.str.strip() == 'True'

                    description = ""
                    room_number = ""
                    start_frame = 0
                    end_frame = 0
                    current_frame = 0

            # ----- refresh -------
            window['k_nexttraj_check'].update(False)

            window['k_traj_id'].update(traj_id)
            window['k_dataset_name'].update(dataset_name)
            window['k_catrad_room'].update(category == "room")

            if replay_type == "good":
                window['k_typerad_good'].update(True, background_color="green")
                window['k_typerad_bad'].update(background_color="white")
            else:
                window['k_typerad_good'].update(False, background_color="white")
                window['k_typerad_bad'].update(background_color="red")
            window['k_room_number'].update(room_number)
            window['k_description'].update("")
            window['k_endrad'].update(True)
            window['k_entry_ind'].update(f'entry_ind: {entry_ind}')

            # The frame variables already equal current_frame here, so the
            # change-detection above would not fire; request a redraw on the
            # next pass so the image and frame labels show the new position.
            force_refresh = True
finally:
    # Finish up by removing from the screen
    window.close()

# Summarise the most recent recorded entry, if anything was recorded.
if entry_ind:
    last_recorded = entry_ind - 1
    print("The Last Output index is", last_recorded)
    display_annotation(last_recorded)
    print("If Restart the kernel, remember to update <traj_index> and <start_frame> variable")

In [None]:
# PLEASE SAVE to csv file: the recorded annotations only live in the in-memory dicts until this runs
save_to_csv()

In [None]:
# Close the annotation window by hand, e.g. when the GUI cell stopped before closing it itself.
window.close()

In [None]:
# load data example
# Kept under its own name: rebinding `df` here would replace the trajectory
# table that the screenshot-check cells index with df['action'], and the
# annotation CSV has no 'action' column.
annotation_df = pd.read_csv("./annotations/0_timestamp_22_20_09-01-06-2022.csv")
annotation_df

## Check screenshots using OpenCV

In [None]:
# Check actions: play back 200 frames with the action name and frame number overlaid.
# NOTE: this rebinds the notebook-level `start_frame` that the annotation GUI cell also uses.
start_frame = 230
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
try:
    for index in range(start_frame, start_frame+200):
        image_path = os.path.join(observation_path, f"{index}.png")
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread returns None for a missing/unreadable file; stop
            # instead of crashing in cv2.resize.
            print(f"Stopping preview: cannot read {image_path}")
            break
        imS = cv2.resize(im, (256, 256))
        text = f"{action_meaning[int(df['action'][index])]}"
        imS = cv2.putText(imS, text, org = (0, 185), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                         fontScale=1, color = (255,255,255))
        imS = cv2.putText(imS, f"f:{index}", org = (0, 25), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                         fontScale=1, color = (255,255,255))
        cv2.imshow('output', imS)
        cv2.waitKey(100)  # roughly 100 ms per frame
finally:
    # Always close the preview window, even if a frame fails to render.
    cv2.destroyAllWindows()

In [None]:
# Check actions and write the same annotated frames into a GIF.
# NOTE: this rebinds the notebook-level `start_frame` that the annotation GUI cell also uses.
start_frame = 230
cv2.namedWindow("output", cv2.WINDOW_NORMAL)
try:
    with imageio.get_writer("human_replay_traj.gif", mode="I") as writer:
        for index in range(start_frame, start_frame+200):
            image_path = os.path.join(observation_path, f"{index}.png")
            im = cv2.imread(image_path)
            if im is None:
                # cv2.imread returns None for a missing/unreadable file; stop
                # here and keep the frames already written.
                print(f"Stopping capture: cannot read {image_path}")
                break
            imS = cv2.resize(im, (256, 256))
            text = f"{action_meaning[int(df['action'][index])]}"
            imS = cv2.putText(imS, text, org = (0, 185), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                             fontScale=1, color = (255,255,255))
            imS = cv2.putText(imS, f"f:{index}", org = (0, 25), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                             fontScale=1, color = (255,255,255))
            cv2.imshow('output', imS)
            # OpenCV images are BGR; convert to RGB before handing them to imageio.
            rgb_frame = cv2.cvtColor(imS, cv2.COLOR_BGR2RGB)
            writer.append_data(rgb_frame)
            cv2.waitKey(100)  # roughly 100 ms per frame
finally:
    # Always close the preview window, even if writing the GIF fails.
    cv2.destroyAllWindows()