# Verify that our non-ideal teacher simulates human labelers

In [None]:
import os
import numpy as np
import pandas as pd
from IPython.display import display
import time
# Directory holding the query CSVs; the labelled results are saved here as well.
path = './human_query/'

# setting
rd = 100  # [10, 100, 300]
env_name = 'hammer'  # ['dial-turn', 'hammer', 'walker-walk']

# One CSV per (environment, return difference) combination.
human_query_csv_path = os.path.join(path, env_name + '_rd_' + str(rd) + '.csv')

# Columns noted by the original author: query_idx_0, query_idx_1, rd.
# Later cells additionally read return_0 and return_1 from this table.
human_query_df = pd.read_csv(human_query_csv_path)

# Echo the configuration and the first rows as a quick sanity check.
print(f'env_name: {env_name}, return difference: {rd}')
print(human_query_df.head(5))

In [40]:
# Work on a copy so the loaded query table itself is never modified.
result_df = human_query_df.copy()

# Ground-truth preference derived from the returns: "1 0" when segment 0 has
# the strictly higher return, "0 1" in every other case (ties included).
first_wins = result_df['return_0'] > result_df['return_1']
result_df['gt_label'] = np.where(first_wins, "1 0", "0 1")

# Placeholder column that the labelling loop fills in row by row.
result_df['human_label'] = None

## Prompt

Here are the prompts for the three tasks:

### 1. **Metaworld Dial-Turn Task**

**Task Purpose**:  
In this task, you will be comparing two segments of a robotic arm trying to turn a dial. Your goal is to evaluate which segment performs better in achieving the task's objectives.

**Instructions**:  
- **Step 1**: First, choose the segment where the robot's arm reaches the dial more accurately (the **reach** component).  
- **Step 2**: If the reach performance is the same in both segments, then choose the one where the robot's gripper is closed more appropriately (the **gripper closed** component).  
- **Step 3**: If both reach and gripper closure are equal, choose the segment that has the robot's arm placed closer to the target position (the **in place** component).  

### 2. **Metaworld Hammer Task**

**Task Purpose**:  
In this task, you will be comparing two segments where a robotic arm is hammering a nail. The aim is to evaluate which segment results in a better execution of the hammering process.

**Instructions**:  
- **Step 1**: First, choose the segment where the hammerhead is in better position and the nail is properly hit (the **reward in place** component).  
- **Step 2**: If the hammerhead positioning is similar in both segments, choose the one where the robot has a better hold on the hammer and the nail (the **reward grab** component).  
- **Step 3**: If both the hammerhead position and grasping are the same, select the segment where the orientation of the hammer is more suitable (the **reward quaternion** component).  

### 3. **DMControl Walker-Walk Task**

**Task Purpose**:  
In this task, you will compare two segments where a bipedal robot is attempting to walk. Your goal is to determine which segment shows better walking performance.

**Instructions**:  
- **Step 1**: First, choose the segment where the robot stands more stably (the **standing** reward).  
- **Step 2**: If both segments have the same stability, choose the one where the robot moves faster or more smoothly (the **move reward**).  
- **Step 3**: If both standing and moving are comparable, select the segment where the robot maintains better upright posture (the **upright** reward).  



# Human feedback

- Enter `1 0` if you prefer the upper segment, `0 1` if you prefer the lower segment, or `0 0` if you cannot distinguish between them.

- To quit the program, enter `quit` or `exit`.


In [None]:
from IPython.display import clear_output, HTML, Image
import base64

# Interactive labelling loop: show two segment GIFs per query, read the
# labeler's preference from stdin and save the results after every answer.

# Inputs accepted as labels (segment 0 preferred / segment 1 preferred /
# indistinguishable) and the commands that end the session early.
valid_labels = ('1 0', '0 1', '0 0')
quit_commands = ('quit', 'exit')

# Label at most 20 queries, but never index past the end of the query table.
num_queries = min(20, len(human_query_df))


def _ask_label():
    """Prompt until the labeler enters a valid label or a quit command.

    Surrounding whitespace is ignored so that e.g. '1 0 ' is still accepted.

    Returns:
        One of '1 0', '0 1' or '0 0', or None when the labeler enters
        'quit' or 'exit'.
    """
    while True:
        answer = input('select').strip()
        if answer in quit_commands:
            return None
        if answer in valid_labels:
            return answer
        # Re-prompt instead of storing a typo as a label.
        print(f"Invalid input {answer!r}; enter one of {valid_labels} or 'quit'/'exit'.")


for i in range(num_queries):
    clear_output()
    print(f"{i+1}th among total {num_queries} feedbacks")
    query_idx_0 = human_query_df.iloc[i]['query_idx_0']
    query_idx_1 = human_query_df.iloc[i]['query_idx_1']
    print(f'query_idx_0: {query_idx_0}, query_idx_1: {query_idx_1}')

    # The GIF of each segment is looked up by its query index.
    segment_0 = os.path.join('./', 'video_' + env_name, f'{query_idx_0}.gif')
    segment_1 = os.path.join('./', 'video_' + env_name, f'{query_idx_1}.gif')
    print(f'segment_0: {segment_0}')
    print(f'segment_1: {segment_1}')

    # Short pauses between the display calls and before the prompt.
    time.sleep(0.1)
    display(HTML(f'''
    <div style="display: inline-block; margin-right: 100px;">
        <img src="{segment_0}" width="400" loop="true" >
    </div>
    '''))
    time.sleep(0.1)
    display(HTML(f'''
    <div style="display: inline-block;">
        <img src="{segment_1}" width="400" loop="true" >
    </div>
    '''))

    time.sleep(1)
    select = _ask_label()
    if select is None:
        break
    result_df.loc[i, 'human_label'] = select

    # Save after every answer so an interrupted session keeps its labels.
    result_df.to_csv(os.path.join(path, f'{env_name}_rd_{rd}_human.csv'), index=False)
