In [None]:
import os
import sys
import json
import copy
from datetime import datetime

from openai import OpenAI
import yaml
import numpy as np

sys.path.append("../")
from src.utils.parser import *
from src.utils.feature_utils import theta_to_language, theta_to_state_mask

# Create the OpenAI client. The key comes from the OPENAI_API_KEY environment
# variable when it is set; otherwise it is read from ~/.openai_api_key.
if "OPENAI_API_KEY" in os.environ:
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
else:
    # No environment variable: use the key file in the user's home directory.
    with open(os.path.expanduser("~/.openai_api_key"), "r") as f:
        api_key = f.read().strip()
    client = OpenAI(api_key=api_key)

# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Task description; paraphrase_instruction() uses it as the start of the system prompt.
task_instruction = "The task is to move a coffee grasped with the robot's end effector where there is a human user nearby."


def paraphrase_instruction(
    instruction: str,
    model_text: str = "gpt-4o-mini",
) -> str:
    """
    Ask an OpenAI chat model for a paraphrase of a language instruction.

    The system prompt is the module-level `task_instruction` followed by the
    paraphrasing request; the user message carries the original instruction.
    The request goes through the module-level `client` with temperature 0.7.

    Args:
        instruction: The original instruction to paraphrase
        model_text: The OpenAI model to use for paraphrasing

    Returns:
        The content of the first returned choice, stripped of surrounding whitespace
    """
    system_prompt = task_instruction + " Paraphrase the language instruction."
    user_prompt = f"Original instruction:\n{instruction}"

    completion = client.chat.completions.create(
        model=model_text,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
    )

    # Only the first choice is used.
    reply = completion.choices[0].message.content
    return reply.strip()


def process_instruction(
    instruction: str,
    model_text: str = "gpt-4o-mini",
    num_instructions: int = 1,
    max_trials: int = 3,
) -> "dict | None":
    """
    Generate paraphrases of an instruction, retrying each request on failure.

    Args:
        instruction: The original instruction to process
        model_text: The OpenAI model to use
        num_instructions: Number of paraphrased versions to generate
        max_trials: Maximum number of attempts per paraphrase (must be >= 1)

    Returns:
        Dictionary with 'paraphrased_instruction' key containing the list of
        paraphrased instructions, or None if any single paraphrase still fails
        after `max_trials` attempts (paraphrases collected so far are discarded).

    Raises:
        ValueError: If `max_trials` is smaller than 1.
    """
    if max_trials < 1:
        raise ValueError(f"max_trials must be >= 1, got {max_trials}")

    paraphrased_instruction_list = []

    for _ in range(num_instructions):
        for num_trials in range(1, max_trials + 1):
            try:
                paraphrased_instruction = paraphrase_instruction(
                    instruction,
                    model_text=model_text,
                )
            except Exception as e:
                # Deliberately broad: any failure of the request (network, API,
                # malformed response) is reported and retried.
                print(f"Error: {e}")
                if num_trials < max_trials:
                    print("Retrying...")
                else:
                    print("Max trials reached. Skipping this instruction.")
                    # Callers treat None as "this instruction failed".
                    return None
            else:
                paraphrased_instruction_list.append(paraphrased_instruction)
                break  # Success, stop retrying this paraphrase

    return {
        "paraphrased_instruction": paraphrased_instruction_list,
    }



In [None]:
def get_theta_key(theta):
    """
    Build a readable string identifier from a theta vector.

    Args:
        theta: Iterable of preference values; elements are expected to be in {-1, 0, 1}

    Returns:
        The elements' string forms joined by underscores,
        e.g. [-1, 0, 1, 1, 0] -> "-1_0_1_1_0"
    """
    parts = map(str, theta)
    return "_".join(parts)


# Load configuration files.
# `humans_params_list["humans"]` is iterated later; each entry supplies
# ["preferencer"]["theta"] for one simulated human.
human_config = "../config/humans/frankarobot_multiple_humans.yaml"
with open(human_config, "r") as stream:
    humans_params_list = yaml.safe_load(stream)

# Reward-learning experiment config; this notebook reads its "irl" and "env" sections.
config = "../config/reward_learning/obj20_sg10_persg5/frankarobot_obj20_sg10_persg5_maskedrl_llm_mask_mweight1_mnoise1_hidden128_256_128.yaml"
with open(config, "r") as stream:
    params = yaml.safe_load(stream)

# Load data split configuration.
# NOTE(review): the return value of load_split_data is discarded here — confirm
# the call is only needed for its side effects (or is not needed at all).
indices_file = os.path.join(params["irl"]["data_split_config_path"], "split_indices.json")
load_split_data(
    params["env"]["trajset_file"],
    params["env"]["per_SG"],
    params["env"]["train_test_split"],
    indices_file=indices_file
)

# The seed is also embedded in the output file names written later.
# NOTE(review): set_seed runs after load_split_data — confirm the split does not
# depend on the random state.
seed = 12345
set_seed(seed)

# Paraphrase Language Instructions

In [None]:
# Paraphrase the language instruction of every human in the config and save the
# results as JSON, writing periodic checkpoints so an interrupted run can resume.
results_list = copy.deepcopy(humans_params_list["humans"])
resume_path = None  # Set to a previously saved results JSON to skip finished humans
previous_resume_path = None  # Most recent checkpoint written by this run

# Load resume data if specified
if resume_path is not None:
    with open(resume_path, "r") as f:
        resume_results_list = json.load(f)
    print(f"Resuming from {resume_path}")

# Process each human's preferences
for human_idx, human_params in enumerate(humans_params_list["humans"]):
    # Skip if already processed (when resuming). The length check tolerates a
    # resume file that holds fewer entries than the current config.
    if (
        resume_path is not None
        and human_idx < len(resume_results_list)
        and 'paraphrased_instruction' in resume_results_list[human_idx]
    ):
        results_list[human_idx] = resume_results_list[human_idx]
        continue

    print(f"Processing human {human_idx}/{len(humans_params_list['humans'])}")

    # Extract theta and generate language instruction
    theta = human_params["preferencer"]["theta"]
    theta_key = get_theta_key(theta)
    gt_state_mask = theta_to_state_mask(theta, state_dim=19).astype(int)
    language_instruction = theta_to_language([theta])[0]

    print(f"Human theta: {theta}")
    print(f"Language instruction: {language_instruction}")

    # Store metadata
    results_list[human_idx]["language_instruction"] = language_instruction
    results_list[human_idx]["theta_key"] = theta_key
    results_list[human_idx]["gt_state_mask"] = gt_state_mask

    # Generate paraphrased instructions
    processed = process_instruction(
        language_instruction,
        model_text="gpt-4o-mini",
        num_instructions=5,
    )

    if processed is None:
        # Leave this entry without 'paraphrased_instruction' so a later resume
        # retries it; do not `continue`, so the checkpoint step below still runs.
        print(f"Warning: Failed to process instruction for human {human_idx}")
    else:
        # Store results
        results_list[human_idx]["paraphrased_instruction"] = processed["paraphrased_instruction"]
        results_list[human_idx]["omit_referent_instruction"] = theta_to_language(
            [theta], omit_referent=True, omit_expression=False
        )[0]
        results_list[human_idx]["omit_expression_instruction"] = theta_to_language(
            [theta], omit_referent=False, omit_expression=True
        )[0]

        print(f"Paraphrased instruction: {processed['paraphrased_instruction']}")

    # Save checkpoint every 12 humans
    if human_idx % 12 == 0 and human_idx > 0:
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_path = os.path.join(
            params["irl"]["data_split_config_path"],
            f"language_paraphrased_temp0.7_{seed}_{timestamp_str}.json"
        )
        # Write the new checkpoint BEFORE deleting the old one, so a failed
        # write or serialization error never leaves the run without a checkpoint.
        with open(results_path, "w") as f:
            json.dump(results_list, f, indent=4, default=jsonNpEncoder)
        print(f"Saved checkpoint to {results_path}")

        # The timestamp has one-second resolution; never delete the file just written.
        if previous_resume_path is not None and previous_resume_path != results_path:
            os.remove(previous_resume_path)
            print(f"Removed previous resume file {previous_resume_path}")
        previous_resume_path = results_path

# Save final results
# NOTE(review): the last checkpoint file is not removed after the final save and
# shares the final file's naming pattern — confirm whether it should be cleaned up.
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
results_path = os.path.join(
    params["irl"]["data_split_config_path"],
    f"language_paraphrased_temp0.7_{seed}_{timestamp_str}.json"
)
with open(results_path, "w") as f:
    json.dump(results_list, f, indent=4, default=jsonNpEncoder)
print(f"Saved final results to {results_path}")

