In [12]:
import openai
import os
openai.api_key = os.getenv("OPENAI_API_KEY")
%load_ext autoreload
%autoreload 2

In [6]:
from generate_instruction import *

In [8]:
# Load the human-written seed tasks: one JSON object per line of the file.
seed_tasks_path = "seed_tasks.jsonl"

# A context manager closes the file handle deterministically (the previous
# bare open() inside the comprehension never closed it). Blank lines, e.g. a
# trailing newline at the end of the file, are skipped instead of making
# json.loads raise.
with open(seed_tasks_path, "r") as seed_file:
    seed_tasks = [json.loads(line) for line in seed_file if line.strip()]

# Keep only the instruction text plus the input/output of the FIRST instance
# of every task; any further instances are ignored.
seed_instruction_data = [
    {
        "instruction": task["instruction"],
        "input": task["instances"][0]["input"],
        "output": task["instances"][0]["output"],
    }
    for task in seed_tasks
]
print(f"Loaded {len(seed_instruction_data)} human-written seed instructions")

Loaded 175 human-written seed instructions


In [15]:
# Chat messages sent to the model as-is: a system message describing the
# motion-plan template and its rules, followed by a user message describing
# the reward-specification functions the plan should be translated into.
#
# Two inconsistencies in the prompt text are fixed here:
#   * Rule 1 used to refer to "[NUM: default_value]" although the template
#     writes numeric placeholders with braces ("{NUM: 0.0}").
#   * The example answer used an object named "bowl", which is not in the
#     list of allowed object names and contradicts rule 4.
prompt_manipulation = [
    # System message: plan template plus the rules for filling it in.
    {"role": "system", "content":
     '''We have a manipulator and we want you to help plan how it should move to perform tasks using the following template:
        [start of plan]
        [optional] To perform this task, the manipulator's gripper should move close to object1={CHOICE: cube, apple, box_left_side, box_right_side, box_center}.
        [optional] object1 should be close to object2={CHOICE: cube, apple, box_left_side, box_right_side, box_center}.
        [optional] object1 needs to be rotated by {NUM: 0.0} degrees along x axis.
        [optional] object1 needs to be rotated by {NUM: 0.0} degrees along z axis.
        [optional] object2 needs to be rotated by {NUM: 0.0} degrees along x axis.
        [optional] object2 needs to be rotated by {NUM: 0.0} degrees along z axis.
        [optional] object1 needs to be lifted to a height of {NUM: 0.0}m at the end.
        [optional] object1 needs to be moved by {NUM: 0.0}m in x axis at the end.
        [optional] object1 needs to be moved by {NUM: 0.0}m in y axis at the end.
        [optional] object2 needs to be lifted to a height of {NUM: 0.0}m at the end.
        [optional] object2 needs to be moved by {NUM: 0.0}m in x axis at the end.
        [optional] object2 needs to be moved by {NUM: 0.0}m in y axis at the end.
        [optional] This {CHOICE: is, is not} the first plan for a new task.
        [end of plan]

        Rules:
        1. If you see phrases like {NUM: default_value}, replace the entire phrase with a numerical value.
        2. If you see phrases like {CHOICE: choice1, choice2, ...}, it means you should replace the entire phrase with one of the choices listed.
        3. If you see [optional], it means you only add that line if necessary for the task, otherwise remove that line.
        4. Do not invent new objects not listed here.
        5. I will tell you a behavior/skill/task that I want the manipulator to perform and you will provide the full plan, even if you may only need to change a few lines. Always start the description with [start of plan] and end it with [end of plan].
        6. You can assume that the robot is capable of doing anything, even for the most challenging task.
        7. Your plan should be as close to the provided template as possible. Do not include additional details.
        8. The robot is facing towards positive x-axis, positive y-axis is to the left of the robot, positive z-axis is upward.
        9. Objects are free to move around unless a command is given.'''
    },
    # User message: the reward API the plan must be compiled into, an example
    # answer, and formatting reminders.
    {"role": "user", "content":
     '''We have a plan of a robot arm with gripper to manipulate objects and we want you to turn that into the corresponding reward specifying program with following functions:

        ```
        def set_l2_distance_reward(name_obj_A, name_obj_B)
        ```
        where name_obj_A and name_obj_B are selected from ['gripper', 'cube', 'apple', 'box_left_side', 'box_right_side', 'box_center'].
        This term sets a reward for minimizing l2_distance between name_obj_A and name_obj_B so they get closer to each other.

        ```
        def set_obj_orientation_reward(name_obj, x_axis_rotation_radians, z_axis_rotation_radians)
        ```
        this term encourages the orientation of name_obj to be close to the target.

        ```
        def execute_plan(duration=2)
        ```
        This function sends the parameters to the robot and execute the plan for `duration` seconds, default to be 2

        ```
        def set_obj_position_reward(name_obj, x_offset, y_offset, z_height)
        ```
        this term encourages the position of name_obj to be close to the specified target position.

        ```
        def reset_reward()
        ```
        This function resets the reward to default values.

        This is the first plan for a new task.

        Example answer code:
        ```
        import numpy as np

        reset_reward() # This is a new task so reset reward; otherwise we don't need it
        set_l2_distance_reward("gripper", "apple")
        set_l2_distance_reward("apple", "box_center")
        set_obj_orientation_reward("cube", np.deg2rad(30), 0)
        set_obj_position_reward("cube", 0, 0, 1.0)

        execute_plan(4)
        ```

        Remember:
        1. Always format the code in code blocks. In your response execute_plan should be called exactly once at the end.
        2. Do not invent new functions or classes. The only allowed functions you can call are the ones listed above. Do not leave unimplemented code blocks in your response.
        3. The only allowed library is numpy. Do not import or use any other library.
        4. If you are not sure what value to use, just use your best judge. Do not use None for anything.
        5. Do not calculate the position or direction of any object (except for the ones provided above). Just use a number directly based on your best guess.
        6. You do not need to make the robot do extra things not mentioned in the plan such as stopping the robot.'''
     },
]

In [18]:
# Model and sampling settings for this request.
model_name = "gpt-3.5-turbo"
temperature, top_p = 1.0, 1.0

# Remaining run settings; they are assigned here but not read by this cell.
output_dir = "gpt4_chat_completion"
num_cpus = 8
request_batch_size = 1
num_prompt_instructions = 1
num_instructions_to_generate = 1

# Send the manipulation prompt in a single chat-completion request.
response = openai.ChatCompletion.create(
    messages=prompt_manipulation,
    model=model_name,
    max_tokens=1024,
    top_p=top_p,
    temperature=temperature,
)
# Bare last expression: display the raw response object.
response

<OpenAIObject chat.completion id=chatcmpl-7WA5gWvZ7x4jL74ZXeMQhnZyaHuUv at 0x7fefd6b41530> JSON: {
  "id": "chatcmpl-7WA5gWvZ7x4jL74ZXeMQhnZyaHuUv",
  "object": "chat.completion",
  "created": 1687899640,
  "model": "gpt-3.5-turbo-0301",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "[start of plan]\nTo perform this task, the manipulator's gripper should move close to object1=\"cube\".\nobject1 should be close to object2=\"apple\".\nobject1 needs to be rotated by 0.0 degrees along x axis.\nobject1 needs to be rotated by 0.0 degrees along z axis.\nobject2 needs to be rotated by 0.0 degrees along x axis.\nobject2 needs to be rotated by 0.0 degrees along z axis.\nobject1 needs to be lifted to a height of 0.3m at the end.\nobject1 needs to be moved by 0.2m in x axis at the end.\nobject1 needs to be moved by -0.1m in y axis at the end.\nobject2 needs to be lifted to a height of 0.5m at the end.\nobject2 needs to be moved by 0.1m in

In [19]:
# Show only the text of the first returned choice.
response["choices"][0]["message"]["content"]


'[start of plan]\nTo perform this task, the manipulator\'s gripper should move close to object1="cube".\nobject1 should be close to object2="apple".\nobject1 needs to be rotated by 0.0 degrees along x axis.\nobject1 needs to be rotated by 0.0 degrees along z axis.\nobject2 needs to be rotated by 0.0 degrees along x axis.\nobject2 needs to be rotated by 0.0 degrees along z axis.\nobject1 needs to be lifted to a height of 0.3m at the end.\nobject1 needs to be moved by 0.2m in x axis at the end.\nobject1 needs to be moved by -0.1m in y axis at the end.\nobject2 needs to be lifted to a height of 0.5m at the end.\nobject2 needs to be moved by 0.1m in x axis at the end.\nobject2 needs to be moved by 0.1m in y axis at the end.\nThis is the first plan for a new task.\n[end of plan]\n\nimport numpy as np\n\nreset_reward()\nset_l2_distance_reward("gripper", "cube")\nset_l2_distance_reward("apple", "cube")\nset_obj_orientation_reward("cube", np.deg2rad(0), 0)\nset_obj_position_reward("cube", 0.2,

In [17]:
# Run configuration for the utils-based request below.
output_dir = "gpt4_chat_completion"
num_instructions_to_generate = 1
num_prompt_instructions = 1
request_batch_size = 1
temperature = 1.0
top_p = 1.0
num_cpus = 8
model_name = "gpt-3.5-turbo"

os.makedirs(output_dir, exist_ok=True)

request_idx = 0
scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=False)

progress_bar = tqdm.tqdm(total=num_instructions_to_generate)

# Tokenize all seed instructions up front; the (currently commented-out)
# post-processing below compares generated instructions against these tokens.
all_instructions = [d["instruction"] for d in seed_instruction_data]
all_instruction_tokens = [scorer._tokenizer.tokenize(inst) for inst in all_instructions]
machine_instruction_data = []

batch_inputs = []

# Only sampling from the seed tasks.
# NOTE(review): `prompt` is built here but never added to `batch_inputs`;
# the request below sends `prompt_manipulation` instead. Confirm whether the
# encoded seed prompt is still needed.
prompt_instructions = random.sample(seed_instruction_data, num_prompt_instructions)
prompt = encode_prompt(prompt_instructions)

batch_inputs.append(prompt_manipulation)


decoding_args = utils.OpenAIDecodingArguments(
    temperature=temperature,
    n=1,
    max_tokens=512,  # hard-code to maximize the length. the requests will be automatically adjusted
    top_p=top_p,
    # The previous stop list ["\n20", "20.", "20."] contained a duplicate and
    # would cut the completion at the first "20." -- which appears inside any
    # numeric value such as "20.0 degrees" or "np.deg2rad(20.0)" that this
    # prompt asks the model to produce. No stop sequence is used instead.
    # NOTE(review): assumes OpenAIDecodingArguments accepts stop=None -- confirm.
    stop=None,
)
request_start = time.time()
results = utils.openai_chatcompletion(
    prompts=batch_inputs,
    model_name=model_name,
    batch_size=request_batch_size,
    decoding_args=decoding_args,
    # Intended to prevent the <|endoftext|> token from being generated.
    # NOTE(review): token id 50256 is tokenizer-specific; verify it still maps
    # to <|endoftext|> for the tokenizer used by `model_name`.
    logit_bias={"50256": -100},
)
request_duration = time.time() - request_start
results
    # process_start = time.time()
    # instruction_data = []
    # for result in results:
    #     new_instructions = post_process_gpt3_response(num_prompt_instructions, result)
    #     instruction_data += new_instructions

    # total = len(instruction_data)
    # keep = 0
    # for instruction_data_entry in instruction_data:
    #     # computing similarity with the pre-tokenized instructions
    #     new_instruction_tokens = scorer._tokenizer.tokenize(instruction_data_entry["instruction"])
    #     with Pool(num_cpus) as p:
    #         rouge_scores = p.map(
    #             partial(rouge_scorer._score_lcs, new_instruction_tokens),
    #             all_instruction_tokens,
    #         )
    #     rouge_scores = [score.fmeasure for score in rouge_scores]
    #     most_similar_instructions = {
    #         all_instructions[i]: rouge_scores[i] for i in np.argsort(rouge_scores)[-10:][::-1]
    #     }
    #     if max(rouge_scores) > 0.7:
    #         continue
    #     else:
    #         keep += 1
    #     instruction_data_entry["most_similar_instructions"] = most_similar_instructions
    #     instruction_data_entry["avg_similarity_score"] = float(np.mean(rouge_scores))
    #     machine_instruction_data.append(instruction_data_entry)
    #     all_instructions.append(instruction_data_entry["instruction"])
    #     all_instruction_tokens.append(new_instruction_tokens)
    #     progress_bar.update(1)
    # process_duration = time.time() - process_start
    # print(f"Request {request_idx} took {request_duration:.2f}s, processing took {process_duration:.2f}s")
    # print(f"Generated {total} instructions, kept {keep} instructions")
    # utils.jdump(machine_instruction_data, os.path.join(output_dir, "regen.json"))

  0%|          | 0/1 [00:33<?, ?it/s]
prompt_batches:   0%|          | 0/1 [00:15<?, ?it/s]


KeyboardInterrupt: 