In [1]:
import openai
import os
openai.api_key = os.getenv("OPENAI_API_KEY")
%load_ext autoreload
%autoreload 2

In [2]:
from generate_robot_instruction import encode_task_generation_prompt, generate_task_data, encode_instruct_prompt, generate_instruction_following_chat_data, post_process_chat_response

### Generate Tasks

In [3]:
# Settings for the task-generation run: GPT-4 (03-14 snapshot) is prompted
# with the v1 robot task prompt for up to 100 tasks.
# NOTE(review): results are presumably written under `output_dir`; the helper
# lives in generate_robot_instruction and is not shown here -- confirm.
task_generation_kwargs = {
    "output_dir": "./gpt4_generation/task_generation_v1/",
    "num_tasks_to_generate": 100,
    "prompt_file_name": "./prompts_for_gpt/robot_task_prompt_v1.txt",
    "model_name": "gpt-4-0314",
}
generate_task_data(**task_generation_kwargs)

  0%|          | 0/100 [00:00<?, ?it/s]

### [[{'role': 'user', 'content': "You are serving as a task-generation helper for a given robot environment.\xa0\n\n[Environment Description]\nHere is the information about the environment. The environment is called {Two Block World}\xa0\nThere is a 7DOF Franka robot with a parallel gripper.\xa0\nThere are multiple blocks, {block A}, {block B}, ..., with randomized sizes and density in the environment.\nThe Franka robot has a force sensor on the end effector.\nThe Franka robot is mounted on a table.\nThe blocks are initialized at a random position on the table.\nContents in the {} are the names of the objects in the environment.\n\nThe observation of the robot includes:\nThe position and orientation of both blocks and the end effector of the robot\nThe force sensor on the end effector. \n\n[Your Task]\nCome up with 50 different tasks for the robot to perform.\n\n[Output format]\nThe response should follow the template below:\n### Task {i}: {task description}\nwhere {i} is the task num

prompts: 100%|██████████| 1/1 [01:36<00:00, 96.42s/it]
  1%|          | 1/100 [01:36<2:39:05, 96.42s/it]

['', ' Task 1: Pick up block A and place it on top of block B.\n', ' Task 2: Place block A and block B side by side, touching each other.\n', ' Task 3: Move block A to the center of the table.\n', ' Task 4: Move block B to the edge of the table without causing it to fall off.*\n', ' Task 5: Stack block A and block B to maximize the height of the two blocks.*\n', ' Task 6: Move the lighter block to the left corner of the table and the heavier block to the right corner of the table.*\n', ' Task 7: Stack both blocks on top of each other and ensure they are aligned.*\n', ' Task 8: Move both blocks at least 30 cm away from each other.*\n', ' Task 9: Move both blocks to the left half of the table, keeping at least 10 cm distance between them.*\n', ' Task 10: Pick up and place the lighter block on top of the heavier block, ensuring they touch at only one point.*\n', ' Task 11: Move block B directly adjacent to block A without the two blocks touching each other.\n', ' Task 12: Move both blocks

prompts: 100%|██████████| 1/1 [01:26<00:00, 86.49s/it]
100%|██████████| 100/100 [03:02<00:00,  1.83s/it]

['', ' Task 1: Pick up block A and place it on top of block B\n', ' Task 2: Move block B to the opposite corner of the table from its initial position\n', ' Task 3: Stack the blocks in order of smallest to largest, starting with the smallest on the bottom^\n', ' Task 4: Move both blocks to one edge of the table\n', ' Task 5: Lift block A without grasping it, only using the force exerted by the end effector*\n', ' Task 6: Arrange the blocks in alphabetical order from left to right on the table\n', ' Task 7: Place block A on its longest edge*\n', ' Task 8: Rearrange the blocks in order of lightest to heaviest block*\n', ' Task 9: Move block B to the center of the table\n', ' Task 10: Balance block A on top of block B on a single edge*\n', ' Task 11: Stack the blocks with block B on top but not touching the table*\n', ' Task 12: Move block B such that it is within 5 cm of block A\n', ' Task 13: Place both blocks in a vertical orientation*\n', ' Task 14: Construct a tower with three blocks




#### Split task data
Split the generated task list into smaller JSONL files; each file is then used as the seed-task input for instruction generation.

In [10]:
import re, os
import string
import json
def split_json_file(json_file, 
                    output_path="./gpt4_generation/task_generation/", n=2):
  """Splits a JSON array file into JSONL files holding at most n records each.

  The whole JSON document is loaded, cut into consecutive groups of ``n``
  elements, and every group is written to ``<output_path>/split_<k>.jsonl``
  (k = 0, 1, ...) with one JSON value per line. The last file may contain
  fewer than ``n`` records.

  Args:
    json_file: The path to the JSON file to split. Its top-level value is
      expected to be a list (it is sliced by index).
    output_path: Directory that receives the JSONL files. It is created if it
      does not exist; a trailing path separator is optional.
    n: The maximum number of records per JSONL file (NOT the number of files).

  Returns:
    The list of paths to the created JSONL files, in chunk order. The list is
    empty when the input holds no records.

  Raises:
    ValueError: If ``n`` is not positive.
  """
  # Fail early with a clear message instead of an opaque range() error (n == 0)
  # or a silent no-op (n < 0).
  if n <= 0:
    raise ValueError(f"n must be a positive number of records per file, got {n}")

  with open(json_file, "r") as f:
    data = json.load(f)
  os.makedirs(output_path, exist_ok=True)
  jsonl_files = []
  for chunk_index, start in enumerate(range(0, len(data), n)):
    # os.path.join keeps the file inside output_path even when the caller
    # omits the trailing slash; plain concatenation would produce a sibling
    # path such as "dirsplit_0.jsonl".
    jsonl_file = os.path.join(output_path, f"split_{chunk_index}.jsonl")
    with open(jsonl_file, "w") as outfile:
      for record in data[start : start + n]:
        json.dump(record, outfile)
        outfile.write("\n")

    jsonl_files.append(jsonl_file)
  return jsonl_files

In [11]:
# Cut the regenerated task list into JSONL chunks of 50 tasks each; the
# resulting paths are kept in `jsonl_files`.
task_regen_json = "./gpt4_generation/task_generation/task_regen.json"
jsonl_files = split_json_file(task_regen_json, n=50)

### Generate chain-of-thought (CoT) instruction data

In [5]:
from generate_robot_instruction import generate_instruction_following_chat_data


# CoT instruction generation seeded with the first task split (split_0.jsonl).
# NOTE(review): the helper is defined in generate_robot_instruction (not shown);
# what `ret` contains should be confirmed there.
cot_split0_kwargs = dict(
    output_dir="./gpt4_generation/cot_full_v3",
    instruction_prompt_file="./prompts_for_gpt/robot_cot_prompts_v3.txt",
    seed_example_path="./prompts_for_gpt/seeded_example_cot_v3.jsonl",
    seed_tasks_path="./gpt4_generation/task_generation/split_0.jsonl",
    num_instructions_to_generate=300,
    output_file="cot_instrut_regen.json",
)
ret = generate_instruction_following_chat_data(**cot_split0_kwargs)

  0%|          | 0/300 [00:00<?, ?it/s]

### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have randomized weight, size.\nThe observation space (when there are two blocks) is a 26 dimension vector, consisting of:\nbl

prompts: 100%|██████████| 1/1 [02:22<00:00, 142.64s/it]
  0%|          | 1/300 [02:22<11:50:48, 142.64s/it]

  2%|▏         | 6/300 [02:22<11:38:55, 142.64s/it]
Request 1 took 142.64s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:00<00:00, 60.59s/it]


  2%|▏         | 6/300 [03:23<11:38:55, 142.64s/it]
Request 2 took 60.59s
Generated 0 instructions, kept 0 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:29<00:00, 89.10s/it]
  2%|▏         | 7/300 [04:52<3:01:53, 37.25s/it]  

  3%|▎         | 10/300 [04:52<3:00:01, 37.25s/it]
Request 3 took 89.11s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [00:58<00:00, 58.94s/it]
  4%|▎         | 11/300 [05:51<2:09:29, 26.88s/it]

  5%|▍         | 14/300 [05:51<2:08:08, 26.88s/it]
Request 4 took 58.94s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [06:38<00:00, 398.65s/it]
  5%|▌         | 15/300 [12:29<4:24:50, 55.76s/it]

  6%|▌         | 17/300 [12:29<4:22:59, 55.76s/it]
Request 5 took 398.66s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [02:44<00:00, 164.42s/it]
  6%|▌         | 18/300 [15:14<4:20:43, 55.48s/it]

  8%|▊         | 23/300 [15:14<4:16:06, 55.48s/it]
Request 6 took 164.42s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [02:47<00:00, 167.04s/it]
  8%|▊         | 24/300 [18:01<3:16:42, 42.76s/it]

 10%|▉         | 29/300 [18:01<3:13:08, 42.76s/it]
Request 7 took 167.05s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:59<00:00, 119.32s/it]
 10%|█         | 30/300 [20:00<2:31:36, 33.69s/it]

 12%|█▏        | 35/300 [20:00<2:28:47, 33.69s/it]
Request 8 took 119.32s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:01<00:00, 61.29s/it]
 12%|█▏        | 36/300 [21:02<1:50:52, 25.20s/it]

 13%|█▎        | 39/300 [21:02<1:49:37, 25.20s/it]
Request 9 took 61.29s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [04:17<00:00, 257.69s/it]
 13%|█▎        | 40/300 [25:19<2:32:44, 35.25s/it]

 14%|█▍        | 43/300 [25:19<2:30:59, 35.25s/it]
Request 10 took 257.70s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [04:55<00:00, 295.85s/it]
 15%|█▍        | 44/300 [30:15<3:14:39, 45.62s/it]

 17%|█▋        | 51/300 [30:15<3:09:20, 45.62s/it]
Request 11 took 295.86s
Generated 8 instructions, kept 8 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:01<00:00, 61.33s/it]
 17%|█▋        | 52/300 [31:16<2:00:32, 29.17s/it]

 18%|█▊        | 54/300 [31:16<1:59:34, 29.17s/it]
Request 12 took 61.33s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:30<00:00, 90.67s/it]
 18%|█▊        | 55/300 [32:47<1:59:54, 29.37s/it]

 19%|█▉        | 58/300 [32:47<1:58:26, 29.37s/it]
Request 13 took 90.67s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts:   0%|          | 0/1 [00:48<?, ?it/s]


KeyboardInterrupt: 

In [4]:
from generate_robot_instruction import generate_instruction_following_chat_data


# CoT instruction generation seeded with the second task split (split_1.jsonl);
# output goes to a separate file name from the split_0 run.
# NOTE(review): the helper is defined in generate_robot_instruction (not shown);
# what `ret` contains should be confirmed there.
cot_split1_kwargs = dict(
    output_dir="./gpt4_generation/cot_full_v3",
    instruction_prompt_file="./prompts_for_gpt/robot_cot_prompts_v3.txt",
    seed_example_path="./prompts_for_gpt/seeded_example_cot_v3.jsonl",
    seed_tasks_path="./gpt4_generation/task_generation/split_1.jsonl",
    num_instructions_to_generate=300,
    output_file="cot_instrut_regen_2.json",
)
ret = generate_instruction_following_chat_data(**cot_split1_kwargs)

  0%|          | 0/300 [00:00<?, ?it/s]

### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have randomized weight, size.\nThe observation space (when there are two blocks) is a 26 dimension vector, consisting of:\nbl

prompts: 100%|██████████| 1/1 [01:36<00:00, 96.51s/it]
  0%|          | 1/300 [01:36<8:00:56, 96.51s/it]

  1%|▏         | 4/300 [01:36<7:56:07, 96.51s/it]
Request 1 took 96.51s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have ra

prompts: 100%|██████████| 1/1 [04:46<00:00, 286.62s/it]
  2%|▏         | 5/300 [06:23<6:10:30, 75.36s/it]

  3%|▎         | 10/300 [06:23<6:04:13, 75.36s/it]
Request 2 took 286.62s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:34<00:00, 94.63s/it]
  4%|▎         | 11/300 [07:57<2:57:36, 36.87s/it]

  5%|▌         | 16/300 [07:57<2:54:32, 36.87s/it]
Request 3 took 94.63s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [03:58<00:00, 238.99s/it]
  6%|▌         | 17/300 [11:56<3:00:37, 38.29s/it]

  8%|▊         | 24/300 [11:56<2:56:09, 38.29s/it]
Request 4 took 239.00s
Generated 8 instructions, kept 8 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:25<00:00, 85.49s/it]
  8%|▊         | 25/300 [13:22<1:55:05, 25.11s/it]

  9%|▉         | 28/300 [13:22<1:53:50, 25.11s/it]
Request 5 took 85.49s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [01:04<00:00, 64.29s/it]
 10%|▉         | 29/300 [14:26<1:43:02, 22.81s/it]

 11%|█         | 32/300 [14:26<1:41:53, 22.81s/it]
Request 6 took 64.30s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [02:07<00:00, 127.34s/it]
 11%|█         | 33/300 [16:33<1:52:13, 25.22s/it]

 12%|█▏        | 36/300 [16:33<1:50:57, 25.22s/it]
Request 7 took 127.35s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [02:19<00:00, 139.35s/it]
 12%|█▏        | 37/300 [18:53<2:02:09, 27.87s/it]

 14%|█▍        | 42/300 [18:53<1:59:50, 27.87s/it]
Request 8 took 139.35s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [03:10<00:00, 190.98s/it]
 14%|█▍        | 43/300 [22:04<2:05:40, 29.34s/it]

 16%|█▌        | 48/300 [22:04<2:03:14, 29.34s/it]
Request 9 took 190.99s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [03:11<00:00, 191.04s/it]
 16%|█▋        | 49/300 [25:15<2:06:22, 30.21s/it]

 18%|█▊        | 53/300 [25:15<2:04:21, 30.21s/it]
Request 10 took 191.05s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:15<00:00, 75.41s/it]
 18%|█▊        | 54/300 [26:30<1:45:44, 25.79s/it]

 19%|█▉        | 57/300 [26:30<1:44:27, 25.79s/it]
Request 11 took 75.42s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [02:07<00:00, 127.94s/it]
 19%|█▉        | 58/300 [28:38<1:50:16, 27.34s/it]

 20%|██        | 60/300 [28:38<1:49:22, 27.34s/it]
Request 12 took 127.94s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [03:08<00:00, 188.06s/it]
 20%|██        | 61/300 [31:46<2:18:41, 34.82s/it]

 22%|██▏       | 66/300 [31:46<2:15:47, 34.82s/it]
Request 13 took 188.07s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [02:54<00:00, 174.16s/it]
 22%|██▏       | 67/300 [34:40<2:06:44, 32.64s/it]

 25%|██▍       | 74/300 [34:40<2:02:56, 32.64s/it]
Request 14 took 174.16s
Generated 8 instructions, kept 8 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:58<00:00, 118.52s/it]
 25%|██▌       | 75/300 [36:39<1:34:28, 25.19s/it]

 26%|██▌       | 78/300 [36:39<1:33:12, 25.19s/it]
Request 15 took 118.52s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:47<00:00, 107.88s/it]
 26%|██▋       | 79/300 [38:27<1:34:18, 25.60s/it]

 27%|██▋       | 82/300 [38:27<1:33:01, 25.60s/it]
Request 16 took 107.89s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [02:06<00:00, 126.57s/it]
 28%|██▊       | 83/300 [40:33<1:37:59, 27.10s/it]

 29%|██▊       | 86/300 [40:33<1:36:38, 27.10s/it]
Request 17 took 126.58s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:48<00:00, 108.48s/it]
 29%|██▉       | 87/300 [42:22<1:36:12, 27.10s/it]

 30%|███       | 90/300 [42:22<1:34:51, 27.10s/it]
Request 18 took 108.49s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [02:19<00:00, 139.47s/it]
 30%|███       | 91/300 [44:41<1:41:45, 29.21s/it]

 31%|███▏      | 94/300 [44:41<1:40:17, 29.21s/it]
Request 19 took 139.48s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:17<00:00, 77.01s/it]
 32%|███▏      | 95/300 [45:58<1:30:18, 26.43s/it]

 33%|███▎      | 98/300 [45:58<1:28:58, 26.43s/it]
Request 20 took 77.02s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:45<00:00, 105.78s/it]
 33%|███▎      | 99/300 [47:44<1:28:33, 26.44s/it]

 34%|███▍      | 102/300 [47:44<1:27:14, 26.44s/it]
Request 21 took 105.79s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [01:13<00:00, 73.92s/it]
 34%|███▍      | 103/300 [48:58<1:19:14, 24.13s/it]

 35%|███▌      | 106/300 [48:58<1:18:01, 24.13s/it]
Request 22 took 73.93s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:33<00:00, 93.57s/it]
 36%|███▌      | 107/300 [50:32<1:16:55, 23.92s/it]

 37%|███▋      | 112/300 [50:32<1:14:56, 23.92s/it]
Request 23 took 93.57s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:55<00:00, 115.71s/it]
 38%|███▊      | 113/300 [52:27<1:08:58, 22.13s/it]

 40%|████      | 120/300 [52:27<1:06:23, 22.13s/it]
Request 24 took 115.72s
Generated 8 instructions, kept 8 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:39<00:00, 159.70s/it]
 40%|████      | 121/300 [55:07<1:03:17, 21.21s/it]

 42%|████▏     | 126/300 [55:07<1:01:31, 21.21s/it]
Request 25 took 159.70s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [01:13<00:00, 73.77s/it]
 42%|████▏     | 127/300 [56:21<53:08, 18.43s/it]  

 43%|████▎     | 130/300 [56:21<52:13, 18.43s/it]
Request 26 took 73.78s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have r

prompts: 100%|██████████| 1/1 [01:46<00:00, 106.42s/it]
 44%|████▎     | 131/300 [58:07<57:11, 20.30s/it]

 44%|████▍     | 132/300 [58:07<56:51, 20.30s/it]
Request 27 took 106.43s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have 

prompts: 100%|██████████| 1/1 [01:18<00:00, 78.76s/it]
 44%|████▍     | 133/300 [59:26<1:03:59, 22.99s/it]

 45%|████▌     | 136/300 [59:26<1:02:50, 22.99s/it]
Request 28 took 78.77s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:10<00:00, 70.04s/it]
 46%|████▌     | 137/300 [1:00:36<58:11, 21.42s/it]

 46%|████▌     | 138/300 [1:00:36<57:49, 21.42s/it]
Request 29 took 70.04s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [02:14<00:00, 134.34s/it]
 46%|████▋     | 139/300 [1:02:51<1:18:20, 29.19s/it]

 47%|████▋     | 142/300 [1:02:51<1:16:52, 29.19s/it]
Request 30 took 134.34s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [00:57<00:00, 57.02s/it]
 48%|████▊     | 143/300 [1:03:48<1:03:37, 24.31s/it]

 48%|████▊     | 145/300 [1:03:48<1:02:48, 24.31s/it]
Request 31 took 57.03s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [02:02<00:00, 122.85s/it]
 49%|████▊     | 146/300 [1:05:50<1:13:28, 28.63s/it]

 50%|████▉     | 149/300 [1:05:50<1:12:02, 28.63s/it]
Request 32 took 122.86s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [02:42<00:00, 162.90s/it]
 50%|█████     | 150/300 [1:08:33<1:21:34, 32.63s/it]

 51%|█████▏    | 154/300 [1:08:33<1:19:23, 32.63s/it]
Request 33 took 162.90s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:13<00:00, 73.28s/it]
 52%|█████▏    | 155/300 [1:09:47<1:02:43, 25.96s/it]

 52%|█████▏    | 156/300 [1:09:47<1:02:17, 25.96s/it]
Request 34 took 73.28s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [02:45<00:00, 165.79s/it]
 52%|█████▏    | 157/300 [1:12:32<1:25:36, 35.92s/it]

 53%|█████▎    | 160/300 [1:12:32<1:23:49, 35.92s/it]
Request 35 took 165.80s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [02:40<00:00, 160.36s/it]
 54%|█████▎    | 161/300 [1:15:13<1:26:26, 37.31s/it]

 55%|█████▌    | 165/300 [1:15:13<1:23:57, 37.31s/it]
Request 36 took 160.37s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:15<00:00, 75.77s/it]
 55%|█████▌    | 166/300 [1:16:29<1:04:52, 29.05s/it]

 56%|█████▌    | 168/300 [1:16:29<1:03:53, 29.05s/it]
Request 37 took 75.77s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [01:37<00:00, 97.16s/it]
 56%|█████▋    | 169/300 [1:18:06<1:05:11, 29.86s/it]

 57%|█████▋    | 170/300 [1:18:06<1:04:41, 29.86s/it]
Request 38 took 97.17s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [01:52<00:00, 112.76s/it]
 57%|█████▋    | 171/300 [1:19:59<1:14:53, 34.83s/it]

 57%|█████▋    | 172/300 [1:19:59<1:14:18, 34.83s/it]
Request 39 took 112.77s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [02:22<00:00, 142.87s/it]
 58%|█████▊    | 173/300 [1:22:21<1:30:05, 42.57s/it]

 58%|█████▊    | 175/300 [1:22:21<1:28:40, 42.57s/it]
Request 40 took 142.88s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:53<00:00, 113.49s/it]
 59%|█████▊    | 176/300 [1:24:15<1:24:55, 41.09s/it]

 59%|█████▉    | 177/300 [1:24:15<1:24:14, 41.09s/it]
Request 41 took 113.49s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:31<00:00, 91.39s/it]
 59%|█████▉    | 178/300 [1:25:46<1:25:41, 42.15s/it]

 61%|██████    | 182/300 [1:25:46<1:22:53, 42.15s/it]
Request 42 took 91.40s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [01:00<00:00, 60.90s/it]
 61%|██████    | 183/300 [1:26:47<55:54, 28.67s/it]  

 61%|██████▏   | 184/300 [1:26:47<55:25, 28.67s/it]
Request 43 took 60.91s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:45<00:00, 105.64s/it]
 62%|██████▏   | 185/300 [1:28:33<1:04:25, 33.61s/it]

 63%|██████▎   | 188/300 [1:28:33<1:02:44, 33.61s/it]
Request 44 took 105.65s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:37<00:00, 97.86s/it]
 63%|██████▎   | 189/300 [1:30:11<55:56, 30.24s/it]  

 63%|██████▎   | 190/300 [1:30:11<55:26, 30.24s/it]
Request 45 took 97.87s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [03:53<00:00, 233.38s/it]
 64%|██████▎   | 191/300 [1:34:04<1:27:40, 48.26s/it]

 65%|██████▌   | 195/300 [1:34:04<1:24:27, 48.26s/it]
Request 46 took 233.38s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:36<00:00, 96.30s/it]
 65%|██████▌   | 196/300 [1:35:40<1:02:12, 35.89s/it]

 66%|██████▌   | 197/300 [1:35:40<1:01:36, 35.89s/it]
Request 47 took 96.31s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [01:32<00:00, 92.22s/it]
 66%|██████▌   | 198/300 [1:37:13<1:04:25, 37.89s/it]

 66%|██████▋   | 199/300 [1:37:13<1:03:47, 37.89s/it]
Request 48 took 92.23s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They ha

prompts: 100%|██████████| 1/1 [07:29<00:00, 449.65s/it]
 67%|██████▋   | 200/300 [1:44:42<2:11:19, 78.80s/it]

 67%|██████▋   | 201/300 [1:44:42<2:10:00, 78.80s/it]
Request 49 took 449.66s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [03:29<00:00, 209.55s/it]
 67%|██████▋   | 202/300 [1:48:12<2:18:48, 84.98s/it]

 69%|██████▊   | 206/300 [1:48:12<2:13:08, 84.98s/it]
Request 50 took 209.55s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [02:09<00:00, 129.36s/it]
 69%|██████▉   | 207/300 [1:50:21<1:29:36, 57.82s/it]

 70%|███████   | 211/300 [1:50:21<1:25:45, 57.82s/it]
Request 51 took 129.36s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They h

prompts: 100%|██████████| 1/1 [01:19<00:00, 79.72s/it]
 71%|███████   | 212/300 [1:51:41<1:00:27, 41.22s/it]

 71%|███████   | 213/300 [1:51:41<59:46, 41.22s/it]
Request 52 took 79.72s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:29<00:00, 89.43s/it]
 71%|███████▏  | 214/300 [1:53:10<1:00:00, 41.87s/it]

 72%|███████▏  | 217/300 [1:53:10<57:54, 41.87s/it]
Request 53 took 89.43s
Generated 4 instructions, kept 4 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:00<00:00, 60.48s/it]
 73%|███████▎  | 218/300 [1:54:11<44:35, 32.63s/it]  

 73%|███████▎  | 219/300 [1:54:11<44:03, 32.63s/it]
Request 54 took 60.49s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:51<00:00, 111.29s/it]
 73%|███████▎  | 220/300 [1:56:02<49:35, 37.19s/it]

 75%|███████▍  | 224/300 [1:56:02<47:06, 37.19s/it]
Request 55 took 111.31s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [03:38<00:00, 218.21s/it]
 75%|███████▌  | 225/300 [1:59:41<49:49, 39.86s/it]

 76%|███████▌  | 227/300 [1:59:41<48:29, 39.86s/it]
Request 56 took 218.22s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [00:45<00:00, 45.49s/it]
 76%|███████▌  | 228/300 [2:00:26<40:04, 33.39s/it]

 76%|███████▋  | 229/300 [2:00:26<39:30, 33.39s/it]
Request 57 took 45.49s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [04:50<00:00, 290.90s/it]
 77%|███████▋  | 230/300 [2:05:17<1:05:03, 55.77s/it]

 79%|███████▊  | 236/300 [2:05:17<59:29, 55.77s/it]
Request 58 took 290.90s
Generated 7 instructions, kept 7 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:07<00:00, 127.83s/it]
 79%|███████▉  | 237/300 [2:07:25<38:52, 37.03s/it]  

 80%|████████  | 241/300 [2:07:25<36:24, 37.03s/it]
Request 59 took 127.83s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:42<00:00, 162.27s/it]
 81%|████████  | 242/300 [2:10:07<34:18, 35.49s/it]

 82%|████████▏ | 246/300 [2:10:07<31:56, 35.49s/it]
Request 60 took 162.27s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [00:46<00:00, 46.04s/it]
 82%|████████▏ | 247/300 [2:10:53<23:47, 26.94s/it]

 83%|████████▎ | 249/300 [2:10:53<22:53, 26.94s/it]
Request 61 took 46.05s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:46<00:00, 106.49s/it]
 83%|████████▎ | 250/300 [2:12:40<24:00, 28.80s/it]

 85%|████████▍ | 254/300 [2:12:40<22:04, 28.80s/it]
Request 62 took 106.49s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [03:40<00:00, 220.17s/it]
 85%|████████▌ | 255/300 [2:16:20<25:30, 34.01s/it]

 86%|████████▋ | 259/300 [2:16:20<23:14, 34.01s/it]
Request 63 took 220.18s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:05<00:00, 125.46s/it]
 87%|████████▋ | 260/300 [2:18:25<20:43, 31.08s/it]

 88%|████████▊ | 264/300 [2:18:25<18:39, 31.08s/it]
Request 64 took 125.46s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [03:09<00:00, 189.53s/it]
 88%|████████▊ | 265/300 [2:21:35<19:24, 33.26s/it]

 89%|████████▉ | 267/300 [2:21:35<18:17, 33.26s/it]
Request 65 took 189.54s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [01:19<00:00, 79.90s/it]
 89%|████████▉ | 268/300 [2:22:55<16:58, 31.84s/it]

 90%|████████▉ | 269/300 [2:22:55<16:27, 31.84s/it]
Request 66 took 79.91s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:20<00:00, 80.28s/it]
 90%|█████████ | 270/300 [2:24:15<16:37, 33.25s/it]

 91%|█████████ | 272/300 [2:24:15<15:31, 33.25s/it]
Request 67 took 80.29s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [03:14<00:00, 194.91s/it]
 91%|█████████ | 273/300 [2:27:30<18:46, 41.72s/it]

 92%|█████████▏| 277/300 [2:27:30<15:59, 41.72s/it]
Request 68 took 194.92s
Generated 5 instructions, kept 5 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:16<00:00, 136.82s/it]
 93%|█████████▎| 278/300 [2:29:47<13:15, 36.14s/it]

 93%|█████████▎| 280/300 [2:29:47<12:02, 36.14s/it]
Request 69 took 136.83s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [02:36<00:00, 156.74s/it]
 94%|█████████▎| 281/300 [2:32:24<12:43, 40.17s/it]

 95%|█████████▌| 286/300 [2:32:24<09:22, 40.17s/it]
Request 70 took 156.75s
Generated 6 instructions, kept 6 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [01:17<00:00, 77.66s/it]
 96%|█████████▌| 287/300 [2:33:41<06:14, 28.83s/it]

 96%|█████████▌| 288/300 [2:33:41<05:45, 28.83s/it]
Request 71 took 77.66s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [00:59<00:00, 59.48s/it]
 96%|█████████▋| 289/300 [2:34:41<05:18, 28.98s/it]

 97%|█████████▋| 291/300 [2:34:41<04:20, 28.98s/it]
Request 72 took 59.49s
Generated 3 instructions, kept 3 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [00:37<00:00, 37.60s/it]
 97%|█████████▋| 292/300 [2:35:18<03:17, 24.68s/it]

 97%|█████████▋| 292/300 [2:35:18<03:17, 24.68s/it]
Request 73 took 37.60s
Generated 1 instructions, kept 1 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [01:09<00:00, 69.46s/it]
 98%|█████████▊| 293/300 [2:36:28<03:27, 29.64s/it]

 98%|█████████▊| 293/300 [2:36:28<03:27, 29.64s/it]
Request 74 took 69.47s
Generated 1 instructions, kept 1 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They have

prompts: 100%|██████████| 1/1 [02:22<00:00, 142.75s/it]
 98%|█████████▊| 294/300 [2:38:51<04:30, 45.11s/it]

 98%|█████████▊| 295/300 [2:38:51<03:45, 45.11s/it]
Request 75 took 142.76s
Generated 2 instructions, kept 2 instructions
### [[{'role': 'user', 'content': 'You will be given a set of tasks in a robotic environment.\nYou are asked to simulate the task instructions and corresponding responses happening during task solving.\nSome of them are long-horizon tasks request multiple reasoning steps, so we are generating multi-turn instructions in a chain of thought way. \nThese task instructions will be given to a GPT model and we will evaluate the GPT model performance on the generated responses.\n\n[Environment Description] \nThe environment is called "Block World". \nThere is a 7DOF Franka Panda robot with a parallel gripper, it has a force sensor on the end effector. The robot is mounted on a table.\nThere are multiple blocks on tha table, here we use 2 blocks as example. The blocks are called {block A} and {block B}. \nThe blocks are initialized at a random position on the table. They hav

prompts: 100%|██████████| 1/1 [01:55<00:00, 115.96s/it]
301it [2:40:47, 32.05s/it]                         

301it [2:40:47, 48.73s/it]
Request 76 took 115.97s
Generated 6 instructions, kept 6 instructions





In [5]:
import utils

# Run the regenerated instruction JSON through the project helper.
# NOTE(review): the second argument looks like the destination path (it carries
# the "_formatted" suffix) — confirm against utils.full_inst_json_to_full_form_json.
regen_json_path = './gpt4_generation/cot_full_v3/cot_instrut_regen.json'
formatted_json_path = './gpt4_generation/cot_full_v3/cot_instrut_regen_formatted.json'
utils.full_inst_json_to_full_form_json(regen_json_path, formatted_json_path)

In [51]:
# Load the JSONL file as a list of raw lines (one string per line, trailing
# newline kept). The `with` block closes the file handle as soon as the lines
# are read; the previous bare open() left it open until garbage collection.
with open("./gpt4_generation_cot/cot_instrut_regen_gorilla.jsonl") as jsonl_file:
    data_lists = list(jsonl_file)

In [52]:
# `json` is not imported by any earlier visible cell; importing it here keeps
# this cell runnable after Restart Kernel -> Run All.
import json

# Decode the first JSONL line and keep only the text stored under its 'code' key.
data = json.loads(data_lists[0])['code']

# Some records use '###Instruction.' instead of '###Instruction:'; normalise it
# so the 'Instruction:' marker below is found. replace() is a no-op when the
# malformed header is absent.
data = data.replace('###Instruction.', '###Instruction:')

if 'Output:' in data:
    # Split once and reuse the pieces: index 0 is everything before the first
    # 'Output:' marker, index 1 is the text right after it.
    output_segments = data.split('Output:')
    before_output = output_segments[0]
    # Each field is the text after its marker, cut at the next '###' header.
    # An IndexError here means the record lacks an 'Instruction:' or 'Input:' marker.
    a = {'instruction': before_output.split('Instruction:')[1].split('###')[0],
         'input': before_output.split('Input:')[1].split('###')[0],
         'output': output_segments[1]}

In [61]:
# Display the text that follows the first 'Output:' marker in `data`.
output_segments = data.split('Output:')
output_segments[1]

" {'explanation': No. We don't know the weight of the blocks, need to infer weight first., 'code': <nooutput>.}\n"