In [1]:
import dataclasses
import io
import traceback

from matplotlib import pyplot as plt
import numpy as np
import PIL.Image

import sys
sys.path.append('../src/')

import chat
import llm_task

import enact

import asteroids



task_prompt = '''
Write a concise python function with the following signature.
Answer with a single code block.'''
code_gen_prompt = '''

```
def control(state: State) -> Tuple[float, float]:
  """Control an asteroids-style ship to move to a goal.
  
  Args:
    state: State object encoding the game state. Attributes include
      state.position: Numpy array representing the x and y player position.
      state.goal_position: Numpy array representing the x and y goal position.
      state.rotation: Numpy array representing the player rotation.

      Note that each of these attributes has a batch dimension of 1, so the
      current position of the player can be retrieved via state.position[0].

  Returns:
    A pair of floats representing:
      torque: The amount of torque to apply to the ship, between -1 and 1.
      thrust: The amount of thrust to apply to the ship, between 0.0 and 1
  """
  <INSERT IMPLEMENTATION HERE>
```

Do not define a State class but instead treat the class as already defined.
'''

@enact.typed_invokable(enact.Str, enact.Image)
class PolicyVisualizer(enact.Invokable):
  """Visualizes a policy provided (as a string)."""

  def call(self, code: enact.Str) -> enact.Image:
    """Plots policy trajectories."""
    def_dict = {}
    exec(code, def_dict)
    control = def_dict['control']
    def policy(state: asteroids.State) -> asteroids.Action:
      return asteroids.Action(np.array(control(state))[np.newaxis])

    _, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
   
    for ax in [ax1, ax2, ax3, ax4]:
      t = asteroids.create_trajectory(policy, batch_shape=(1,), steps=300)
      asteroids.plot_trajectory(t, axis=ax)
    b = io.BytesIO()
    plt.savefig(b, format='png')
    return enact.Image(PIL.Image.open(b))


@enact.typed_invokable(enact.Str, llm_task.ProcessedOutput)
class PolicyChecker(enact.Invokable):

  def call(self, input: enact.Str) -> llm_task.ProcessedOutput:
    if not input.startswith('```python') and not input.startswith('```'):
      return llm_task.ProcessedOutput(
        output=None, correction='Input must start with "```".')
    if not input.endswith('```'):
      return llm_task.ProcessedOutput(
        output=None, correction='Input must end with "```".')
    if '```' in input[3:-3]:
      return llm_task.ProcessedOutput(
        output=None, correction='Input must be a single code block```".')
    if input.startswith('```python'):
      code = input[len('```python`'):-len('```')]
    else:
      code = input[len('```'):-len('```')]
    def_dict = {}
    print(code)
    try:
      exec(code, def_dict)
    except Exception as e:
      return llm_task.ProcessedOutput(
        output=None,
        correction=f'Your code raised an exception while parsing: {e}\n{traceback.format_exc()}')
    control = def_dict.get('control')
    if not control:
      return llm_task.ProcessedOutput(
        output=None,
        correction='Your code did not define a `control` function.')
    try:
      result = control(asteroids.State(batch_shape=(1,)))
    except Exception as e:
      print(traceback.format_exc())
      return llm_task.ProcessedOutput(
        output=None,
        correction=f'Your code raised an exception while running: {e}\n{traceback.format_exc()}')
    try:
      thrust, torque = result
      thrust = float(thrust)
      torque = float(torque)
    except TypeError:
      return llm_task.ProcessedOutput(
        output=None,
        correction='Your code could not be unpacked into two float values.')
    print(code)
    critique = enact.RequestInput(enact.Str, 'Please critique the policy or leave empty if ok.')(
      PolicyVisualizer()(enact.Str(code)))
  
    if critique != '':
      return llm_task.ProcessedOutput(
        output=None, correction=f'User critique: {critique}')
    
    return llm_task.ProcessedOutput(
      output=code, correction=None)
      

code_gen = llm_task.Task(
  task_prompt=task_prompt,
  chat_agent=chat.GPTAgent(model='gpt-4'))
code_gen.add_example(
  '''```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```''',
  '''```python\ndef add(x: int, y: int):\n  return x + y\n```''')

@enact.typed_invokable(enact.NoneResource, enact.Str)
@dataclasses.dataclass
class CreatePolicy(enact.Invokable):
  code_gen: llm_task.Task
  
  def call(self):
    return self.code_gen(enact.Str(code_gen_prompt))


#store = enact.Store(backend=enact.FileBackend('/home/max/Documents/enact/examples/store_backend/'))
store = enact.Store()
with store:
  post_processor_ref = enact.commit(PolicyChecker())
  code_gen.post_processor = post_processor_ref
  code_gen.max_retries = 2
  create_policy = CreatePolicy(code_gen)


In [2]:
import enact.gradio
with store:
  ref = enact.commit(create_policy)
  gui = enact.gradio.GUI(
    ref,
    input_required_outputs=[enact.Image],
    input_required_inputs=[enact.Str])
  gui.launch(share=True)
  #gui.queue().launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://8afdb1d37127cb842d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


def control(state: State) -> Tuple[float, float]:
  """Control an asteroids-style ship to move to a goal."""
  
  # Calculate the vector to the goal.
  vector_to_goal = state.goal_position[0] - state.position[0]

  # Use atan2 to find the angle to the goal relative to the ship's orientation.
  angle_to_goal = np.arctan2(vector_to_goal[1], vector_to_goal[0]) - state.rotation[0]

  # Normalizes the angle to the range [-pi, pi]
  angle_to_goal = (angle_to_goal + np.pi) % (2 * np.pi) - np.pi

  # Calculate the proportion of the direction the ship should rotate.
  torque = angle_to_goal / np.pi

  # Calculate the proportion of the distance the ship should thrust.
  thrust = min(1.0, np.linalg.norm(vector_to_goal) / 100.0)
  
  return torque, thrust

import numpy as np
from typing import Tuple

class State:
  pass

def control(state: State) -> Tuple[float, float]:

  vector_to_goal = state.goal_position[0] - state.position[0]
 
  angle_to_goal = np.arctan2(vector_to_goal[1], vector_to_goal[0

In [21]:

np.array((0, 1.))

array([0., 1.])

In [24]:
def my_control(state):
    direction_to_goal = np.arctan2(state.goal_position[1] - state.position[1], state.goal_position[0] - state.position[0])
    rotation_difference = np.arctan2(np.sin(direction_to_goal - state.rotation), np.cos(direction_to_goal - state.rotation))

    if rotation_difference > 0:
        torque = 1
    else:
        torque = -1
  
    distance_to_goal = np.linalg.norm(state.goal_position - state.position)

    if distance_to_goal < 1:
        thrust = distance_to_goal
    else:
        thrust = 1

    return (torque, thrust)

In [6]:
s = asteroids.State(batch_shape=(1,))
s

<State: [[ 1.9548714  11.621289   -0.08679825  0.          0.          0.
  16.246428   11.920341    0.        ]]>

In [7]:
s.position[0]

array([ 1.9548714, 11.621289 ], dtype=float32)

In [26]:
my_control(s)

(-1, 1)

In [12]:
gui._invokable.get()

CreatePolicy(code_gen=Task(task_prompt='\nWrite a concise python function with the following signature.\nAnswer with a single code block.', examples=[TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')], corrections=[], post_processor=<Ref: b4a2c77e2d5e3e325ed9d46290d6e659fbd2c6913d4474a64a19adb0db46759f>, max_retries=10, chat_agent=GPTAgent(model='gpt-4')))

In [30]:
digest_str = '{"digest": "2e265fb9494ecfb1f38fede82760e0110aac65a02e1928b8042e2e74d28ca96c"}'

In [31]:
with store:
  reference = enact.Ref.from_id(digest_str)
  enact.pprint(reference())

Invocation:
  request:
    -> Request#958f82:
      invokable:
        -> CreatePolicy#b08e74:
          code_gen:
            Task:
              task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
              examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')]
              corrections: []
              post_processor: -> PolicyChecker()#b4a2c7
              max_retries: 10
              chat_agent: GPTAgent(model='gpt-4')
      input: -> NoneResource()#4c96e8
  response:
    -> Response#dc0394:
      invokable:
        -> CreatePolicy#b08e74:
          code_gen:
            Task:
              task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
              examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```'

In [34]:
reference().response().output()

'import numpy as np\nfrom typing import Tuple\n\ndef control(position: np.ndarray,\n            goal: np.ndarray,\n            orientation: float) -> Tuple[float, float]:\n  """Control an asteroids-style ship to move to a goal.\n  \n  Args:\n    position: A 2D vector representing the position of the ship.\n    goal: A 2D vector representing the position of the goal.\n    orientation: A float representing the orientation of the ship in radians.\n  Returns:\n    A pair of floats representing:\n      torque: The amount of torque to apply to the ship, between -1 and 1.\n      thrust: The amount of thrust to apply to the ship, between 0.0 and 1\n  """\n  diff = goal - position\n  angle_to_goal = np.arctan2(diff[1], diff[0])\n  angle_diff = (angle_to_goal - orientation) % (2*np.pi)\n  if angle_diff > np.pi:\n    angle_diff -= 2 * np.pi\n  torque = np.clip(angle_diff, -1, 1)\n  distance = np.linalg.norm(diff)\n  thrust = np.clip(distance, 0.0, 1.0)\n  return torque, thrust\n  '

In [22]:
enact.pprint(reference().response())

Response:
  invokable:
    -> CreatePolicy#b08e74:
      code_gen:
        Task:
          task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
          examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')]
          corrections: []
          post_processor: -> PolicyChecker()#b4a2c7
          max_retries: 10
          chat_agent: GPTAgent(model='gpt-4')
  output: None
  raised:
    -> InputRequest#28262e:
      args:
        [
          -> RequestInput#9c306e:
            requested_type: Str
            context: 'Please critique the policy or leave empty if ok.'
          -> Image(data=<16778 bytes>)#3d4c38
          Str
          'Please critique the policy or leave empty if ok.']
  raised_here: False
  children:
    [
      -> Invocation#4235d8:
        request:
          -> Request#814ab0:
            invokable:
 

In [None]:
reference().output()

In [25]:
enact.pprint(reference().response().children[-1])

-> Invocation#4235d8:
  request:
    -> Request#814ab0:
      invokable:
        -> Task#5e59b1:
          task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
          examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')]
          corrections: []
          post_processor: -> PolicyChecker()#b4a2c7
          max_retries: 10
          chat_agent: GPTAgent(model='gpt-4')
      input: -> '\n\n```\ndef control(position: np.ndarray,\n            goal: np.ndarray,\n            orientation: float) -> Tuple[float, float]\n  """Control an asteroids-style ship to move to a goal.\n  \n  Args:\n    position: A 2D vector representing the position of the ship.\n    goal: A 2D vector representing the position of the goal.\n    orientation: A float representing the orientation of the ship in radians.\n  Returns:\n    A pair of float

In [9]:
ref.get().__name__

AttributeError: 'CreatePolicy' object has no attribute '__name__'

In [7]:
ref.digest

'b08e74828f820f2a09f4e56cb5c9512b2327963acf1a2d608e5a10123baf10a3'

In [None]:
digest_str = 
ref_id_str = '{"digest": "a1c474e023fc3a442a79f062c0eb927ff4d47fa37539f9098042b31f2f67c015"}'
with store:
  reference = enact.Ref.from_id(ref_id_str)
  enact.pprint(reference())

In [3]:
single__soln = """
import numpy as np
from typing import Tuple

def control(position: np.ndarray,
            goal: np.ndarray,
            orientation: float) -> Tuple[float, float]:
  
  direction = np.arctan2(goal[1] - position[1], goal[0] - position[0])
  torque = direction - orientation

  while torque > np.pi: torque -= 2 * np.pi
  while torque < -np.pi: torque += 2 * np.pi

  torque = np.clip(torque, -1, 1)    

  distance = np.linalg.norm(goal - position)

  if distance > 1:
    thrust = 1
  else:
    thrust = distance

  return torque, thrust
"""

In [13]:
res_id = '{"digest": "2b70ac2a8795963a9816b78f9c72aecb14c237950939e627c86c165fc5522e35"}'
with store:
  ref = enact.Ref(res_id)
  print(ref.id)
  print(ref.get())

{"digest": "{\"digest\": \"2b70ac2a8795963a9816b78f9c72aecb14c237950939e627c86c165fc5522e35\"}"}


NotFound: {"digest": "{\"digest\": \"2b70ac2a8795963a9816b78f9c72aecb14c237950939e627c86c165fc5522e35\"}"}

In [3]:
ref

<Ref: b08e74828f820f2a09f4e56cb5c9512b2327963acf1a2d608e5a10123baf10a3>

In [4]:
enact.digests.digest(ref.id)

'023eb4b9ac93d3780977f682b183198e6b1d8027d26354a6d068fa3562d93ecf'

In [5]:
ref

<Ref: b08e74828f820f2a09f4e56cb5c9512b2327963acf1a2d608e5a10123baf10a3>

In [6]:
res = ref.get()
enact.pprint(res)

CreatePolicy:
  code_gen:
    Task:
      task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
      examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')]
      corrections: []
      post_processor: -> PolicyChecker()#b4a2c7
      max_retries: 10
      chat_agent: GPTAgent(model='gpt-4')


In [7]:
res.code_gen

Task(task_prompt='\nWrite a concise python function with the following signature.\nAnswer with a single code block.', examples=[TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')], corrections=[], post_processor=<Ref: b4a2c77e2d5e3e325ed9d46290d6e659fbd2c6913d4474a64a19adb0db46759f>, max_retries=10, chat_agent=GPTAgent(model='gpt-4'))

In [8]:
res.code_gen.corrections

[]

In [33]:
correction_str = res.code_gen.corrections[0].input
enact.pprint(correction_str)

'\n\n```\ndef control(position: np.ndarray,\n            goal: np.ndarray,\n            orientation: float) -> Tuple[float, float]\n  """Control an asteroids-style ship to move to a goal.\n  \n  Args:\n    position: A 2D vector representing the position of the ship.\n    goal: A 2D vector representing the position of the goal.\n    orientation: A float representing the orientation of the ship in radians.\n  Returns:\n    A pair of floats representing:\n      torque: The amount of torque to apply to the ship, between -1 and 1.\n      thrust: The amount of thrust to apply to the ship, between 0.0 and 1\n  """\n  <INSERT IMPLEMENTATION HERE>\n```\n'


In [34]:
exec(correction_str)

SyntaxError: invalid syntax (<string>, line 3)

In [44]:
len(res.code_gen.corrections)

1

In [47]:
res.code_gen.corrections[0].output

'```python\ndef control(position: np.ndarray,\n            goal: np.ndarray,\n            orientation: float) -> Tuple[float, float]:\n  direction_to_goal = goal - position\n  angle_to_goal = np.arctan2(direction_to_goal[1], direction_to_goal[0])\n  \n  angle_difference = angle_to_goal - orientation\n  \n  if angle_difference > np.pi:\n    angle_difference -= 2 * np.pi\n  elif angle_difference < -np.pi:\n    angle_difference += 2 * np.pi\n    \n  torque = np.clip(angle_difference, -1, 1)\n  \n  distance_to_goal = np.linalg.norm(direction_to_goal)\n  \n  thrust = np.clip(distance_to_goal / 20, 0, 1)\n  \n  return torque, thrust\n```'

In [48]:
res.code_gen.get_output_type()

enact.resource_types.Str

In [9]:
res.code_gen.post_processor

<Ref: b4a2c77e2d5e3e325ed9d46290d6e659fbd2c6913d4474a64a19adb0db46759f>

In [10]:
pp_ref = res.code_gen.post_processor
pp_ref.get()

PolicyChecker()

In [53]:
pp_ref.get().output

AttributeError: 'PolicyChecker' object has no attribute 'output'

In [55]:
res.call()

InputRequestOutsideInvocation: ('Please critique the policy or leave empty if ok.', <class 'enact.resource_types.Str'>)

In [56]:
res

CreatePolicy(code_gen=Task(task_prompt='\nWrite a concise python function with the following signature.\nAnswer with a single code block.', examples=[TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')], corrections=[Correction(input='\n\n```\ndef control(position: np.ndarray,\n            goal: np.ndarray,\n            orientation: float) -> Tuple[float, float]\n  """Control an asteroids-style ship to move to a goal.\n  \n  Args:\n    position: A 2D vector representing the position of the ship.\n    goal: A 2D vector representing the position of the goal.\n    orientation: A float representing the orientation of the ship in radians.\n  Returns:\n    A pair of floats representing:\n      torque: The amount of torque to apply to the ship, between -1 and 1.\n      thrust: The amount of thrust to apply to the ship, between 0.0 and 1\n  """\n  <INSERT IMPLEMENTATION HERE>\n```\n', output='```

In [11]:
post_processor_ref

<Ref: b4a2c77e2d5e3e325ed9d46290d6e659fbd2c6913d4474a64a19adb0db46759f>

In [16]:
pp_res = post_processor_ref.get()
enact.pprint(pp_res)

PolicyChecker()


In [19]:
pp_res.call('hello')

ProcessedOutput(output=None, correction='Input must start with "```".')

In [20]:
pp_res.response()

AttributeError: 'PolicyChecker' object has no attribute 'response'

In [21]:
ref.response()

AttributeError: 'Ref' object has no attribute 'response'

In [22]:
enact.pprint(ref)

-> CreatePolicy#b08e74:
  code_gen:
    Task:
      task_prompt: '\nWrite a concise python function with the following signature.\nAnswer with a single code block.'
      examples: [ TaskExample(input='```def add(x: int, y: int):\n  <INSERT IMPLEMENTATION HERE>```', output='```python\ndef add(x: int, y: int):\n  return x + y\n```')]
      corrections: []
      post_processor: -> PolicyChecker()#b4a2c7
      max_retries: 10
      chat_agent: GPTAgent(model='gpt-4')


In [24]:
res = ref.get()

In [28]:
res.code_gen.chat_agent.field_names()

<generator object Resource.field_names.<locals>.<genexpr> at 0x7f798496f0b0>