## Utils

### Imports

In [5]:
import requests
from environments.alfworld.common.environment import ALFWorldEnv

import os
from langchain_openai import ChatOpenAI
from planning_library.strategies import ADaPTStrategy
from planning_library.strategies.adapt.components import ADaPTPlanner, ADaPTExecutor
from planning_library.action_executors import LangchainActionExecutor, MetaTools
from planning_library.utils.gym_env_reset_tool import GymEnvResetTool
from textwrap import dedent

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Logging

In [6]:
# LangChain tracing settings, written to the process environment in one step.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "ALFWorld + ADaPT (new)",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

## Environment

In [7]:
# Location of the ALFWorld data (presumably read by the ALFWorld package — confirm).
os.environ["ALFWORLD_DATA"] = "utils/data"

# Fetch the upstream base config only when no local copy exists yet.
if not os.path.exists("utils/base_config.yaml"):
    # A fresh checkout may not have the directory; create it instead of crashing.
    os.makedirs("utils", exist_ok=True)

    # Bound the request so a stalled connection cannot hang the notebook.
    response = requests.get(
        "https://raw.githubusercontent.com/alfworld/alfworld/master/configs/base_config.yaml",
        timeout=30,
    )
    # Fail loudly on 4xx/5xx rather than saving an error page as the config file.
    response.raise_for_status()

    with open("utils/base_config.yaml", "wb") as f:
        f.write(response.content)

# Environment, the reset meta-tool wrapping it, and the executor exposing its tools.
env = ALFWorldEnv(config_path="utils/base_config.yaml")
meta_tools = MetaTools(reset=GymEnvResetTool(env=env))
action_executor = LangchainActionExecutor(env.tools, meta_tools=meta_tools)

Initializing AlfredTWEnv...


100%|██████████| 8810/8810 [00:03<00:00, 2813.19it/s]

Overall we have 3553 games in split=train
Training with 3553 games





In [8]:
# Reset the environment with the "next_episode" option and show the first observation.
reset_options = {"next_episode": True}
obs, info = env.reset(options=reset_options)
print(obs)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.

Your task is to: put some pencil on shelf.


## Strategy

### Hyperparameters

In [9]:
# ADaPT hyperparameters
max_depth = 3  # maximum decomposition depth
executor_max_iterations = 2  # maximum number of iterations for the executor; when reached, the loop will exit
planner_max_iterations = 20  # maximum number of iterations for the planner; when reached, the loop will exit

# other hyperparameters
model_name = "gpt-3.5-turbo"
temperature = 0.8

### Executor

The default executor can be powered by any strategy from the library. By default, a simple one is used (see `planning_library/strategies/simple/simple_strategy.py` for more details).

In [10]:
# Chat model and prompt for the executor; the task text fills the {inputs} placeholder.
executor_llm = ChatOpenAI(model=model_name, temperature=temperature)
executor_user_message = dedent("""
            You are ALFRED, an intelligent agent navigating in a household.

            {inputs}""")

# Executor built on the library's simple strategy, acting through `action_executor`.
executor = ADaPTExecutor.create_simple_strategy(
    llm=executor_llm,
    parser_name="openai-tools",
    user_message=executor_user_message,
    max_iterations=executor_max_iterations,
    return_intermediate_steps=True,
    return_finish_log=True,
    action_executor=action_executor,
)

### Planner

The planner can be either:

* `agent`: an LLM with tools, which can be powered by any strategy from the library. By default, a simple one is used (see `planning_library/strategies/simple/simple_strategy.py` for more details).
* `simple`: a single call to the LLM.

In [11]:
# Chat model and prompt for the agent planner; the task text fills {inputs}.
agent_planner_llm = ChatOpenAI(model=model_name, temperature=temperature)
agent_planner_user_message = dedent("""
            Here is the essence of the original task:

            {inputs}
            """)

# Agent-style planner, created through the library's agent-planner factory.
agent_planner = ADaPTPlanner.create_agent_planner(
    llm=agent_planner_llm,
    parser_name="openai-tools",
    user_message=agent_planner_user_message,
    executor_parser_name="openai-tools",
)

In [12]:
# Chat model and prompt for the simple planner; the task text fills {inputs}.
simple_planner_llm = ChatOpenAI(model=model_name, temperature=temperature)
simple_planner_user_message = dedent("""
            Here is the essence of the original task:

            {inputs}
            """)

# Simple planner, created through the library's simple-planner factory.
simple_planner = ADaPTPlanner.create_simple_planner(
    llm=simple_planner_llm,
    user_message=simple_planner_user_message,
    executor_parser_name="openai-tools",
)

## Defining strategy

In [13]:
# Options for the ADaPT strategy that uses the agent planner.
agent_adapt_options = {
    "max_depth": max_depth,
    "return_intermediate_steps": True,
    "return_finish_log": True,
}

# ADaPT strategy pairing the executor with the agent planner.
agent_planner_adapt = ADaPTStrategy(executor=executor, planner=agent_planner, **agent_adapt_options)

In [14]:
# Run ADaPT (agent planner) on the current observation, passed nested under "inputs".
# The call is the cell's last expression, so its return value is displayed.
agent_planner_payload = {"inputs": {"inputs": obs}}
agent_planner_adapt.invoke(agent_planner_payload)



[1m> Entering new ADaPTStrategy chain...[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3mI tried to put the pencil on the shelf, but nothing happened. I will attempt to complete the task again.[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3mI have set up the first subtask in the plan, which is to "Pick up a pencil from the desk 1" with the requirement to successfully complete this task. The aggregation mode is set to "and", meaning all subtasks need to be completed for the overall task to be considered successful. If you have any additional steps or adjustments you'd like to make, feel free to let me know![0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3mI have added a subtask to try picking up the pencil from desk 1.[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy 

{'inputs': {'inputs': '-= Welcome to TextWorld, ALFRED! =-\n\nYou are in the middle of a room. Looking quickly around you, you see a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.\n\nYour task is to: put some pencil on shelf.'},
 'intermediate_steps': [[]],
 'finish_log': ["Couldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Maximum decomposition depth reached."]}

In [8]:
# Options for the ADaPT strategy that uses the simple planner.
simple_adapt_options = {
    "max_depth": max_depth,
    "return_intermediate_steps": True,
    "return_finish_log": True,
}

# ADaPT strategy pairing the executor with the simple planner.
simple_planner_adapt = ADaPTStrategy(executor=executor, planner=simple_planner, **simple_adapt_options)

In [9]:
# Run ADaPT (simple planner) on the current observation, passed nested under "inputs".
# The call is the cell's last expression, so its return value is displayed.
simple_planner_payload = {"inputs": {"inputs": obs}}
simple_planner_adapt.invoke(simple_planner_payload)



[1m> Entering new ADaPTStrategy chain...[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3mIt seems I couldn't go to shelf 1. Let me try again.[0m

[1m> Finished chain.[0m


[1m> Entering new SimpleStrategy chain...[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m
[32;1m[1;3mCouldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Maximum decomposition depth reached.[0m

[1m> Finished chain.[0m


{'inputs': {'inputs': '-= Welcome to TextWorld, ALFRED! =-\n\nYou are in the middle of a room. Looking quickly around you, you see a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 1, and a sidetable 1.\n\nYour task is to: put some pencil on shelf.'},
 'intermediate_steps': [[]],
 'finish_log': ["Couldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Couldn't solve the task. Last log: Maximum decomposition depth reached."]}