In [1]:
import os
import hashlib
import shutil
from pathlib import Path
import time

from ldp.agent import AgentConfig
from ldp.alg.rollout import RolloutManager
from ldp.data_structures import Trajectory, Transition

from fhda.data_analysis_env import DataAnalysisEnv
from fhda.notebook_env import NBEnvironment
from fhda.utils import NBLanguage
from fhda import prompts

In [2]:
# ENVIRONMENT CONFIGURATION

# API keys.
# `setdefault` leaves a key that is already exported in the environment
# untouched, so a real key is never clobbered by the placeholder below.
# If no key is exported, replace the placeholder with your own key (and do not
# commit the notebook with a real key in it).
os.environ.setdefault("ANTHROPIC_API_KEY", "YOUR_ANTHROPIC_API_KEY")
os.environ.setdefault("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
# If using docker, be sure to pull the image from docker hub first
# docker pull futurehouse/bixbench:aviary-notebook-env
# This image includes many bioinformatics and data science packages
os.environ["use_docker"] = "False"


def setup_data_analysis_env(
    query: str,
    dataset_folder: Path,
    language: "NBLanguage | None" = None,
) -> "DataAnalysisEnv":
    """Create a DataAnalysisEnv working on a fresh copy of ``dataset_folder``.

    A new directory ``tmp_results_dir/<sha256(query)>-<timestamp>`` is created,
    the top-level files and sub-directories of ``dataset_folder`` are copied
    into it, and the environment is pointed at a notebook inside that directory.

    Args:
        query: The user request; it is wrapped in ``<query>`` tags and followed
            by the chain-of-thought and notebook guideline prompts.
        dataset_folder: Directory whose contents are copied into the run
            directory.
        language: Notebook language. Defaults to ``NBLanguage.PYTHON`` when
            omitted. With ``NBLanguage.R`` the R output recommendation prompt
            is appended to the task.

    Returns:
        The configured ``DataAnalysisEnv``.

    Raises:
        FileNotFoundError: If ``dataset_folder`` does not exist.
        NotADirectoryError: If ``dataset_folder`` is not a directory.
    """
    if language is None:
        # This can be R or PYTHON in Docker or with a local kernel if you have R installed
        language = NBLanguage.PYTHON

    # List the dataset before creating anything on disk, so an invalid folder
    # fails without leaving an empty run directory behind.
    dataset_items = list(dataset_folder.iterdir())

    # Hash the task to get a unique identifier; the timestamp keeps repeated
    # runs of the same query in separate directories.
    task_hash = hashlib.sha256(query.encode()).hexdigest()
    trajectory_path = Path("tmp_results_dir") / f"{task_hash}-{time.time()}"
    trajectory_path.mkdir(parents=True, exist_ok=True)
    nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME

    # Copy task data to trajectory path
    for item in dataset_items:
        if item.is_file():
            shutil.copy2(item, trajectory_path)
        elif item.is_dir():
            shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)

    # Augment incoming task with CoT instructions. The pieces are joined
    # explicitly so the function's own source indentation does not leak into
    # the prompt text.
    augmented_task = "\n".join(
        [
            "Here is the user query to address:",
            "",
            "<query>",
            query,
            "</query>",
            "",
            prompts.CHAIN_OF_THOUGHT_AGNOSTIC,
            prompts.GENERAL_NOTEBOOK_GUIDELINES,
        ]
    )
    if language == NBLanguage.R:
        augmented_task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"

    return DataAnalysisEnv(
        problem_id=f"data-analysis-task-{task_hash}",
        problem=augmented_task,
        eval_mode=None,
        nb_path=nb_path,
        work_dir=trajectory_path,
        language=language,
        system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
        use_tmp_work_dir=False,
    )

In [3]:
# AVIARY ROLLOUT
# This folder only contains a single csv file on animal brain size and body mass from here:
# https://animaltraits.org/
# However, it could contain many files including nested folders
dataset_folder = Path("dataset")
query = "Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows."
environment = setup_data_analysis_env(query, dataset_folder)

agent = AgentConfig(
    agent_type="ReActAgent",
    agent_kwargs={
        "llm_model": {
            "model": "claude-3-7-sonnet-20250219",  # any litellm supported model will work
            "parallel_tool_calls": False,
            "num_retries": 5,
            "temperature": 1.0,
        },
        "hide_old_env_states": True,
    },
)

agent = agent.construct_agent()
rollout = RolloutManager(agent=agent)

# You can see the notebook updating live in the tmp_results_dir folder
result = await rollout.sample_trajectories(environments=[environment], max_steps=10)

print(f"Trajectory completed! Final notebook available at: \n", environment.nb_path)
print(f"Final agent answer:\n{environment.state.answer}")

Encountered exception during tool call for tool edit_cell: ValueError("Error executing cell 0:\nName: ModuleNotFoundError\nValue: No module named 'rpy2'\n")


Trajectory completed! Final notebook available at: 
 tmp_results_dir/bf222a115d3970be6e12430b1cd57eb67d2f36b1950ed5765a247a1f071e7569-1741036231.876748/notebook.ipynb
Final agent answer:
Based on our comprehensive analysis of the brain_size_data.csv dataset with a focus on crows (Corvidae family), we can make the following key observations:

1. DATA REPRESENTATION:
   - Corvid species are well-represented in the dataset with 73 entries (7.6% of all bird entries)
   - The dataset contains 68 unique corvid species including various crows, jays, magpies, and ravens
   - Data completeness for corvids is excellent for brain size (97.3%) and good for body mass (69.9%), but poor for metabolic rate (only 2.7%)

2. BRAIN SIZE CHARACTERISTICS:
   - Corvids have significantly larger brains than other birds, with an average brain size of 5.68 grams compared to 3.76 grams for other birds
   - Corvids have approximately 1.51 times larger brains than the average bird
   - The largest-brained corvid s

In [4]:
# INSPECT THE RESULT
# Maximum number of characters of each message shown in the preview below.
PREVIEW_CHARS = 200

trajectory = result[0]
# You can inspect each step in the trajectory and see what the agent's reasoning was,
# what tool it called, and what the observation was
for step_index, step in enumerate(trajectory.steps):
    print(f"Timestep {step_index}")
    print(f"Done: {step.done}")
    print("Agent Reasoning:")
    for message in step.agent_state.messages:
        if message.content:
            preview = message.content[:PREVIEW_CHARS]
            # Only flag the message as truncated when text was actually cut off.
            marker = " [Truncated]" if len(message.content) > PREVIEW_CHARS else ""
            print(f"Message: {preview}{marker}")
    # print(f"Observation: {step.observation[:200]} [Truncated]")
    print(f"Action: {step.action.value}")
    print("---")

Timestep 0
Done: False
Agent Reasoning:
Action: Tool request message '' for tool calls: list_workdir() [id=toolu_01Jp9WASaSKkeFaXzk74on7V]
---
Timestep 1
Done: False
Agent Reasoning:
Message:     Here is the user query to address:

    <query>
    Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.
    </query>

    
Follow these  [Truncated]
Message: [{"type": "text", "text": "Markdown representation of notebook contents (/workspace/notebook.ipynb):\n\n"}] [Truncated]
Message: 
You are an expert data scientist.
Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query
The notebook should contain all n [Truncated]
Message: Thought: I'll analyze the dataset with a focus on crows as requested. Let's start by exploring the directory to understand what data we have available.

Thought: First, I need to list the working dire [Truncated]
Message: Continue... [

In [5]:
# VANILLA ROLLOUT - this is a simplified version of what the RolloutManager does
# It steps the agent and the environment by hand and records every step as a
# Transition. `agent` is the object returned by `construct_agent()` in the
# AVIARY ROLLOUT cell, so that cell must have been run first.
dataset_folder = Path("dataset")
query = "Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows."
# Build a new environment (and a new results directory) for this rollout
environment = setup_data_analysis_env(query, dataset_folder)

# reset() yields the first observation and the tools handed to the agent
obs, tools = await environment.reset()
agent_state = await agent.init_state(tools)
trajectory = Trajectory()
max_steps = 10
for timestep in range(max_steps):
    # Ask the agent for its next action given the current state and observation
    action, next_agent_state, value = await agent.get_asv(agent_state, obs)
    # Apply the action to the environment
    next_obs, reward, done, trunc = await environment.step(action.value)
    # Create the transition object
    transition = Transition(
        timestep=timestep,
        agent_state=agent_state,
        next_agent_state=next_agent_state,
        observation=obs,
        next_observation=next_obs,
        action=action,
        reward=reward,
        done=done,
        truncated=trunc,
        value=value,
    )
    # Update steps by creating a new list with the additional transition
    trajectory.steps = [*trajectory.steps, transition]
    # Stop as soon as the environment reports the episode finished or was truncated
    if done or trunc:
        break

    # Carry the new state and observation into the next iteration
    agent_state = next_agent_state
    obs = next_obs
# NOTE(review): nothing in this cell closes the environment after the loop -
# confirm whether it needs an explicit close/cleanup call here.

Encountered exception during tool call for tool edit_cell: ValueError("Error executing cell 0:\nName: OSError\nValue: 'seaborn-whitegrid' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)\n")
