In [None]:
# PLATFORM ROLLOUT
import os
import time
import json
from futurehouse_client import FutureHouseClient
from futurehouse_client.models import Stage, TaskRequest, RuntimeConfig
from futurehouse_client.models.app import AuthType
import fhda.prompts as prompts
from ldp.agent import AgentConfig

# CONFIGURATION
# User-tunable settings for the rollout. Every value below is consumed further
# down when the client, the prompt and the task request are built.

# Stage passed to FutureHouseClient as `stage`. Don't change this.
CROW_STAGE = Stage.PROD
# Add your API key here; it is passed to FutureHouseClient as `api_key`.
API_KEY = ""
# Job name used as the TaskRequest `name`. Don't change this.
JOB_NAME = "job-futurehouse-data-analysis-crow-high"
# Passed as `max_steps` in the runtime config; change it to impose a limit on
# the number of steps.
MAX_STEPS = 30
# Choose between "R" and "PYTHON". The value is formatted into the prompt
# templates, sent in the environment config, and "R" adds an extra prompt.
LANGUAGE = "R"
# Location of the dataset on GCS, passed as `upload_id` in the runtime config.
# Ask someone from FutureHouse to upload new datasets.
DATA_GCS_LOCATION = "eda/flow0"
# Model name forwarded to the agent's `llm_model` settings. Feel free to use
# any Litellm supported model.
MODEL_NAME = "claude-3-7-sonnet-latest"
# Sampling temperature forwarded alongside MODEL_NAME. Feel free to try
# different model temperatures.
TEMPERATURE = 1.0

# This is the place to customise the prompt. The default wraps the query in
# chain-of-thought (CoT) instructions, but that is optional and other prompting
# strategies are worth experimenting with.
query = """
Make a discovery using this dataset
"""

# The final prompt is a sequence of sections separated by single newlines:
# a short header, the query wrapped in <query> tags, then the shared
# language-specific prompt templates.
task_sections = [
    "Here is the user query to address:",
    "",
    "<query>",
    query,
    "</query>",
    prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE),
    prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE),
]

# R gets one extra section of prompting to avoid long R output blocks.
if LANGUAGE == "R":
    task_sections.append(f"{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}")

task = "\n".join(task_sections)


# You shouldn't have to change anything below here

# Client for the FutureHouse platform, authenticated with the API key above.
client = FutureHouseClient(
    stage=CROW_STAGE, auth_type=AuthType.API_KEY, api_key=API_KEY
)

# Agent definition: a ReAct agent driven by the configured model/temperature.
llm_settings = {"name": MODEL_NAME, "temperature": TEMPERATURE}
agent_config = AgentConfig(
    agent_type="ReActAgent",
    agent_kwargs={"llm_model": llm_settings},
)

# Settings forwarded to the execution environment.
environment_settings = {
    "run_notebook_on_edit": False,
    "eval": True,  # DO NOT CHANGE THIS
    "language": LANGUAGE,
}

# Runtime configuration: step budget, dataset location, environment and agent.
runtime_settings = RuntimeConfig(
    max_steps=MAX_STEPS,
    upload_id=DATA_GCS_LOCATION,  # This is just an example dataset
    environment_config=environment_settings,
    agent=agent_config,
)

# The task request that gets submitted to the platform.
job_data = TaskRequest(name=JOB_NAME, query=task, runtime_config=runtime_settings)
# Submit the task and poll its status until it is no longer queued or running.
# The same constant drives both the sleep and the progress message so the two
# cannot drift apart (the message used to announce 30 seconds while the code
# actually waited 15).
POLL_INTERVAL_SECONDS = 15

job_id = client.create_task(job_data)
status = "in progress"
while status in ("in progress", "queued"):
    print(
        f"Waiting for task to complete... checking again in {POLL_INTERVAL_SECONDS} seconds"
    )
    time.sleep(POLL_INTERVAL_SECONDS)
    status = client.get_task(job_id).status

# Stop here on failure; RuntimeError is still caught by `except Exception`.
if status == "failed":
    raise RuntimeError("Task failed")

# Fetch the verbose task record and pull the agent's answer out of the
# environment state.
job_result = client.get_task(job_id, verbose=True)
answer = job_result.environment_frame["state"]["state"]["answer"]
print(
    f"Task completed, the full analysis is available at:https://platform.futurehouse.org/trajectories/{job_id}\n Agent answer: {answer}"
)

In [None]:
# You can also view the notebook locally by saving it to a directory of your choice
# Define the path where you want to save the notebook
notebook_path = "output/analysis_notebook.ipynb"

# Only create the parent directory when the path actually has one: for a bare
# file name os.path.dirname() returns "" and os.makedirs("") would raise.
notebook_dir = os.path.dirname(notebook_path)
if notebook_dir:
    os.makedirs(notebook_dir, exist_ok=True)

# The notebook state stored on the task result is written out as indented JSON.
notebook_content = job_result.environment_frame["state"]["state"]["nb_state"]
# Explicit encoding so the output does not depend on the platform default.
with open(notebook_path, "w", encoding="utf-8") as f:
    json.dump(notebook_content, f, indent=2)

print(f"Notebook saved to {os.path.abspath(notebook_path)}")