In [None]:
import os
import time

from futurehouse_client import FutureHouseClient
from futurehouse_client.models import TaskRequest, RuntimeConfig
from futurehouse_client.models.app import AuthType
import fhda.prompts as prompts

In [None]:
# Configure the job and instantiate the FutureHouse client.
# The API key is taken from the FH_API_KEY environment variable when it is set, so the
# secret does not have to be saved inside the notebook. If the variable is not set,
# the empty-string fallback below is used – replace it with your key in that case.
FH_API_KEY = os.environ.get("FH_API_KEY", "")  # Add your API key here
JOB_NAME = "job-futurehouse-data-analysis-crow-high"  # Don't change this
UPLOAD_ID = (
    "finch_tutorial"  # This is the folder name of the dataset you uploaded to GCS
)

client = FutureHouseClient(
    auth_type=AuthType.API_KEY,
    api_key=FH_API_KEY,
)

In [None]:
# Upload the local "dataset" path under UPLOAD_ID – this only needs to be run once
client.upload_file(
    JOB_NAME,
    file_path="dataset",
    upload_id=UPLOAD_ID,
)

In [None]:
# List the files currently stored under your upload ID in the GCS folder
client.list_files(
    JOB_NAME,
    upload_id=UPLOAD_ID,
)

In [None]:
# Define your task
# The prompt is assembled here and can be edited freely. Chain-of-thought prompting is
# the default, but it is optional – feel free to try other prompting strategies.
LANGUAGE = "PYTHON"  # Either "R" or "PYTHON"
MAX_STEPS = 30  # Upper bound on the number of steps the agent may take
query = "Make a short notebook with visualizations exploring the dataset."

# Fill the language into the two shared prompt templates
cot_prompt = prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)
guidelines_prompt = prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)

# Wrap the research question in <query> tags
question_prompt = (
    "Here is the research question to address:\n"
    "<query>\n"
    f"{query}\n"
    "</query>\n"
)

# NOTE(review): nothing is inserted between the guidelines and the question header, so
# the two run together unless the guidelines text itself ends in a newline – confirm.
task = f"{cot_prompt}\n{guidelines_prompt}{question_prompt}"

# Optional extra prompting to avoid long R output blocks – safe to remove
if LANGUAGE == "R":
    task = f"{task}\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"

In [None]:
# Build and submit the task request – you shouldn't need to change anything here
runtime_config = RuntimeConfig(
    max_steps=MAX_STEPS,
    upload_id=UPLOAD_ID,
    environment_config={
        "eval": True,  # DO NOT CHANGE THIS
        "language": LANGUAGE,
    },
)
job_data = TaskRequest(name=JOB_NAME, query=task, runtime_config=runtime_config)

# create_task returns the trajectory ID that every later cell uses to refer to this job
trajectory_id = client.create_task(job_data)

progress_message = f"Task running on platform, you can view progress live at:https://platform.futurehouse.org/trajectories/{trajectory_id}"
print(progress_message)

In [None]:
# Jobs take on average 3-10 minutes to complete
POLL_INTERVAL_SECONDS = 15  # Pause between status checks
MAX_WAIT_SECONDS = 60 * 60  # Stop waiting after this long; increase for longer jobs

# Poll until the task leaves the "queued" / "in progress" states, but never forever:
# a monotonic deadline bounds the wait so a stuck job cannot hang the notebook.
deadline = time.monotonic() + MAX_WAIT_SECONDS
status = client.get_task(trajectory_id).status  # Check once before the first sleep
while status in ("in progress", "queued"):
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Task {trajectory_id} is still '{status}' after {MAX_WAIT_SECONDS} seconds"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
    status = client.get_task(trajectory_id).status

# Include the trajectory ID so the failing run can be looked up on the platform
if status == "failed":
    raise RuntimeError(f"Task failed (trajectory {trajectory_id})")

# Fetch the task again with verbose=True and read the answer from its environment frame
job_result = client.get_task(trajectory_id, verbose=True)
answer = job_result.environment_frame["state"]["state"]["answer"]
print(f"The agent's answer to your research question is: \n{answer}")

In [None]:
# In addition to viewing the notebook and reasoning trace via the platform,
# you can also list the files in the trajectory directory and download any files you need
print(client.list_files(JOB_NAME, trajectory_id=trajectory_id))

destination_path = "output/notebook.ipynb"  # Where to save the file locally
file_path = "notebook.ipynb"  # Name of the file inside the trajectory directory

# Create the local output folder up front so the download does not depend on it
# already existing; `or "."` covers a destination with no directory component.
os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True)

client.download_file(
    JOB_NAME,
    trajectory_id=trajectory_id,
    file_path=file_path,
    destination_path=destination_path,
)
print(f"Notebook saved to {os.path.abspath(destination_path)}")