In [1]:
# One of the key factors of Eureka is that it tries 32 different approaches before taking the best one and its history. This is very similar to a MCTS.

In [None]:
# Pseudocode:
# 1 system prompt to just write a train.py file
# 2 run this 32 times
# 3 execute all 32
# 4 take the best one and save its training history

# Prompt 2: Reward reflection and feedback
# 5 reward reflection and feedback
# 6 run this 32 times
# 7 execute all 32
# 8 take the best one and save its training history
# 9 repeat

In [1]:
import os

# Define the new working directory relative to the current working directory
# or use an absolute path
new_working_directory = os.path.join(os.getcwd(), '..', '..', '..') # Set to MLAgentBenhc
print("New working directory: ", new_working_directory)

# Change the current working directory
os.chdir(new_working_directory)

print("New Working Directory:", os.getcwd())

New working directory:  c:\Users\kevihuang\OneDrive - Microsoft\Desktop\projects\MLAgentBench\MLAgentBench_v2\agents\Eureka\..\..\..
New Working Directory: c:\Users\kevihuang\OneDrive - Microsoft\Desktop\projects\MLAgentBench


In [2]:
from MLAgentBench_v2.agents.agent import Agent
import numpy as np 
import json
import logging 
import os
import re
import subprocess
from pathlib import Path
import shutil
import time 
import types
import copy
import ast

In [3]:
# Instantiate an environment
from types import SimpleNamespace
from MLAgentBench_v2.environment import Environment

args = SimpleNamespace(
    task='home-data-for-ml-course',
    task_type='kaggle',
    log_dir='logs/house-price-testing_eureka_gpt4_v3',
    work_dir='workspace',
    max_steps=50,
    max_time=18000,
    device=0,
    python='/home/user/micromamba/envs/autogpt/bin/python',
    interactive=False,
    resume=None,
    resume_step=0,
    agent_type='VoyagerAgent', # Just for instantiation -- doesn't actually do anything
    # llm_name='gpt-3.5-turbo-1106',
    # fast_llm_name='gpt-3.5-turbo-1106',
    # edit_script_llm_name='gpt-3.5-turbo-1106',
    llm_name='gpt-4-1106-preview',
    fast_llm_name='gpt-4-1106-preview',
    edit_script_llm_name='gpt-4-1106-preview',
    edit_script_llm_max_tokens=4000,
    agent_max_steps=50,
    actions_remove_from_prompt=[],
    actions_add_to_prompt=[],
    no_retrieval=False,
    valid_format_entires=None,
    max_steps_in_context=3,
    max_observation_steps_in_context=3,
    max_retries=4,
    langchain_agent='zero-shot-react-description'
)

env = Environment(args)

No module named 'helm'
Could not load CRFM API key crfm_api_key.txt.
[Errno 2] No such file or directory: 'claude_api_key.txt'
Could not load anthropic API key claude_api_key.txt.
Initializing environment...
args namespace(task='home-data-for-ml-course', task_type='kaggle', log_dir='logs/house-price-testing_eureka_gpt4_v3', work_dir='workspace', max_steps=50, max_time=18000, device=0, python='/home/user/micromamba/envs/autogpt/bin/python', interactive=False, resume=None, resume_step=0, agent_type='VoyagerAgent', llm_name='gpt-4-1106-preview', fast_llm_name='gpt-4-1106-preview', edit_script_llm_name='gpt-4-1106-preview', edit_script_llm_max_tokens=4000, agent_max_steps=50, actions_remove_from_prompt=[], actions_add_to_prompt=[], no_retrieval=False, valid_format_entires=None, max_steps_in_context=3, max_observation_steps_in_context=3, max_retries=4, langchain_agent='zero-shot-react-description')
Preparing task home-data-for-ml-course , of type:  kaggle


In [None]:
# Define the directory path you want to create
directory_path = env.work_dir + '/eureka'

# Check if the directory already exists to avoid errors
if not os.path.exists(directory_path):
    os.makedirs(directory_path)

In [8]:
# Pseudocode:
# 1 system prompt to just write a train.py file

class EurekaAgent(Agent):
    def __init__(self, env, completed_tasks=[], failed_tasks=[]):
        super().__init__(env)
        self.completed_tasks = env.completed_tasks
        self.failed_tasks = env.failed_tasks

    def initial_system_prompt(self, env):
        self.system_prompt_initial = f'''You are a machine learning engineer trying to write machine learning pipelines to solve machine learning tasks as effectively as possible.

Your goal is to write a model training script for the environment that will help the model achieve the highest accuracy possible.

Research Goal: {self.research_problem}

I will give you the following information:
Files: these are my current files and its contents that I have in my working directory.

The output format should be executable python code, and only code.
'''
        self.user_prompt_initial = ""
        for file in self.files:
            args = {
                'file_name': file, 
                'update_files_action_result_history': False
            }
            self.user_prompt_initial += f'''\nFilename: {file}. Content: {self.available_actions['readFile'](**args)}\n'''
        print("\nSystem prompt: \n" + self.system_prompt_initial)
        print("\nUser prompt: \n" + self.user_prompt_initial)

        complete_text_args = {
            'system_prompt': self.system_prompt_initial,
            'user_prompt': self.user_prompt_initial,
            'max_tokens': 4096,
            'temperature': 1.0,
            'top_p': 1.0,
            'update_files_action_result_history': False,
        }

        results = []
        for i in range(2):
            output = env.complete_text_openai(**complete_text_args) # increase randomness as much as possible
            print("\nOutput: \n" + output)
            
            # TODO: potentially use the assistants API to take the output and ensure that it's in proper pythom format
            write_as_python_code_args = {
                'system_prompt': 'You are a helpful assistant. Please take the following code and return the content you would write into a python script file. Do not include the ```python or ``` at the beginning and end of the code.',
                'user_prompt': output,
                'max_tokens': 4096,
                'temperature': 0.0,
                'top_p': 0.0,
                'update_files_action_result_history': False,
            }
            output = env.complete_text_openai(**write_as_python_code_args)
            
            # Write to file
            write_args = {
                'file_name': f'eureka/run_1_script_{i}.py',
                'content': output,
                'update_files_action_result_history': False,
            }
            env.write_file(**write_args)

            # Execute file
            execute_args = {
                'script_name': f'eureka/run_1_script_{i}.py',
                'update_files_action_result_history': False,
            }
            result = env.execute_script(**execute_args)
            results.append(result)

        print("Results: ", results)
        return results            

eureka_agent = EurekaAgent(env)

In [9]:
results = eureka_agent.initial_system_prompt(env)



--- LOGGING NEW ACTION ---
Step: 23
Calling function wrapped_read_file(args = (), kwargs = {'file_name': ['data_description.txt', 'eureka', 'research_problem.txt', 'sample_submission.csv', 'sample_submission.csv.gz', 'test.csv', 'test.csv.gz', 'train.csv', 'train.csv.gz'], 'work_dir': 'workspace\\home-data-for-ml-course_branch'})

--- TOOL ERROR --- cannot read file ['data_description.txt', 'eureka', 'research_problem.txt', 'sample_submission.csv', 'sample_submission.csv.gz', 'test.csv', 'test.csv.gz', 'train.csv', 'train.csv.gz']: join() argument must be str, bytes, or os.PathLike object, not 'list'


--- Step: 24 not recorded in history

--- Step Action: Calling function wrapped_read_file(args = (), kwargs = {'file_name': ['data_description.txt', 'eureka', 'research_problem.txt', 'sample_submission.csv', 'sample_submission.csv.gz', 'test.csv', 'test.csv.gz', 'train.csv', 'train.csv.gz'], 'work_dir': '.'})

--- Step Result: EnvError: cannot read file ['data_description.txt', 'eureka

In [None]:
# 2 run this 32 times
# 3 execute all 32
# 4 take the best one and save its training history -- tbh not sure if there's a standardized way to do this? Perhaps have an agent extract just the MAE and just grab the best one. Mini waste of tokens, but it's very little. 

# Prompt 2: Reward reflection and feedback
# 5 reward reflection and feedback
# 6 run this 32 times
# 7 execute all 32
# 8 take the best one and save its training history
# 9 repeat