## Example: how to execute code from a string

In [21]:
# Python source held as a plain string; exec() below runs it in the current namespace.
code_to_execute = "print('Hello, World!')"

try:
    exec(code_to_execute)
except Exception as error:
    # Report the failure as a message instead of letting the traceback end the cell.
    print(f"An error occurred: {error}")

Hello, World!


## The following steps are needed for testing:
1. Identify the code in the agent's output string (ask the agent to respond only with code)
2. Save the code in a dedicated variable
3. Compare the code's output with the ground-truth output defined by the researcher

Before setting up the testing framework like this, we have to know which functions we want to test, because this determines the ground-truth output. The research into which functions to use should therefore happen before the framework is set up. @Felix

See below for sample test walkthrough:

In [7]:
import pandas as pd

# Prompt given to the agent; it asks for code only and fixes the name of the output variable.
agent_input = (
    "How can I calculate the mean of all the values this list: [1, 2, 3, 4, 5] in Python? "
    "Please answer only with the code. "
    "Save the result in a variable called 'result'."
)

# Stand-in for the agent's reply (hard-coded for this walkthrough).
agent_output = """
my_list = [1, 2, 3, 4, 5]
result = sum(my_list) / len(my_list)
print(result)
"""

# No extraction is needed in this example. If the agent output contains more
# than code, the code has to be extracted before this assignment.
code = agent_output

# Ground truth, to be defined by the researcher.
desired_result = 3.0

def test_my_function(code, desired_result):
    local_vars = {}
    exec(code, globals(), local_vars)
    agent_result = local_vars.get('result', None)

    if isinstance(agent_result, pd.DataFrame):
        if desired_result.equals(agent_output):
            print("Agent output was correct.")
        else:
            print("Agent output was not correct.")

    else:
        if agent_result == desired_result:
            print("Agent output was correct.")
        else:
            print('Agent output was not correct.')

# Check the sample agent code against the researcher-defined ground truth.
test_my_function(code=code, desired_result=desired_result)

3.0
Agent output was correct.


## Testing this with the sample pandas function `pd.from_dummies()`

In [34]:
import pandas as pd

In [56]:
# Dummy-encoded input frame; column names are split on "_" by from_dummies below.
df = pd.DataFrame(
    {
        "col1_a": [1, 0, 1],
        "col1_b": [0, 1, 0],
        "col2_a": [0, 1, 0],
        "col2_b": [1, 0, 0],
        "col2_c": [0, 0, 1],
    }
)

# Ground truth for this test: the frame produced by pandas' own from_dummies.
desired_result = pd.from_dummies(df, sep="_")

# Prompt for the agent; it asks for code only and fixes the name of the output variable.
agent_input = """
How can I convert this dataframe: df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]}) into a categorical dataframe,
using only pandas. Please only respond with the code. Use only pandas and the standard libraries. Save the result dataframe in a variable called "result".
"""

# Hard-coded sample reply used in place of a live agent call.
agent_output = """
import pandas as pd

df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]})

result = df.astype("category")
"""

# Execute the sample reply and compare its `result` with the ground truth.
test_my_function(agent_output, desired_result)

Agent output was not correct.


## Testing with agent

In [1]:
from agent import AIAgent
from clients import OpenAIClient
import json

# import dotenv
# import os
from settings import settings

# dotenv.load_dotenv()
# service_key = eval(os.getenv('SERVICE_KEY'))

# Build the LLM client from the project settings and wrap it in the agent.
client = OpenAIClient(settings.SERVICE_KEY, settings.LLM_CONFIG)
agent = AIAgent(client)

# Question posed to the agent (same dataframe as in the from_dummies test above).
user_prompt = """
How can I convert this dataframe: df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]}) into a categorical dataframe?
"""

# Run the agent on the prompt and keep whatever it returns.
# NOTE(review): the value displayed further down is a Python source string
# defining `response_function` - confirm that agent.run always returns code.
final_answer = agent.run(user_prompt)

{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': '{"Question": "How can I convert this dataframe: df = pd.DataFrame({\\"col1_a\\": [1, 0, 1], \\"col1_b\\": [0, 1, 0], \\"col2_a\\": [0, 1, 0], \\"col2_b\\": [1, 0, 0], \\"col2_c\\": [0, 0, 1]}) into a categorical dataframe?",\n "Thought": "To convert the dataframe into a categorical dataframe, we need to iterate over each column and convert it to a categorical type. We can use the astype(\'category\') function in pandas to do this.",\n "Action": "def response_function(df):\\n    for col in df.columns:\\n        df[col] = df[col].astype(\'category\')\\n    return df"}', 'role': 'assistant'}}], 'created': 1701292149, 'id': 'chatcmpl-8QM5tSPf9I3eJEfOb4wj2bzoDlgsI', 'model': 'gpt-4-32k', 'object': 'chat.completion', 'usage': {'completion_tokens': 172, 'prompt_tokens': 368, 'total_tokens': 540}}
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': '{"Thought": "I now know the final answer",\n "Ans

In [5]:
# Display the value returned by the agent (the source of its response function).
final_answer

"def response_function(df):\n    for col in df.columns:\n        df[col] = df[col].astype('category')\n    return df"

In [9]:
# Run the agent's code with its own dictionary as globals, so the names it
# defines end up in `namespace` rather than in the notebook's globals.
namespace = {}
exec(final_answer, namespace)

# The executed code is expected to have defined `response_function`; fetch it by name.
response_function = namespace['response_function']

# Same dummy-encoded sample frame that appears in the prompt.
sample_df = pd.DataFrame(
    {
        "col1_a": [1, 0, 1],
        "col1_b": [0, 1, 0],
        "col2_a": [0, 1, 0],
        "col2_b": [1, 0, 0],
        "col2_c": [0, 0, 1],
    }
)
result_df = response_function(sample_df)

print(result_df)

  col1_a col1_b col2_a col2_b col2_c
0      1      0      0      1      0
1      0      1      1      0      0
2      1      0      0      0      1
