Skip to content

Commit

Permalink
create debug challenge (#4286)
Browse files Browse the repository at this point in the history
Co-authored-by: Merwane Hamadi <merwanehamadi@gmail.com>
Co-authored-by: symphony <john.tian31@gmail.com>
  • Loading branch information
3 people committed May 30, 2023
1 parent 87776b2 commit f6ee61d
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 2 deletions.
6 changes: 6 additions & 0 deletions BULLETIN.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementati
before the next release.
Whether built-in support for the others will be added back in the future is subject to
discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280

# Challenge Workflow 🏆
If you have been working on challenges... Thank You!
However, to run the debugger challenge or other challenges that use cassettes and VCR in Docker, you will now need to run `pip uninstall vcrpy` and then `pip install -r requirements.txt` again.
This will install a new version of vcrpy that is compatible with running VCR in Docker.
This workflow will be fixed as soon as the VCRpy maintainer merges our changes.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ pytest-benchmark
pytest-cov
pytest-integration
pytest-mock
vcrpy
vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
pytest-recording
pytest-xdist
38 changes: 38 additions & 0 deletions tests/integration/agent_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,41 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp
)

return agent


@pytest.fixture
def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace):
    """Build the agent used by the debug-code challenge.

    Registers the file-operation, code-execution, code-improvement, app and
    task-status command modules, then creates a "Debug Code Agent" whose goals
    tell it to run, read and fix ``code.py``.

    :param agent_test_config: Requested as a fixture; not read in this body.
    :param memory_json_file: Passed to the agent as its ``memory``.
    :param workspace: Its ``root`` becomes the agent's workspace directory.
    :return: The configured ``Agent`` instance.
    """
    agent_name = "Debug Code Agent"

    command_registry = CommandRegistry()
    for module_name in (
        "autogpt.commands.file_operations",
        "autogpt.commands.execute_code",
        "autogpt.commands.improve_code",
        "autogpt.app",
        "autogpt.commands.task_statuses",
    ):
        command_registry.import_commands(module_name)

    # NOTE(review): the first goal refers to an "execute_code" command while the
    # last goal only allows "execute_python_file" and "write_file" (and the
    # fourth asks for "task_complete"). The wording is left untouched here
    # because it is part of the prompt sent to the model — confirm which
    # command names are intended.
    goals = [
        "1-Run the code in the file named 'code.py' using the execute_code command.",
        "2-Read code.py to understand why the code is not working as expected.",
        "3-Modify code.py to fix the error.",
        "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.",
        "Do not use any other commands than execute_python_file and write_file",
    ]
    ai_config = AIConfig(
        ai_name=agent_name,
        ai_role="an autonomous agent that specializes in debugging python code",
        ai_goals=goals,
    )
    ai_config.command_registry = command_registry

    # The prompt is built only after the command registry has been attached.
    system_prompt = ai_config.construct_full_prompt()
    # Turn continuous mode off on a Config instance before creating the agent.
    Config().set_continuous_mode(False)

    return Agent(
        ai_name=agent_name,
        memory=memory_json_file,
        command_registry=command_registry,
        config=ai_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
8 changes: 7 additions & 1 deletion tests/integration/challenges/current_score.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
"max_level_beaten": 1
}
},
"debug_code": {
"debug_code_challenge_a": {
"max_level": 1,
"max_level_beaten": 1
}
},
"kubernetes": {
"kubernetes_template_challenge_a": {
"max_level": 1,
Expand All @@ -39,4 +45,4 @@
"max_level_beaten": 1
}
}
}
}
19 changes: 19 additions & 0 deletions tests/integration/challenges/debug_code/data/two_sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# mypy: ignore-errors
from typing import List, Optional


def two_sum(nums: List, target: int) -> Optional[List[int]]:
    """Return the indices of two entries of ``nums`` that add up to ``target``.

    The list is scanned once, left to right, remembering the latest index at
    which each value was seen. As soon as the current value's complement
    (``target - num``) has been seen, the pair ``[earlier_index, current_index]``
    is returned.

    :param nums: The numbers to search.
    :param target: The sum the two selected numbers must add up to.
    :return: A two-element list of indices, or ``None`` when no pair matches.
    """
    # Maps a value to the most recent index where it occurred.
    seen = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            return [seen[complement], i]
        # Recorded only after the lookup, so an element is never paired with itself.
        seen[num] = i
    return None


# Example usage: look up the two indices in `nums` whose values add up to `target`.
nums, target = [2, 7, 11, 15], 9
result = two_sum(nums, target)
print(result)  # Output: [0, 1]
30 changes: 30 additions & 0 deletions tests/integration/challenges/debug_code/data/two_sum_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# mypy: ignore-errors
# we need a new line at the top of the file to avoid a syntax error


def test_two_sum(nums, target, expected_result):
    """Run ``two_sum`` on one case, print what it returned and assert the result.

    ``two_sum`` is not defined in this file: these tests are appended to the
    two_sum file, so the name is resolved there when the combined file runs.
    """
    actual = two_sum(nums, target)
    print(actual)
    assert actual == expected_result, (
        f"AssertionError: Expected the output to be {expected_result}"
    )


# Each entry is (nums, target, expected_result). Cases run in order, and the
# first failing assertion stops the script.
for nums, target, expected_result in (
    # test the trivial case with the first two numbers
    ([2, 7, 11, 15], 9, [0, 1]),
    # test for ability to use zero and the same number twice
    ([2, 7, 0, 15, 12, 0], 0, [2, 5]),
    # test for first and last index usage and negative numbers
    ([-6, 7, 11, 4], -2, [0, 3]),
):
    test_two_sum(nums, target, expected_result)
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import pytest
from pytest_mock import MockerFixture

from autogpt.agent import Agent
from autogpt.commands.execute_code import execute_python_file
from autogpt.commands.file_operations import append_to_file, write_to_file
from autogpt.config import Config
from tests.integration.challenges.challenge_decorator.challenge_decorator import (
challenge,
)
from tests.integration.challenges.utils import run_interaction_loop
from tests.utils import requires_api_key

# Passed to run_interaction_loop as its third argument; presumably the number of
# agent cycles to run — confirm against tests.integration.challenges.utils.
CYCLE_COUNT = 5


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
@challenge
def test_debug_code_challenge_a(
    debug_code_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    config: Config,
    level_to_run: int,
) -> None:
    """
    Check that the agent leaves a working ``code.py`` after debugging it.

    The contents of ``data/two_sum.py`` are written to ``code.py`` in the
    agent's workspace and the interaction loop is run with ``CYCLE_COUNT``.
    Afterwards the contents of ``data/two_sum_tests.py`` are appended to
    ``code.py`` and the file is executed; the test fails if the output
    contains the word "error" (case-insensitive).

    :param debug_code_agent: The agent to test.
    :param monkeypatch: pytest's monkeypatch utility, handed to the interaction loop.
    :param patched_api_requestor: Sends api requests to our API CI pipeline.
    :param config: The config object passed to the file and execution commands.
    :param level_to_run: The level to run (not read in this body; presumably
        consumed by the ``challenge`` decorator — confirm).
    """
    data_dir = Path(__file__).parent / "data"
    workspace_code_path = str(debug_code_agent.workspace.get_path("code.py"))

    # Seed the workspace with the code the agent is asked to debug.
    write_to_file(workspace_code_path, (data_dir / "two_sum.py").read_text(), config)

    run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT)

    # The test cases are appended only after the agent has finished, then the
    # combined file is executed.
    append_to_file(
        workspace_code_path, (data_dir / "two_sum_tests.py").read_text(), config
    )

    output = execute_python_file(workspace_code_path, config)
    assert "error" not in output.lower(), f"Errors found in output: {output}!"

0 comments on commit f6ee61d

Please sign in to comment.