create debug challenge (#4286)

Co-authored-by: Merwane Hamadi <merwanehamadi@gmail.com> Co-authored-by: symphony <john.tian31@gmail.com>
Significant-Gravitas · May 30, 2023 · f6ee61d · f6ee61d
1 parent 87776b2
commit f6ee61d
Show file tree

Hide file tree

Showing 7 changed files with 152 additions and 2 deletions.
diff --git a/BULLETIN.md b/BULLETIN.md
@@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementati
 before the next release.
 Whether built-in support for the others will be added back in the future is subject to
 discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280
+
+# Challenge Workflow 🏆
+If you have been working on challenges... Thank you!
+To run the debugger challenge, or any other challenge that uses cassettes and VCR, in Docker, you will now need to run `pip uninstall vcrpy` and then `pip install -r requirements.txt` again.
+This installs our fork of vcrpy, which is compatible with running VCR in Docker.
+This workaround will no longer be needed once the vcrpy maintainers merge our changes.
diff --git a/requirements.txt b/requirements.txt
@@ -58,6 +58,6 @@ pytest-benchmark
 pytest-cov
 pytest-integration
 pytest-mock
-vcrpy
+vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
 pytest-recording
 pytest-xdist
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
@@ -246,3 +246,41 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp
     )
 
     return agent
+
+
+@pytest.fixture
+def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace):
+    """Build the agent used by the debug-code challenge.
+
+    The agent gets a command registry loaded from the file-operation,
+    code-execution, code-improvement, app and task-status command modules,
+    and a goal list telling it to run, read and fix ``code.py``.
+
+    :param agent_test_config: Requested as a fixture but not referenced in
+        the body; presumably needed for its setup side effects — confirm.
+    :param memory_json_file: Memory backend handed to the agent.
+    :param workspace: Workspace whose root becomes the agent's working
+        directory.
+    :return: The configured ``Agent`` instance.
+    """
+    agent_name = "Debug Code Agent"
+
+    command_registry = CommandRegistry()
+    for module_name in (
+        "autogpt.commands.file_operations",
+        "autogpt.commands.execute_code",
+        "autogpt.commands.improve_code",
+        "autogpt.app",
+        "autogpt.commands.task_statuses",
+    ):
+        command_registry.import_commands(module_name)
+
+    # NOTE(review): goal 1 names "execute_code" while the last goal only
+    # allows "execute_python_file" and "write_file" (and goal 4 asks for
+    # "task_complete"). The wording is kept verbatim because it is part of
+    # the prompt sent to the model — confirm before changing it.
+    goals = [
+        "1-Run the code in the file named 'code.py' using the execute_code command.",
+        "2-Read code.py to understand why the code is not working as expected.",
+        "3-Modify code.py to fix the error.",
+        "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.",
+        "Do not use any other commands than execute_python_file and write_file",
+    ]
+    ai_config = AIConfig(
+        ai_name=agent_name,
+        ai_role="an autonomous agent that specializes in debugging python code",
+        ai_goals=goals,
+    )
+    ai_config.command_registry = command_registry
+
+    # The prompt is built before continuous mode is switched off, as in the
+    # original ordering.
+    system_prompt = ai_config.construct_full_prompt()
+    Config().set_continuous_mode(False)
+
+    return Agent(
+        ai_name=agent_name,
+        memory=memory_json_file,
+        command_registry=command_registry,
+        config=ai_config,
+        next_action_count=0,
+        system_prompt=system_prompt,
+        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        workspace_directory=workspace.root,
+    )
diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json
@@ -19,6 +19,12 @@
             "max_level_beaten": 1
         }
     },
+    "debug_code": {
+        "debug_code_challenge_a": {
+            "max_level": 1,
+            "max_level_beaten": 1
+        }
+    },
     "kubernetes": {
         "kubernetes_template_challenge_a": {
             "max_level": 1,
@@ -39,4 +45,4 @@
             "max_level_beaten": 1
         }
     }
-}
+}
diff --git a/tests/integration/challenges/debug_code/data/two_sum.py b/tests/integration/challenges/debug_code/data/two_sum.py
@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+from typing import List, Optional
+
+
+def two_sum(nums: List, target: int) -> Optional[List[int]]:
+    """Find two entries of ``nums`` that add up to ``target``.
+
+    The list is scanned once from left to right; the first position whose
+    complement has already been seen ends the search.
+
+    :param nums: The numbers to search.
+    :param target: The sum the two chosen entries must add up to.
+    :return: ``[i, j]`` with ``i < j`` such that
+        ``nums[i] + nums[j] == target``, or ``None`` when no such pair exists.
+    """
+    # Maps each value seen so far to the index of its latest occurrence.
+    seen = {}
+    for i, num in enumerate(nums):
+        complement = target - num
+        if complement in seen:
+            return [seen[complement], i]
+        # Recorded only after the lookup so an entry is never paired with itself.
+        seen[num] = i
+    return None
+
+
+# Example usage (module-level, so it runs every time this file is executed):
+nums = [2, 7, 11, 15]
+target = 9
+result = two_sum(nums, target)
+print(result)  # Output: [0, 1]
diff --git a/tests/integration/challenges/debug_code/data/two_sum_tests.py b/tests/integration/challenges/debug_code/data/two_sum_tests.py
@@ -0,0 +1,30 @@
+# mypy: ignore-errors
+# we need a new line at the top of the file to avoid a syntax error
+
+
+def test_two_sum(nums, target, expected_result):
+    """Run ``two_sum`` on one case, print the result and assert it matches.
+
+    :param nums: The numbers passed to ``two_sum``.
+    :param target: The target sum passed to ``two_sum``.
+    :param expected_result: The value ``two_sum`` is expected to return.
+    """
+    # two_sum is not defined in this file: these checks are appended to the
+    # two_sum file before being run, so the undefined-name error can be
+    # ignored for now.
+    actual = two_sum(nums, target)
+    print(actual)
+    assert actual == expected_result, (
+        f"AssertionError: Expected the output to be {expected_result}"
+    )
+
+
+# Each entry is (nums, target, expected_result); the cases run in order.
+cases = [
+    # the trivial case with the first two numbers
+    ([2, 7, 11, 15], 9, [0, 1]),
+    # ability to use zero and the same number twice
+    ([2, 7, 0, 15, 12, 0], 0, [2, 5]),
+    # first and last index usage and negative numbers
+    ([-6, 7, 11, 4], -2, [0, 3]),
+]
+for nums, target, expected_result in cases:
+    test_two_sum(nums, target, expected_result)
diff --git a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import pytest
+from pytest_mock import MockerFixture
+
+from autogpt.agent import Agent
+from autogpt.commands.execute_code import execute_python_file
+from autogpt.commands.file_operations import append_to_file, write_to_file
+from autogpt.config import Config
+from tests.integration.challenges.challenge_decorator.challenge_decorator import (
+    challenge,
+)
+from tests.integration.challenges.utils import run_interaction_loop
+from tests.utils import requires_api_key
+
+# Passed to run_interaction_loop by the test below; presumably the number of
+# agent cycles to run — confirm against that helper.
+CYCLE_COUNT = 5
+
+
+@pytest.mark.vcr
+@requires_api_key("OPENAI_API_KEY")
+@challenge
+def test_debug_code_challenge_a(
+    debug_code_agent: Agent,
+    monkeypatch: pytest.MonkeyPatch,
+    patched_api_requestor: MockerFixture,
+    config: Config,
+    level_to_run: int,
+) -> None:
+    """
+    Check that the agent leaves a working ``code.py`` in its workspace.
+
+    The contents of ``data/two_sum.py`` are written to ``code.py`` in the
+    agent's workspace and the agent is run through the interaction loop.
+    Afterwards the checks from ``data/two_sum_tests.py`` are appended to the
+    same file, the file is executed, and its output must not contain the
+    word "error" (case-insensitive).
+
+    :param debug_code_agent: The agent to test.
+    :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
+    :param patched_api_requestor: Sends api requests to our API CI pipeline.
+    :param config: The config object for the agent.
+    :param level_to_run: The level to run.
+    """
+    data_dir = Path(__file__).parent / "data"
+    source_script = data_dir / "two_sum.py"
+    checks_script = data_dir / "two_sum_tests.py"
+    workspace_file = str(debug_code_agent.workspace.get_path("code.py"))
+
+    # Seed the workspace, then let the agent work on the file.
+    write_to_file(workspace_file, source_script.read_text(), config)
+    run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT)
+
+    # The checks are appended only after the agent has finished, so they run
+    # against whatever version of code.py the agent left behind.
+    append_to_file(workspace_file, checks_script.read_text(), config)
+
+    output = execute_python_file(workspace_file, config)
+    assert "error" not in output.lower(), f"Errors found in output: {output}!"