Rearrange tests & fix CI (#4596)

* Rearrange tests into unit/integration/challenge categories
* Fix linting + `tests.challenges` imports
* Fix obscured duplicate test in test_url_validation.py
* Move VCR conftest to tests.vcr
* Specify tests to run & their order (unit -> integration -> challenges) in CI
* Fail Docker CI when tests fail
* Fix import & linting errors in tests
* Fix `get_text_summary`
* Fix linting errors
* Clean up pytest args in CI
* Remove bogus tests from GoCodeo
Significant-Gravitas · Jun 6, 2023 · dafbd11 · dafbd11 · vercel · Jun 6, 2023
1 parent 8a881f7
commit dafbd11
Show file tree

Hide file tree

Showing 59 changed files with 150 additions and 377 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -5,7 +5,7 @@ on:
     branches: [ master, ci-test* ]
     paths-ignore:
       - 'tests/Auto-GPT-test-cassettes'
-      - 'tests/integration/challenges/current_score.json'
+      - 'tests/challenges/current_score.json'
   pull_request:
     branches: [ stable, master ]
   pull_request_target:
@@ -148,8 +148,9 @@ jobs:
 
       - name: Run pytest with coverage
         run: |
-          pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term
-          python tests/integration/challenges/utils/build_current_score.py
+          pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
+            tests/unit tests/integration tests/challenges
+          python tests/challenges/utils/build_current_score.py
         env:
           CI: true
           PROXY: ${{ secrets.PROXY }}
@@ -179,7 +180,7 @@ jobs:
       - name: Push updated challenge scores
         if: github.event_name == 'push'
         run: |
-          score_file="tests/integration/challenges/current_score.json"
+          score_file="tests/challenges/current_score.json"
 
           if ! git diff --quiet $score_file; then
             git add $score_file

diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml
@@ -5,7 +5,7 @@ on:
     branches: [ master ]
     paths-ignore:
       - 'tests/Auto-GPT-test-cassettes'
-      - 'tests/integration/challenges/current_score.json'
+      - 'tests/challenges/current_score.json'
   pull_request:
     branches: [ master, stable ]
 
@@ -108,15 +108,18 @@ jobs:
           set +e
           test_output=$(
             docker run --env CI --env OPENAI_API_KEY --entrypoint python ${{ env.IMAGE_NAME }} -m \
-            pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term 2>&1
+            pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing \
+              tests/unit tests/integration 2>&1
           )
           test_failure=$?
-  
+
           echo "$test_output"
-  
+
           cat << $EOF >> $GITHUB_STEP_SUMMARY
           # Tests $([ $test_failure = 0 ] && echo '✅' || echo '❌')
           \`\`\`
           $test_output
           \`\`\`
           $EOF
+
+          exit $test_failure
diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml
@@ -6,7 +6,7 @@ on:
     branches: [ master ]
     paths-ignore:
       - 'tests/Auto-GPT-test-cassettes'
-      - 'tests/integration/challenges/current_score.json'
+      - 'tests/challenges/current_score.json'
   # So that the `dirtyLabel` is removed if conflicts are resolve
   # We recommend `pull_request_target` so that github secrets are available.
   # In `pull_request` we wouldn't be able to change labels of fork PRs

diff --git a/autogpt/app.py b/autogpt/app.py
@@ -142,7 +142,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str:
     Returns:
         str: The summary of the text
     """
-    text = scrape_text(url)
+    text = scrape_text(url, config)
     summary, _ = summarize_text(text, question=question)
 
     return f""" "Result" : {summary}"""

diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md
@@ -70,7 +70,7 @@ def kubernetes_agent(
 ```
 
 ## Creating your challenge
-Go to `tests/integration/challenges`and create a file that is called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one.
+Go to `tests/challenges` and create a file that is called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one.
 
 Your test could look something like this 
 
@@ -84,7 +84,7 @@ import yaml
 
 from autogpt.commands.file_operations import read_file, write_to_file
 from tests.integration.agent_utils import run_interaction_loop
-from tests.integration.challenges.utils import run_multiple_times
+from tests.challenges.utils import run_multiple_times
 from tests.utils import requires_api_key
 
 

diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md
@@ -5,7 +5,7 @@
 **Command to try**:
 
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
+pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2
 ```
 
 ## Description

diff --git a/docs/challenges/information_retrieval/challenge_b.md b/docs/challenges/information_retrieval/challenge_b.md
@@ -5,7 +5,7 @@
 **Command to try**:
 
 ```
-pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py
+pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
 ```
 
 ## Description

diff --git a/docs/challenges/memory/challenge_b.md b/docs/challenges/memory/challenge_b.md
@@ -4,7 +4,7 @@
 
 **Command to try**: 
 ```
-pytest -s tests/integration/challenges/memory/test_memory_challenge_b.py --level=3
+pytest -s tests/challenges/memory/test_memory_challenge_b.py --level=3
 ``
 
 ## Description
@@ -41,4 +41,3 @@ Write all the task_ids into the file output.txt. The file has not been created y
 ## Objective
 
 The objective of this challenge is to test the agent's ability to follow instructions and maintain memory of the task IDs throughout the process. The agent successfully completed this challenge if it wrote the task ids in a file.
-
diff --git a/docs/challenges/memory/challenge_c.md b/docs/challenges/memory/challenge_c.md
@@ -4,7 +4,7 @@
 
 **Command to try**: 
 ```
-pytest -s tests/integration/challenges/memory/test_memory_challenge_c.py --level=2
+pytest -s tests/challenges/memory/test_memory_challenge_c.py --level=2
 ``
 
 ## Description

diff --git a/mypy.ini b/mypy.ini
@@ -2,7 +2,7 @@
 follow_imports = skip
 check_untyped_defs = True
 disallow_untyped_defs = True
-files = tests/integration/challenges/**/*.py
+files = tests/challenges/**/*.py
 
 [mypy-requests.*]
 ignore_missing_imports = True

diff --git a/tests/integration/challenges/__init__.py → tests/challenges/__init__.py b/tests/integration/challenges/__init__.py → tests/challenges/__init__.py
diff --git a/...on/challenges/basic_abilities/__init__.py → tests/challenges/basic_abilities/__init__.py b/...on/challenges/basic_abilities/__init__.py → tests/challenges/basic_abilities/__init__.py
diff --git a/...es/basic_abilities/goal_oriented_tasks.md → ...es/basic_abilities/goal_oriented_tasks.md b/...es/basic_abilities/goal_oriented_tasks.md → ...es/basic_abilities/goal_oriented_tasks.md
diff --git a/...es/basic_abilities/test_browse_website.py → ...es/basic_abilities/test_browse_website.py b/...es/basic_abilities/test_browse_website.py → ...es/basic_abilities/test_browse_website.py
@@ -1,10 +1,8 @@
 import pytest
 
 from autogpt.agent import Agent
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 2

diff --git a/...lenges/basic_abilities/test_write_file.py → ...lenges/basic_abilities/test_write_file.py b/...lenges/basic_abilities/test_write_file.py → ...lenges/basic_abilities/test_write_file.py
@@ -4,10 +4,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3

diff --git a/...hallenges/challenge_decorator/__init__.py → ...hallenges/challenge_decorator/__init__.py b/...hallenges/challenge_decorator/__init__.py → ...hallenges/challenge_decorator/__init__.py
diff --git a/...allenges/challenge_decorator/challenge.py → ...allenges/challenge_decorator/challenge.py b/...allenges/challenge_decorator/challenge.py → ...allenges/challenge_decorator/challenge.py
diff --git a/...hallenge_decorator/challenge_decorator.py → ...hallenge_decorator/challenge_decorator.py b/...hallenge_decorator/challenge_decorator.py → ...hallenge_decorator/challenge_decorator.py
@@ -4,11 +4,9 @@
 
 import pytest
 
-from tests.integration.challenges.challenge_decorator.challenge import Challenge
-from tests.integration.challenges.challenge_decorator.challenge_utils import (
-    create_challenge,
-)
-from tests.integration.challenges.challenge_decorator.score_utils import (
+from tests.challenges.challenge_decorator.challenge import Challenge
+from tests.challenges.challenge_decorator.challenge_utils import create_challenge
+from tests.challenges.challenge_decorator.score_utils import (
     get_scores,
     update_new_score,
 )

diff --git a/...es/challenge_decorator/challenge_utils.py → ...es/challenge_decorator/challenge_utils.py b/...es/challenge_decorator/challenge_utils.py → ...es/challenge_decorator/challenge_utils.py
@@ -1,7 +1,7 @@
 import os
 from typing import Any, Callable, Dict, Optional, Tuple
 
-from tests.integration.challenges.challenge_decorator.challenge import Challenge
+from tests.challenges.challenge_decorator.challenge import Challenge
 
 CHALLENGE_PREFIX = "test_"
 

diff --git a/...lenges/challenge_decorator/score_utils.py → ...lenges/challenge_decorator/score_utils.py b/...lenges/challenge_decorator/score_utils.py → ...lenges/challenge_decorator/score_utils.py
@@ -2,7 +2,7 @@
 import os
 from typing import Any, Dict, Optional, Tuple
 
-from tests.integration.challenges.challenge_decorator.challenge import Challenge
+from tests.challenges.challenge_decorator.challenge import Challenge
 
 CURRENT_SCORE_LOCATION = "../current_score"
 NEW_SCORE_LOCATION = "../new_score"

diff --git a/tests/integration/challenges/conftest.py → tests/challenges/conftest.py b/tests/integration/challenges/conftest.py → tests/challenges/conftest.py
@@ -5,9 +5,8 @@
 from _pytest.config.argparsing import Parser
 from _pytest.fixtures import FixtureRequest
 
-from tests.integration.challenges.challenge_decorator.challenge import Challenge
-from tests.integration.conftest import BASE_VCR_CONFIG
-from tests.vcr.vcr_filter import before_record_response
+from tests.challenges.challenge_decorator.challenge import Challenge
+from tests.vcr import BASE_VCR_CONFIG, before_record_response
 
 
 def before_record_response_filter_errors(

diff --git a/...integration/challenges/current_score.json → tests/challenges/current_score.json b/...integration/challenges/current_score.json → tests/challenges/current_score.json
diff --git a/...ion/challenges/debug_code/data/two_sum.py → tests/challenges/debug_code/data/two_sum.py b/...ion/challenges/debug_code/data/two_sum.py → tests/challenges/debug_code/data/two_sum.py
diff --git a/...allenges/debug_code/data/two_sum_tests.py → ...allenges/debug_code/data/two_sum_tests.py b/...allenges/debug_code/data/two_sum_tests.py → ...allenges/debug_code/data/two_sum_tests.py
diff --git a/...debug_code/test_debug_code_challenge_a.py → ...debug_code/test_debug_code_challenge_a.py b/...debug_code/test_debug_code_challenge_a.py → ...debug_code/test_debug_code_challenge_a.py
@@ -7,10 +7,8 @@
 from autogpt.commands.execute_code import execute_python_file
 from autogpt.commands.file_operations import append_to_file, write_to_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 5

diff --git a/...test_information_retrieval_challenge_a.py → ...test_information_retrieval_challenge_a.py b/...test_information_retrieval_challenge_a.py → ...test_information_retrieval_challenge_a.py
@@ -3,10 +3,8 @@
 
 from autogpt.commands.file_operations import read_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3

diff --git a/...test_information_retrieval_challenge_b.py → ...test_information_retrieval_challenge_b.py b/...test_information_retrieval_challenge_b.py → ...test_information_retrieval_challenge_b.py
@@ -6,10 +6,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3

diff --git a/...s/test_kubernetes_template_challenge_a.py → ...s/test_kubernetes_template_challenge_a.py b/...s/test_kubernetes_template_challenge_a.py → ...s/test_kubernetes_template_challenge_a.py
@@ -5,10 +5,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 CYCLE_COUNT = 3

diff --git a/...integration/challenges/memory/__init__.py → tests/challenges/memory/__init__.py b/...integration/challenges/memory/__init__.py → tests/challenges/memory/__init__.py
diff --git a/...llenges/memory/test_memory_challenge_a.py → ...llenges/memory/test_memory_challenge_a.py b/...llenges/memory/test_memory_challenge_a.py → ...llenges/memory/test_memory_challenge_a.py
@@ -4,10 +4,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file, write_to_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import run_interaction_loop
 from tests.utils import requires_api_key
 
 

diff --git a/...llenges/memory/test_memory_challenge_b.py → ...llenges/memory/test_memory_challenge_b.py b/...llenges/memory/test_memory_challenge_b.py → ...llenges/memory/test_memory_challenge_b.py
@@ -4,10 +4,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file, write_to_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import generate_noise, run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import generate_noise, run_interaction_loop
 from tests.utils import requires_api_key
 
 NOISE = 1000

diff --git a/...llenges/memory/test_memory_challenge_c.py → ...llenges/memory/test_memory_challenge_c.py b/...llenges/memory/test_memory_challenge_c.py → ...llenges/memory/test_memory_challenge_c.py
@@ -4,10 +4,8 @@
 from autogpt.agent import Agent
 from autogpt.commands.file_operations import read_file, write_to_file
 from autogpt.config import Config
-from tests.integration.challenges.challenge_decorator.challenge_decorator import (
-    challenge,
-)
-from tests.integration.challenges.utils import generate_noise, run_interaction_loop
+from tests.challenges.challenge_decorator.challenge_decorator import challenge
+from tests.challenges.utils import generate_noise, run_interaction_loop
 from tests.utils import requires_api_key
 
 NOISE = 1000

diff --git a/...challenge_should_be_formatted_properly.py → ...challenge_should_be_formatted_properly.py b/...challenge_should_be_formatted_properly.py → ...challenge_should_be_formatted_properly.py
diff --git a/tests/integration/challenges/utils.py → tests/challenges/utils.py b/tests/integration/challenges/utils.py → tests/challenges/utils.py
diff --git a/...n/challenges/utils/build_current_score.py → ...s/challenges/utils/build_current_score.py b/...n/challenges/utils/build_current_score.py → ...s/challenges/utils/build_current_score.py
@@ -26,12 +26,8 @@ def recursive_sort_dict(data: dict) -> dict:
 
 
 cwd = os.getcwd()  # get current working directory
-new_score_filename_pattern = os.path.join(
-    cwd, "tests/integration/challenges/new_score_*.json"
-)
-current_score_filename = os.path.join(
-    cwd, "tests/integration/challenges/current_score.json"
-)
+new_score_filename_pattern = os.path.join(cwd, "tests/challenges/new_score_*.json")
+current_score_filename = os.path.join(cwd, "tests/challenges/current_score.json")
 
 merged_data: Dict[str, Any] = {}
 for filename in glob.glob(new_score_filename_pattern):

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,4 +1,3 @@
-import os
 from pathlib import Path
 
 import pytest
@@ -8,15 +7,11 @@
 from autogpt.llm.api_manager import ApiManager
 from autogpt.workspace import Workspace
 
-pytest_plugins = ["tests.integration.agent_factory", "tests.integration.memory.utils"]
-
-PROXY = os.environ.get("PROXY")
-
-
-@pytest.fixture()
-def vcr_cassette_dir(request):
-    test_name = os.path.splitext(request.node.name)[0]
-    return os.path.join("tests/Auto-GPT-test-cassettes", test_name)
+pytest_plugins = [
+    "tests.integration.agent_factory",
+    "tests.integration.memory.utils",
+    "tests.vcr",
+]
 
 
 @pytest.fixture()