diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 368930a15f3f..2e143e9d4d15 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,6 +119,7 @@ jobs: - name: Run pytest tests with coverage run: | pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term + python tests/integration/challenges/utils/build_current_score.py env: CI: true PROXY: ${{ secrets.PROXY }} @@ -131,11 +132,20 @@ jobs: - name: Update cassette submodule to push target if push event if: ${{ github.event_name == 'push' }} run: | - cd tests/Auto-GPT-test-cassettes current_branch=$(echo ${{ github.ref }} | sed -e "s/refs\/heads\///g") - git fetch origin $current_branch git config --global user.name "Auto-GPT-Bot" git config --global user.email "github-bot@agpt.co" + git add tests/integration/challenges/current_score.json + + if ! git diff-index --quiet HEAD; then + git commit -m "Update current score" + git push origin HEAD:refs/heads/$current_branch + else + echo "The current score didn't change." + fi + + cd tests/Auto-GPT-test-cassettes + git fetch origin $current_branch git add . # Check if there are any changes @@ -150,7 +160,7 @@ jobs: git commit -m "Update submodule reference" git push origin HEAD:refs/heads/$current_branch else - echo "No changes to commit" + echo "No cassettes changes to commit" exit 0 fi @@ -182,7 +192,7 @@ jobs: echo "DIFF_EXISTS=false" >> $GITHUB_ENV fi - - name: Apply or remove prompt change label and comment + - name: Apply or remove behaviour change label and comment if: ${{ github.event_name == 'pull_request_target' }} run: | PR_NUMBER=${{ github.event.pull_request.number }} @@ -195,14 +205,14 @@ jobs: -H "Authorization: Bearer $TOKEN" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels \ - -d '{"labels":["prompt change"]}' + -d '{"labels":["behaviour change"]}' echo $TOKEN | gh auth login --with-token - gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's prompt. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." + gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." else echo "Removing label..." curl -X DELETE \ -H "Authorization: Bearer $TOKEN" \ -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/prompt%20change + https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/behaviour%20change fi diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 5b051c64b65b..35f7a6997c1e 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 5b051c64b65ba102be782781e4c1619086619375 +Subproject commit 35f7a6997c1e8a48952d13a530ea8e1e8988e76e diff --git a/tests/integration/challenges/challenge_decorator/challenge.py b/tests/integration/challenges/challenge_decorator/challenge.py index baf821a1dd37..fd3b60cb6cb1 100644 --- a/tests/integration/challenges/challenge_decorator/challenge.py +++ b/tests/integration/challenges/challenge_decorator/challenge.py @@ -9,6 +9,7 @@ def __init__( name: str, category: str, max_level: int, + is_new_challenge: bool, max_level_beaten: Optional[int], level_to_run: Optional[int] = None, ) -> None: @@ -19,3 +20,4 @@ def __init__( self.succeeded = False self.skipped = False self.level_to_run = level_to_run + self.is_new_challenge = is_new_challenge diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/integration/challenges/challenge_decorator/challenge_decorator.py index 580dc0890697..1ec43aec4173 100644 --- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/integration/challenges/challenge_decorator/challenge_decorator.py @@ -48,7 +48,7 @@ def wrapper(*args: Any, **kwargs: Any) -> None: pytest.skip("This test has not been unlocked yet.") if not challenge.succeeded: - if Challenge.BEAT_CHALLENGES: + if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: # xfail pytest.xfail("Challenge failed") raise AssertionError("Challenge failed") diff --git a/tests/integration/challenges/challenge_decorator/challenge_utils.py b/tests/integration/challenges/challenge_decorator/challenge_utils.py index b94f71649038..7db7648fa4bc 100644 --- a/tests/integration/challenges/challenge_decorator/challenge_utils.py +++ b/tests/integration/challenges/challenge_decorator/challenge_utils.py @@ -13,13 +13,13 @@ def create_challenge( level_to_run: Optional[int] = None, ) -> Challenge: challenge_category, challenge_name = get_challenge_identifiers(func) - + is_new_challenge = challenge_name not in current_score.get(challenge_category, {}) max_level = get_max_level(current_score, challenge_category, challenge_name) max_level_beaten = get_max_level_beaten( current_score, challenge_category, challenge_name ) level_to_run = get_level_to_run( - is_beat_challenges, level_to_run, max_level, max_level_beaten + is_beat_challenges, level_to_run, max_level, max_level_beaten, is_new_challenge ) return Challenge( @@ -28,6 +28,7 @@ def create_challenge( max_level=max_level, max_level_beaten=max_level_beaten, level_to_run=level_to_run, + is_new_challenge=is_new_challenge, ) @@ -36,7 +37,10 @@ def get_level_to_run( level_to_run: Optional[int], max_level: int, max_level_beaten: Optional[int], + is_new_challenge: bool, ) -> Optional[int]: + if is_new_challenge: + return 1 if level_to_run is not None: if level_to_run > max_level: raise ValueError( diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json index 741ec7a59aa7..8cb4f069d8f9 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/integration/challenges/current_score.json @@ -35,4 +35,4 @@ "max_level_beaten": 1 } } -} +} \ No newline at end of file diff --git a/tests/test_image_gen.py b/tests/integration/test_image_gen.py similarity index 97% rename from tests/test_image_gen.py rename to tests/integration/test_image_gen.py index 5c04921b5a52..0156c9e5bdc9 100644 --- a/tests/test_image_gen.py +++ b/tests/integration/test_image_gen.py @@ -16,11 +16,9 @@ def image_size(request): return request.param -@pytest.mark.xfail( - reason="The image is too big to be put in a cassette for a CI pipeline. We're looking into a solution." -) @requires_api_key("OPENAI_API_KEY") -def test_dalle(config, workspace, image_size): +@pytest.mark.vcr +def test_dalle(config, workspace, image_size, patched_api_requestor): """Test DALL-E image generation.""" generate_and_validate( config,