From fd8f601be136c22984ec9af4718b181bfa46d20c Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Sun, 17 Sep 2023 17:11:23 -0700 Subject: [PATCH] Fix benchmark being stateful Signed-off-by: Merwane Hamadi --- .github/workflows/benchmark-ci.yml | 4 + autogpts/forge/forge/__main__.py | 1 - autogpts/forge/run_benchmark | 4 +- benchmark/agbenchmark/__main__.py | 90 ++++++++----------- benchmark/agbenchmark/agent_interface.py | 85 +----------------- benchmark/agbenchmark/app.py | 26 ++++-- .../deprecated/content_gen/2_plan/data.json | 2 +- benchmark/agbenchmark/execute_sub_process.py | 79 ++++++++++++++++ benchmark/agbenchmark/generate_test.py | 16 +--- .../agbenchmark/reports/ReportManager.py | 46 +++++++++- .../reports/agent_benchmark_config.py | 18 ++++ benchmark/agbenchmark/reports/reports.py | 30 +++---- benchmark/tests/__init__.py | 0 benchmark/tests/test_web_server.py | 66 ++++++++++++++ 14 files changed, 290 insertions(+), 177 deletions(-) create mode 100644 benchmark/agbenchmark/execute_sub_process.py create mode 100644 benchmark/agbenchmark/reports/agent_benchmark_config.py create mode 100644 benchmark/tests/__init__.py create mode 100644 benchmark/tests/test_web_server.py diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml index 426f0d45c46..148038d4584 100644 --- a/.github/workflows/benchmark-ci.yml +++ b/.github/workflows/benchmark-ci.yml @@ -127,5 +127,9 @@ jobs: echo "Running the following command: ${prefix}agbenchmark --test=WriteFile" ${prefix}agbenchmark --test=WriteFile + sh run_benchmark & + poetry run python ../../benchmark/tests/test_web_server.py & + poetry run ../../benchmark/tests/test_web_server.py & + poetry run ../../benchmark/tests/test_web_server.py env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/autogpts/forge/forge/__main__.py b/autogpts/forge/forge/__main__.py index f20644b7da9..2dad519dfe1 100644 --- a/autogpts/forge/forge/__main__.py +++ b/autogpts/forge/forge/__main__.py @@ -4,7 +4,6 @@ load_dotenv() import forge.sdk.forge_log - forge.sdk.forge_log.setup_logger() diff --git a/autogpts/forge/run_benchmark b/autogpts/forge/run_benchmark index fa95ee76935..9d9253959bb 100755 --- a/autogpts/forge/run_benchmark +++ b/autogpts/forge/run_benchmark @@ -2,4 +2,6 @@ kill $(lsof -t -i :8080) -poetry run agbenchmark serve +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +uvicorn agbenchmark.app:app --host localhost --port 8080 --reload --log-level info --reload-dir "$SCRIPT_DIR/../../benchmark/agbenchmark" diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py index a2de579e0f0..caafa14f425 100644 --- a/benchmark/agbenchmark/__main__.py +++ b/benchmark/agbenchmark/__main__.py @@ -11,12 +11,9 @@ import toml from helicone.lock import HeliconeLockManager -from agbenchmark.app import app +from agbenchmark.reports.ReportManager import SingletonReportManager from agbenchmark.utils.data_types import AgentBenchmarkConfig -from .reports.ReportManager import ReportManager -from .utils.data_types import AgentBenchmarkConfig - BENCHMARK_START_TIME_DT = datetime.now(timezone.utc) BENCHMARK_START_TIME = BENCHMARK_START_TIME_DT.strftime("%Y-%m-%dT%H:%M:%S+00:00") TEMP_FOLDER_ABS_PATH = Path.cwd() / "agbenchmark_config" / "temp_folder" @@ -26,50 +23,6 @@ UPDATES_JSON_PATH = Path.cwd() / "agbenchmark_config" / "updates.json" -def get_agent_benchmark_config() -> AgentBenchmarkConfig: - agent_benchmark_config_path = str(Path.cwd() / "agbenchmark_config" / "config.json") - try: - with 
open(agent_benchmark_config_path, "r") as f: - agent_benchmark_config = AgentBenchmarkConfig(**json.load(f)) - agent_benchmark_config.agent_benchmark_config_path = ( - agent_benchmark_config_path - ) - return agent_benchmark_config - except json.JSONDecodeError: - print("Error: benchmark_config.json is not a valid JSON file.") - raise - - -def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]: - agent_benchmark_config = get_agent_benchmark_config() - # tests that consistently pass are considered regression tests - REGRESSION_MANAGER = ReportManager( - agent_benchmark_config.get_regression_reports_path(), BENCHMARK_START_TIME_DT - ) - - # print(f"Using {REPORTS_PATH} for reports") - # user facing reporting information - INFO_MANAGER = ReportManager( - str( - agent_benchmark_config.get_reports_path( - benchmark_start_time=BENCHMARK_START_TIME_DT - ) - / "report.json" - ), - BENCHMARK_START_TIME_DT, - ) - - # internal db step in replacement track pass/fail rate - INTERNAL_INFO_MANAGER = ReportManager( - agent_benchmark_config.get_success_rate_path(), BENCHMARK_START_TIME_DT - ) - - return REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER - - -(REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER) = get_report_managers() - - if os.environ.get("HELICONE_API_KEY"): HeliconeLockManager.write_custom_property( "benchmark_start_time", BENCHMARK_START_TIME @@ -122,6 +75,8 @@ def run_benchmark( ) -> int: """Start the benchmark tests. If a category flag is provided, run the categories with that mark.""" # Check if configuration file exists and is not empty + initialize_updates_file() + SingletonReportManager() agent_benchmark_config_path = str(Path.cwd() / "agbenchmark_config" / "config.json") try: with open(agent_benchmark_config_path, "r") as f: @@ -214,7 +169,8 @@ def run_benchmark( current_dir = Path(__file__).resolve().parent print(f"Current directory: {current_dir}") pytest_args.extend((str(current_dir), "--cache-clear")) - return pytest.main(pytest_args) + exit_code = pytest.main(pytest_args) + SingletonReportManager().clear_instance() @click.group(invoke_without_command=True) @@ -314,11 +270,39 @@ def version(): print(f"Benchmark Tool Version {version}") -def serve(): - import uvicorn +from pathlib import Path - # Run the FastAPI application using uvicorn - uvicorn.run(app, host="0.0.0.0", port=8080) +# class CustomStatReload(StatReload): +# def __init__(self, *args, **kwargs): +# super().__init__(*args, **kwargs) +# # Overriding the directories to watch +# self.dirs = [Path(__file__).absolute().parent] + +# def serve(): +# current_file_path = Path(__file__).absolute().parent +# config = Config( +# "agbenchmark.app:app", # Reference to your FastAPI application +# host="localhost", # Host to bind +# port=8080, # Port to bind +# reload=True, # Enable reload +# log_level="info", # Logging level +# # reload_dirs=[str(current_file_path)], # Directories to watch +# ) +# server = Server(config) +# server.run() + + +def initialize_updates_file(): + if os.path.exists(UPDATES_JSON_PATH): + # If the file already exists, overwrite it with an empty list + with open(UPDATES_JSON_PATH, "w") as file: + json.dump([], file, indent=2) + print("Initialized updates.json by overwriting with an empty array") + else: + # If the file doesn't exist, create it and write an empty list + with open(UPDATES_JSON_PATH, "w") as file: + json.dump([], file, indent=2) + print("Created updates.json and initialized it with an empty array") if __name__ == "__main__": diff --git 
a/benchmark/agbenchmark/agent_interface.py b/benchmark/agbenchmark/agent_interface.py index 5d1b24c58b3..269e8f8ff49 100644 --- a/benchmark/agbenchmark/agent_interface.py +++ b/benchmark/agbenchmark/agent_interface.py @@ -1,18 +1,11 @@ import os -import platform -import queue -import select import shutil -import subprocess import sys -import time -from threading import Thread -from typing import Any, List +from typing import List -import psutil from dotenv import load_dotenv -from agbenchmark.utils.data_types import AgentBenchmarkConfig +from agbenchmark.execute_sub_process import execute_subprocess load_dotenv() @@ -22,82 +15,12 @@ ) -def run_linux_env(process: Any, start_time: float, timeout: float) -> None: - while True: - try: - # This checks if there's data to be read from stdout without blocking. - if process.stdout and select.select([process.stdout], [], [], 0)[0]: - output = process.stdout.readline() - print(output.strip()) - except Exception as e: - continue - - # Check if process has ended, has no more output, or exceeded timeout - if process.poll() is not None or (time.time() - start_time > timeout): - break - - if time.time() - start_time > timeout: - print("The Python function has exceeded the time limit and was terminated.") - parent = psutil.Process(process.pid) - for child in parent.children(recursive=True): - child.kill() - parent.kill() - - else: - print("The Python function has finished running.") - - -def enqueue_output(out: Any, my_queue: Any) -> None: - for line in iter(out.readline, b""): - my_queue.put(line) - out.close() - - -def run_windows_env(process: Any, start_time: float, timeout: float) -> None: - my_queue: Any = queue.Queue() - thread = Thread(target=enqueue_output, args=(process.stdout, my_queue)) - thread.daemon = True - thread.start() - - while True: - try: - output = my_queue.get_nowait().strip() - print(output) - except queue.Empty: - pass - - if process.poll() is not None or (time.time() - start_time > timeout): - break - - if time.time() - start_time > timeout: - print("The Python function has exceeded the time limit and was terminated.") - process.terminate() - - -def run_agent(task: str, timeout: int, agent_config: AgentBenchmarkConfig) -> None: +def run_agent(task: str, timeout: int) -> None: print(f"Running agbenchmark/benchmarks.py with timeout {timeout}") command = [sys.executable, "-m", "agbenchmark_config.benchmarks", str(task)] - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - bufsize=1, - ) - - start_time = time.time() - - if platform.system() == "Windows": - run_windows_env(process, start_time, timeout) - else: - run_linux_env(process, start_time, timeout) - - process.wait() - - if process.returncode != 0: - print(f"The agent timed out") + execute_subprocess(command, timeout) def get_list_of_file_paths( diff --git a/benchmark/agbenchmark/app.py b/benchmark/agbenchmark/app.py index 0485528b484..49be8c9c95d 100644 --- a/benchmark/agbenchmark/app.py +++ b/benchmark/agbenchmark/app.py @@ -10,11 +10,11 @@ from fastapi import Request, Response from fastapi.middleware.cors import CORSMiddleware -# from agbenchmark.app import app +from agbenchmark.execute_sub_process import execute_subprocess sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from fastapi import FastAPI -from pydantic import BaseModel +from pydantic import BaseModel, Extra # Change the current working directory to the benchmark path # home_path = find_absolute_benchmark_path() @@ 
-28,6 +28,9 @@ class CreateReportRequest(BaseModel): category: Optional[str] = [] mock: Optional[bool] = False + class Config: + extra = Extra.forbid # this will forbid any extra fields + updates_list = [] @@ -50,25 +53,29 @@ class CreateReportRequest(BaseModel): ) +def stream_output(pipe): + for line in pipe: + print(line, end="") + + @app.post("/reports") def run_single_test(body: CreateReportRequest) -> Any: - from agbenchmark.__main__ import run_benchmark - + print(body.dict()) # it's a hack because other parts of the code are using sys.argv - sys.argv = [sys.argv[0]] - sys.argv.append("start") + print(os.getcwd()) + command_options = ["agbenchmark"] if body.category: sys.argv.append(f"--category={body.category}") for body_test in body.tests: - sys.argv.append(f"--test={body_test}") - categories = None + command_options.append(f"--test={body_test}") if body.category: categories = tuple([body.category]) - run_benchmark(category=categories, mock=body.mock, test=tuple(body.tests)) + execute_subprocess(command_options, 200) import json from pathlib import Path + print("finished running") # List all folders in the current working directory path_reports = Path.cwd() / "agbenchmark_config" / "reports" folders = [folder for folder in path_reports.iterdir() if folder.is_dir()] @@ -82,6 +89,7 @@ def run_single_test(body: CreateReportRequest) -> Any: # Read report.json from this folder if last_folder: report_path = last_folder / "report.json" + print(report_path) if report_path.exists(): with report_path.open() as file: data = json.load(file) diff --git a/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json b/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json index e96994cf267..ed60d428cf9 100644 --- a/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json +++ b/benchmark/agbenchmark/challenges/deprecated/content_gen/2_plan/data.json @@ -17,7 +17,7 @@ }, "info": { "difficulty": "basic", - "description": "s ability to generate content based on the content of 2 files.", + "description": "ability to generate content based on the content of 2 files.", "side_effects": [] } } diff --git a/benchmark/agbenchmark/execute_sub_process.py b/benchmark/agbenchmark/execute_sub_process.py new file mode 100644 index 00000000000..b981e6be57c --- /dev/null +++ b/benchmark/agbenchmark/execute_sub_process.py @@ -0,0 +1,79 @@ +import platform +import queue +import select +import subprocess +import time +from threading import Thread +from typing import Any + +import psutil + + +def run_linux_env(process: Any, start_time: float, timeout: float) -> None: + while True: + try: + # This checks if there's data to be read from stdout without blocking. 
+ if process.stdout and select.select([process.stdout], [], [], 0)[0]: + output = process.stdout.readline() + print(output.strip()) + except Exception as e: + continue + + # Check if process has ended, has no more output, or exceeded timeout + if process.poll() is not None or (time.time() - start_time > timeout): + break + + if time.time() - start_time > timeout: + print("The Python function has exceeded the time limit and was terminated.") + parent = psutil.Process(process.pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() + + else: + print("The Python function has finished running.") + + +def enqueue_output(out: Any, my_queue: Any) -> None: + for line in iter(out.readline, b""): + my_queue.put(line) + out.close() + + +def run_windows_env(process: Any, start_time: float, timeout: float) -> None: + my_queue: Any = queue.Queue() + thread = Thread(target=enqueue_output, args=(process.stdout, my_queue)) + thread.daemon = True + thread.start() + + while True: + try: + output = my_queue.get_nowait().strip() + print(output) + except queue.Empty: + pass + + if process.poll() is not None or (time.time() - start_time > timeout): + break + + if time.time() - start_time > timeout: + print("The Python function has exceeded the time limit and was terminated.") + process.terminate() + + +def execute_subprocess(command, timeout): + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + bufsize=1, + ) + start_time = time.time() + if platform.system() == "Windows": + run_windows_env(process, start_time, timeout) + else: + run_linux_env(process, start_time, timeout) + process.wait() + if process.returncode != 0: + print(f"The agent timed out") diff --git a/benchmark/agbenchmark/generate_test.py b/benchmark/agbenchmark/generate_test.py index ef7dc40ce65..92efd79844b 100644 --- a/benchmark/agbenchmark/generate_test.py +++ b/benchmark/agbenchmark/generate_test.py @@ -10,7 +10,7 @@ import pytest -from agbenchmark.__main__ import CHALLENGES_ALREADY_BEATEN, UPDATES_JSON_PATH +from agbenchmark.__main__ import CHALLENGES_ALREADY_BEATEN from agbenchmark.agent_api_interface import append_updates_file from agbenchmark.agent_protocol_client.models.step import Step from agbenchmark.utils.challenge import Challenge @@ -218,18 +218,4 @@ def challenge_should_be_ignored(json_file): return "challenges/deprecated" in json_file or "challenges/library" in json_file -def initialize_updates_file(): - if os.path.exists(UPDATES_JSON_PATH): - # If the file already exists, overwrite it with an empty list - with open(UPDATES_JSON_PATH, "w") as file: - json.dump([], file, indent=2) - print("Initialized updates.json by overwriting with an empty array") - else: - # If the file doesn't exist, create it and write an empty list - with open(UPDATES_JSON_PATH, "w") as file: - json.dump([], file, indent=2) - print("Created updates.json and initialized it with an empty array") - - -initialize_updates_file() generate_tests() diff --git a/benchmark/agbenchmark/reports/ReportManager.py b/benchmark/agbenchmark/reports/ReportManager.py index fc4a553bc95..1b9e6ae5d91 100644 --- a/benchmark/agbenchmark/reports/ReportManager.py +++ b/benchmark/agbenchmark/reports/ReportManager.py @@ -1,3 +1,4 @@ +import copy import json import os import sys @@ -11,6 +12,48 @@ from agbenchmark.utils.utils import get_highest_success_difficulty +class SingletonReportManager: + instance = None + + def __new__(cls): + from agbenchmark.reports.agent_benchmark_config import ( + 
get_agent_benchmark_config, + ) + + if not cls.instance: + cls.instance = super(SingletonReportManager, cls).__new__(cls) + + agent_benchmark_config = get_agent_benchmark_config() + benchmark_start_time_dt = ( + datetime.now() + ) # or any logic to fetch the datetime + + # Make the Managers class attributes + cls.REGRESSION_MANAGER = ReportManager( + agent_benchmark_config.get_regression_reports_path(), + benchmark_start_time_dt, + ) + cls.INFO_MANAGER = ReportManager( + str( + agent_benchmark_config.get_reports_path(benchmark_start_time_dt) + / "report.json" + ), + benchmark_start_time_dt, + ) + cls.INTERNAL_INFO_MANAGER = ReportManager( + agent_benchmark_config.get_success_rate_path(), benchmark_start_time_dt + ) + + return cls.instance + + @classmethod + def clear_instance(cls): + cls.instance = None + cls.REGRESSION_MANAGER = None + cls.INFO_MANAGER = None + cls.INTERNAL_INFO_MANAGER = None + + class ReportManager: """Abstracts interaction with the regression tests file""" @@ -81,7 +124,7 @@ def end_info_report(self, config: AgentBenchmarkConfig) -> None: "highest_difficulty": get_highest_success_difficulty(self.tests), "total_cost": self.get_total_costs(), }, - "tests": self.tests, + "tests": copy.copy(self.tests), "config": { k: v for k, v in json.loads(config.json()).items() if v is not None }, @@ -105,6 +148,7 @@ def get_total_costs(self): cost = test_data["metrics"].get( "cost", 0 ) # gets the cost or defaults to 0 if cost is missing + if cost is not None: # check if cost is not None all_costs_none = False total_cost += cost # add cost to total diff --git a/benchmark/agbenchmark/reports/agent_benchmark_config.py b/benchmark/agbenchmark/reports/agent_benchmark_config.py new file mode 100644 index 00000000000..3b45ed713c9 --- /dev/null +++ b/benchmark/agbenchmark/reports/agent_benchmark_config.py @@ -0,0 +1,18 @@ +import json +from pathlib import Path + +from agbenchmark.utils.data_types import AgentBenchmarkConfig + + +def get_agent_benchmark_config() -> AgentBenchmarkConfig: + agent_benchmark_config_path = str(Path.cwd() / "agbenchmark_config" / "config.json") + try: + with open(agent_benchmark_config_path, "r") as f: + agent_benchmark_config = AgentBenchmarkConfig(**json.load(f)) + agent_benchmark_config.agent_benchmark_config_path = ( + agent_benchmark_config_path + ) + return agent_benchmark_config + except json.JSONDecodeError: + print("Error: benchmark_config.json is not a valid JSON file.") + raise diff --git a/benchmark/agbenchmark/reports/reports.py b/benchmark/agbenchmark/reports/reports.py index dd70500f251..8a6f04c46bb 100644 --- a/benchmark/agbenchmark/reports/reports.py +++ b/benchmark/agbenchmark/reports/reports.py @@ -3,13 +3,9 @@ import sys from typing import Any, Dict -from agbenchmark.__main__ import ( - CHALLENGES_ALREADY_BEATEN, - INFO_MANAGER, - INTERNAL_INFO_MANAGER, - REGRESSION_MANAGER, - get_agent_benchmark_config, -) +from agbenchmark.__main__ import CHALLENGES_ALREADY_BEATEN +from agbenchmark.reports.agent_benchmark_config import get_agent_benchmark_config +from agbenchmark.reports.ReportManager import SingletonReportManager from agbenchmark.utils.data_types import DifficultyLevel from agbenchmark.utils.get_data_from_helicone import get_data_from_helicone from agbenchmark.utils.utils import calculate_success_percentage @@ -21,12 +17,16 @@ def get_previous_test_results( agent_tests: dict[str, list[bool]] = {} mock = os.getenv("IS_MOCK") # Check if --mock is in sys.argv - prev_test_results = INTERNAL_INFO_MANAGER.tests.get(test_name, []) + 
prev_test_results = SingletonReportManager().INTERNAL_INFO_MANAGER.tests.get( + test_name, [] + ) if not mock: # only add if it's an actual test prev_test_results.append(info_details["metrics"]["success"]) - INTERNAL_INFO_MANAGER.add_test(test_name, prev_test_results) + SingletonReportManager().INTERNAL_INFO_MANAGER.add_test( + test_name, prev_test_results + ) # can calculate success rate regardless of mock info_details["metrics"]["success_%"] = calculate_success_percentage( @@ -45,7 +45,7 @@ def update_regression_tests( if len(prev_test_results) >= 3 and prev_test_results[-3:] == [True, True, True]: # if the last 3 tests were successful, add to the regression tests info_details["is_regression"] = True - REGRESSION_MANAGER.add_test(test_name, test_details) + SingletonReportManager().REGRESSION_MANAGER.add_test(test_name, test_details) def generate_single_call_report( @@ -95,7 +95,7 @@ def generate_single_call_report( info_details["metrics"]["success"] = True else: if not mock: # don't remove if it's a mock test - REGRESSION_MANAGER.remove_test(test_name) + SingletonReportManager().REGRESSION_MANAGER.remove_test(test_name) info_details["metrics"]["fail_reason"] = str(call.excinfo.value) if call.excinfo.typename == "Skipped": info_details["metrics"]["attempted"] = False @@ -146,7 +146,7 @@ def finalize_reports(item: Any, challenge_data: dict[str, Any]) -> None: nested_test_info, nested_test_name ) - INFO_MANAGER.add_test(test_name, info_details) + SingletonReportManager().INFO_MANAGER.add_test(test_name, info_details) def update_challenges_already_beaten( @@ -171,6 +171,6 @@ def update_challenges_already_beaten( def session_finish(suite_reports: dict) -> None: agent_benchmark_config = get_agent_benchmark_config() - INTERNAL_INFO_MANAGER.save() - INFO_MANAGER.end_info_report(agent_benchmark_config) - REGRESSION_MANAGER.save() + SingletonReportManager().INTERNAL_INFO_MANAGER.save() + SingletonReportManager().INFO_MANAGER.end_info_report(agent_benchmark_config) + SingletonReportManager().REGRESSION_MANAGER.save() diff --git a/benchmark/tests/__init__.py b/benchmark/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/benchmark/tests/test_web_server.py b/benchmark/tests/test_web_server.py new file mode 100644 index 00000000000..b592b4fd9e3 --- /dev/null +++ b/benchmark/tests/test_web_server.py @@ -0,0 +1,66 @@ +import threading +import time +import unittest + +import requests + + +class TestAPIRequests(unittest.TestCase): + URL = "http://localhost:8080" + + def test_post_correct_then_incorrect_request(self): + payload1 = {"tests": ["WriteFile", "ReadFile"], "mock": True} + + # First POST request + response1 = requests.post(self.URL + "/reports", json=payload1) + self.assertEqual(response1.status_code, 200) + # Here you might want to check other aspects of the response, e.g., response1.json() + print(response1.json()) + self.assertNotEqual(response1.json()["tests"], {}) + payload2 = {"tests": ["TestWriteFile", "TestReadFile"], "mock": True} + + # Second POST request + response2 = requests.post(self.URL + "/reports", json=payload2) + print(response2.json()) + + self.assertEqual(response2.json()["tests"], {}) + assert response1.json() != {} + # Here you might want to check other aspects of the response, e.g., response2.json() + + def test_invalid_payload(self): + invalid_payload = {"invalid_key": "value"} + response = requests.post(self.URL + "/reports", json=invalid_payload) + self.assertEqual(response.status_code, 422) # Assuming 400 for Bad Request + + def 
test_post_report_and_poll_updates(self): + payload1 = {"tests": ["WriteFile", "ReadFile"], "mock": True} + last_update_time = int(time.time()) + # First POST request in a separate thread + threading.Thread(target=self.send_post_request, args=(payload1,)).start() + + # Give a short time to ensure POST request is initiated before GET requests start + time.sleep(0.1) + + # Start GET requests + for _ in range(5): + # get the current UNIX time + response = requests.get( + f"{self.URL}/updates?last_update_time={last_update_time}" + ) + last_update_time = int(time.time()) + if response.status_code == 200 and response.json(): + print("Received a non-empty response:", response.json()) + break + + time.sleep(1) # wait for 1 second before the next request + else: + self.fail("No updates received") + + def send_post_request(self, payload): + response = requests.post(f"{self.URL}/reports", json=payload) + if response.status_code == 200: + print(response.json()) + + +if __name__ == "__main__": + unittest.main()
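
The heart of this patch is replacing the module-level `REGRESSION_MANAGER` / `INFO_MANAGER` / `INTERNAL_INFO_MANAGER` globals with a `SingletonReportManager` that is created at the start of `run_benchmark` and torn down with `clear_instance()` when the run finishes. The sketch below is a minimal, self-contained illustration of that reset-able singleton lifecycle; `ReportStore` and `SingletonReports` are simplified stand-ins invented for this example (not names from the patch), and the real `ReportManager` additionally persists reports to disk.

```python
# Minimal sketch of the reset-able singleton pattern this patch introduces,
# under the assumption that only the lifecycle (create once per run, clear at
# the end) matters here. ReportStore/SingletonReports are illustrative names,
# not the patch's ReportManager/SingletonReportManager.
from datetime import datetime


class ReportStore:
    """Stand-in for ReportManager; tracks per-run state, no file I/O."""

    def __init__(self, path: str, started_at: datetime) -> None:
        self.path = path
        self.started_at = started_at
        self.tests: dict[str, dict] = {}


class SingletonReports:
    instance = None

    def __new__(cls) -> "SingletonReports":
        if cls.instance is None:
            cls.instance = super().__new__(cls)
            started_at = datetime.now()
            # Fresh managers are built on first access within a run...
            cls.INFO_MANAGER = ReportStore("report.json", started_at)
        return cls.instance

    @classmethod
    def clear_instance(cls) -> None:
        # ...and discarded at the end of the run, so the next run
        # cannot see the previous run's accumulated results.
        cls.instance = None
        cls.INFO_MANAGER = None


if __name__ == "__main__":
    first = SingletonReports()
    assert SingletonReports() is first       # same object within one run
    SingletonReports.clear_instance()
    assert SingletonReports() is not first   # a new run starts with fresh state
```

This mirrors why `run_benchmark` now calls `SingletonReportManager()` up front and `SingletonReportManager().clear_instance()` after `pytest.main()` returns: report state lives only for the duration of one benchmark invocation, which is what the new `test_web_server.py` checks by issuing two back-to-back `/reports` requests and expecting the second run not to inherit the first run's results.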