From 1827a6be060a5ea4f505a46fcf66fe1a5c14ca7e Mon Sep 17 00:00:00 2001 From: Michael Hogue Date: Tue, 16 Jan 2024 09:53:18 -0500 Subject: [PATCH 1/3] Check for last screenshot instead of summary screenshot --- evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluate.py b/evaluate.py index f543c82c..63465c7f 100644 --- a/evaluate.py +++ b/evaluate.py @@ -24,7 +24,7 @@ Guideline: {guideline} """ -SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png') +SCREENSHOT_PATH = os.path.join('screenshots', 'screenshot.png') # Check if on a windows terminal that supports ANSI escape codes def supports_ansi(): @@ -82,7 +82,7 @@ def parse_eval_content(content): def evaluate_summary_screenshot(guideline): '''Load the summary screenshot and return True or False if it meets the given guideline.''' - with open(SUMMARY_SCREENSHOT_PATH, "rb") as img_file: + with open(SCREENSHOT_PATH, "rb") as img_file: img_base64 = base64.b64encode(img_file.read()).decode("utf-8") eval_message = [{ From 26c4295cd3b8fe11e656b239c212f0b46a6868b9 Mon Sep 17 00:00:00 2001 From: Michael Hogue Date: Tue, 16 Jan 2024 09:56:03 -0500 Subject: [PATCH 2/3] Update error message --- evaluate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluate.py b/evaluate.py index 63465c7f..bae46baa 100644 --- a/evaluate.py +++ b/evaluate.py @@ -80,8 +80,8 @@ def parse_eval_content(content): exit(1) -def evaluate_summary_screenshot(guideline): - '''Load the summary screenshot and return True or False if it meets the given guideline.''' +def evaluate_final_screenshot(guideline): + '''Load the final screenshot and return True or False if it meets the given guideline.''' with open(SCREENSHOT_PATH, "rb") as img_file: img_base64 = base64.b64encode(img_file.read()).decode("utf-8") @@ -116,9 +116,9 @@ def run_test_case(objective, guideline): subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL) try: - result = evaluate_summary_screenshot(guideline) + result = evaluate_final_screenshot(guideline) except(OSError): - print("Couldn't open the summary screenshot") + print("[Error] Couldn't open the screenshot for evaluation") return False return result From 791d9634d72de11688d23ebaf2345500a4c04bad Mon Sep 17 00:00:00 2001 From: Michael Hogue Date: Tue, 16 Jan 2024 10:18:22 -0500 Subject: [PATCH 3/3] Update test result message format --- evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluate.py b/evaluate.py index bae46baa..19b28d60 100644 --- a/evaluate.py +++ b/evaluate.py @@ -143,7 +143,7 @@ def main(): failed += 1 print( - f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} tests passed, {failed} tests failed" + f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} test{'' if passed == 1 else 's'} passed, {failed} test{'' if failed == 1 else 's'} failed" ) if __name__ == "__main__":