diff --git a/evaluate.py b/evaluate.py index f543c82c..19b28d60 100644 --- a/evaluate.py +++ b/evaluate.py @@ -24,7 +24,7 @@ Guideline: {guideline} """ -SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png') +SCREENSHOT_PATH = os.path.join('screenshots', 'screenshot.png') # Check if on a windows terminal that supports ANSI escape codes def supports_ansi(): @@ -80,9 +80,9 @@ def parse_eval_content(content): exit(1) -def evaluate_summary_screenshot(guideline): - '''Load the summary screenshot and return True or False if it meets the given guideline.''' - with open(SUMMARY_SCREENSHOT_PATH, "rb") as img_file: +def evaluate_final_screenshot(guideline): + '''Load the final screenshot and return True or False if it meets the given guideline.''' + with open(SCREENSHOT_PATH, "rb") as img_file: img_base64 = base64.b64encode(img_file.read()).decode("utf-8") eval_message = [{ @@ -116,9 +116,9 @@ def run_test_case(objective, guideline): subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL) try: - result = evaluate_summary_screenshot(guideline) + result = evaluate_final_screenshot(guideline) except(OSError): - print("Couldn't open the summary screenshot") + print("[Error] Couldn't open the screenshot for evaluation") return False return result @@ -143,7 +143,7 @@ def main(): failed += 1 print( - f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} tests passed, {failed} tests failed" + f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} test{'' if passed == 1 else 's'} passed, {failed} test{'' if failed == 1 else 's'} failed" ) if __name__ == "__main__":