From 1827a6be060a5ea4f505a46fcf66fe1a5c14ca7e Mon Sep 17 00:00:00 2001
From: Michael Hogue <michael.hogue.19@gmail.com>
Date: Tue, 16 Jan 2024 09:53:18 -0500
Subject: [PATCH 1/3] Check for last screenshot instead of summary screenshot

---
 evaluate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index f543c82c..63465c7f 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -24,7 +24,7 @@
 Guideline: {guideline}
 """
 
-SUMMARY_SCREENSHOT_PATH = os.path.join('screenshots', 'summary_screenshot.png')
+SCREENSHOT_PATH = os.path.join('screenshots', 'screenshot.png')
 
 # Check if on a windows terminal that supports ANSI escape codes
 def supports_ansi():
@@ -82,7 +82,7 @@ def parse_eval_content(content):
 
 def evaluate_summary_screenshot(guideline):
     '''Load the summary screenshot and return True or False if it meets the given guideline.'''
-    with open(SUMMARY_SCREENSHOT_PATH, "rb") as img_file:
+    with open(SCREENSHOT_PATH, "rb") as img_file:
         img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
 
         eval_message = [{

From 26c4295cd3b8fe11e656b239c212f0b46a6868b9 Mon Sep 17 00:00:00 2001
From: Michael Hogue <michael.hogue.19@gmail.com>
Date: Tue, 16 Jan 2024 09:56:03 -0500
Subject: [PATCH 2/3] Rename evaluate_summary_screenshot and update error message

---
 evaluate.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 63465c7f..bae46baa 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -80,8 +80,8 @@ def parse_eval_content(content):
         exit(1)
 
 
-def evaluate_summary_screenshot(guideline):
-    '''Load the summary screenshot and return True or False if it meets the given guideline.'''
+def evaluate_final_screenshot(guideline):
+    '''Load the final screenshot and return True or False if it meets the given guideline.'''
     with open(SCREENSHOT_PATH, "rb") as img_file:
         img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
 
@@ -116,9 +116,9 @@ def run_test_case(objective, guideline):
     subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL)
     
     try:
-        result = evaluate_summary_screenshot(guideline)
+        result = evaluate_final_screenshot(guideline)
     except(OSError):
-        print("Couldn't open the summary screenshot")
+        print("[Error] Couldn't open the screenshot for evaluation")
         return False
     
     return result

From 791d9634d72de11688d23ebaf2345500a4c04bad Mon Sep 17 00:00:00 2001
From: Michael Hogue <michael.hogue.19@gmail.com>
Date: Tue, 16 Jan 2024 10:18:22 -0500
Subject: [PATCH 3/3] Update test result message format

---
 evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluate.py b/evaluate.py
index bae46baa..19b28d60 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -143,7 +143,7 @@ def main():
             failed += 1
 
     print(
-        f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} tests passed, {failed} tests failed"
+        f"{ANSI_BRIGHT_MAGENTA}[EVALUATION COMPLETE]{ANSI_RESET} {passed} test{'' if passed == 1 else 's'} passed, {failed} test{'' if failed == 1 else 's'} failed"
     )
 
 if __name__ == "__main__":