# Fulmine LABS Eyball
## Overview
This Python code implements a class wrapper around an Anomaly Detection model which can be used to visually check if an image is anomalous or not. The supported architecture for this model is 'Siamese Network'.
In order to reduce false negatives, the code compares the image against a jury of randomly selected known good images, whose size is set by the configurable 'jury_size'.
Each juror votes that the image is similar to its chosen known good image when the Siamese distance between the two is below a configurable 'threshold'. If more than half of the jurors vote 'similar' the code returns a verdict of 'Normal', otherwise it returns a verdict of 'Anomalous'.
If an image path is not specified but screen coordinates are, these will be used instead, enabling direct integration with automated visual checking scripts.

One goal is to use this class as part of automating visual checking of a medical image (PACS) production pipeline, although it could theoretically visually check any type of image on which the model has been trained.

It also has the capability of describing the images, using GPT-4 Turbo Vision, if an OpenAI key is supplied in the 'Eyball-OpenAI_key.txt' file.

## Initialize the Eyball class

predictor = ModelPredictor(siamese_model_path, known_good_images_folder, Eyball_key, threshold, jury_size)

## Example calls

role = "You are a radiology PACS test engineer, analyzing PACS or test process related image anomalies"

image_description_directive = "If the image is obviously not a medical image, state *** ANOMALOUS ***. If it is a typical medical image as acquired by an imaging modality with no additions or enhancements, state *** NORMAL ***. Otherwise, if it is a medical image but it also clearly has textual overlays or annotations or digital or image processing artifacts that could have been added by the PACS image viewer technology, describe those features and append *** ANOMALOUS ***."

verdict = predictor.predict_siamese(test_image_path)

actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)

## Author
Duncan Henderson
Fulmine Labs LLC


In [25]:
import numpy as np
import os
import io
import cv2
from PIL import Image, ImageGrab
import logging
import random
import base64
import requests
from openai import OpenAI

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report



In [35]:
# --- Verdict settings -------------------------------------------------------
# Passed to ModelPredictor as its `threshold` and `jury_size` arguments.
threshold = 0.5
jury_size = 12

# --- File locations ---------------------------------------------------------
# Raw strings keep the Windows backslashes literal.
siamese_model_path = r"models\lung_ct_siamese_network_weights_043024.h5"
known_good_images_folder = r"D:\training_images\test\valid"
api_key_file = "Eyball-OpenAI_key.txt"

# --- LLM prompts ------------------------------------------------------------
# Adjacent string literals are concatenated by the parser into one string.
role = (
    "You are a radiology PACS test engineer, "
    "analyzing PACS or test process related image anomalies"
)
image_description_directive = (
    "If the image is obviously not a medical image, state *** ANOMALOUS ***. "
    "If it is a typical medical image as acquired by an imaging modality "
    "with no additions or enhancements, state *** NORMAL ***. "
    "Otherwise, if it is a medical image but it also clearly has textual overlays "
    "or annotations or digital or image processing artifacts that could have been "
    "added by the PACS image viewer technology, describe those features and "
    "append *** ANOMALOUS ***."
)


In [36]:
# class ModelPredictor:

#     def __init__(self, siamese_model_path, known_good_images_folder, api_key_file='Eyball-OpenAI_key.txt', threshold=0.5, jury_size=12):
#         self.siamese_model_path = siamese_model_path
#         self.known_good_images_folder = known_good_images_folder
#         self.api_key = self.load_api_key(api_key_file)
#         self.client = OpenAI(api_key=self.api_key)
#         self.siamese_model = self.load_siamese_model(siamese_model_path)
#         self.threshold = threshold
#         self.jury_size = jury_size
#         self.known_good_images = self.preload_known_good_images()
#         self.headers = {
#             "Content-Type": "application/json",
#             "Authorization": f"Bearer {self.api_key}"
#         }
        
#     def load_api_key(self, filename):
#         try:
#             with open(filename, 'r') as file:
#                 return file.read().strip()
#         except FileNotFoundError:
#             raise Exception(f"API key file not found: {filename}")

#     def preload_known_good_images(self):
#         # Your existing method to preload images
#         print("Preloading known good images...")
#         image_paths = []
#         for root, dirs, files in os.walk(self.known_good_images_folder):
#             for file in files:
#                 if file.lower().endswith(('.png', '.jpg', '.jpeg')):
#                     full_path = os.path.join(root, file)
#                     image_paths.append(full_path)
#         return image_paths
#         # Cache known good images if needed here
        
#     # Continue to define ModelPredictor class
#     def load_siamese_model(self, siamese_model_path):
#         # Define the base network architecture
#         def initialize_base_network(input_shape):
#             input = Input(shape=input_shape)
#             x = Conv2D(64, (3, 3), activation='relu')(input)
#             x = MaxPooling2D((2, 2))(x)
#             x = Conv2D(128, (3, 3), activation='relu')(x)
#             x = MaxPooling2D((2, 2))(x)
#             x = Flatten()(x)
#             x = Dense(128, activation='relu')(x)
#             return Model(input, x)

#         # Rebuild the Siamese network architecture
#         input_shape = (152, 152, 1)
#         base_network = initialize_base_network(input_shape)
#         input_a = Input(shape=input_shape)
#         input_b = Input(shape=input_shape)
#         processed_a = base_network(input_a)
#         processed_b = base_network(input_b)
#         distance = Lambda(lambda tensors: tf.sqrt(tf.reduce_sum(tf.square(tensors[0] - tensors[1]), axis=1, keepdims=True)))([processed_a, processed_b])
#         model = Model([input_a, input_b], distance)
#         model.load_weights(siamese_model_path)  # Load the saved model or weights
#         print("Siamese model loaded successfully.")
#         return model

#     def predict_siamese(self, image_path=None, coordinates=None):
#         if coordinates:
#             # Capture the screen if coordinates are provided
#             captured_image = self.capture_screen(coordinates)
#             # Convert the captured image to grayscale and resize it
#             image = cv2.cvtColor(captured_image, cv2.COLOR_BGR2GRAY)
#             image = cv2.resize(image, (152, 152))
#         elif image_path:
#             # Process the image from file path
#             image = self.preprocess_image(image_path)
#         else:
#             raise ValueError("Either image_path or coordinates must be provided.")
        
#         image = np.expand_dims(image, axis=0)  # Adjust as necessary for the model input
    
#         # Randomly select a subset of known good images to compare against
#         selected_good_images = random.sample(self.known_good_images, min(self.jury_size, len(self.known_good_images)))
#         votes = []
    
#         for known_good_image_path in selected_good_images:
#             known_good_image = self.preprocess_image(known_good_image_path)
#             known_good_image = np.expand_dims(known_good_image, axis=0)  # Adjust as necessary
    
#             # Prepare the pair
#             image_pair = [image, known_good_image]
    
#             # Make prediction
#             prediction_distance = self.siamese_model.predict(image_pair)
#             is_similar = prediction_distance < self.threshold  # Threshold to determine similarity
    
#             # Debugging output
#             print(f"Comparing {image_path if image_path else 'screen capture'} with {known_good_image_path}: Distance = {prediction_distance}, Similar = {is_similar}")
#             votes.append(is_similar)
    
#         # Calculate the majority vote
#         num_similar = sum(votes)
#         majority_similar = num_similar > len(votes) / 2
#         print(f"Total votes for 'Similar': {num_similar}/{len(votes)}. Final verdict: {'Normal' if majority_similar else 'Anomalous'}")
    
#         return majority_similar

#     def compare_to_known_images(self, captured_image, threshold=0.5, jury_size=3):
#         processed_captured_image = self.preprocess_data(captured_image)
#         verdicts = []

#         for _ in range(jury_size):
#             comparison_image = np.random.choice(self.known_good_images)
#             prediction = self.siamese_model.predict([processed_captured_image, comparison_image])
#             verdicts.append(prediction < threshold)

#         # Determine if the majority verdict is 'similar' or 'dissimilar'
#         final_verdict = sum(verdicts) >= jury_size / 2
#         return final_verdict
    
#     def evaluate_image(self, coordinates):
#         # Capture the image from screen coordinates
#         captured_image = self.capture_screen(coordinates)

#         # Compare to known images to get a verdict
#         is_normal = self.compare_to_known_images(captured_image)

#         return is_normal

#     def preprocess_image(self, img_path: str, target_size=(152, 152)):
#         try:
#             image = load_img(img_path, target_size=target_size, color_mode='grayscale')
#             image = img_to_array(image)
#             image /= 255.0  # Normalize to [0, 1]
#             if image.shape[-1] == 1:  # Check if image is grayscale
#                 image = image.squeeze(-1)  # Remove the channels dimension if grayscale
#         except FileNotFoundError as e:
#             print(f"Failed to open image at {img_path}: {e}")
#             return None
#         except Exception as e:
#             print(f"Error processing image at {img_path}: {e}")
#             return None
#         return image 

#     def capture_screen(self, coordinates):
#         """ Capture the screen area defined by coordinates. """
#         screenshot = ImageGrab.grab(bbox=coordinates)
#         return np.array(screenshot, dtype=np.uint8)  # Ensure dtype is uint8

#     def encode_image(self, image):
#         """ Encode image array to base64 string. """
#         if isinstance(image, np.ndarray):
#             # Convert numpy array to PIL Image if it's not already one
#             image = Image.fromarray(image.astype('uint8'), 'RGB')
#         buffer = io.BytesIO()
#         image.save(buffer, format="JPEG")
#         return base64.b64encode(buffer.getvalue()).decode('utf-8')


#     def describe_image(self, image_path=None, coordinates=None, role_description="User", image_description_directive="Describe the image"):
#         if image_path:
#             image = self.preprocess_image(image_path)
#         elif coordinates:
#             image = self.capture_screen(coordinates)
#         else:
#             raise ValueError("Either image_path or coordinates must be provided.")
    
#         if image is None:
#             raise ValueError("Failed to load or process image.")
    
#         base64_image = self.encode_image(image)
    
    
#         # Ensure image is properly formatted as a numpy array if not done in preprocess
#         if not isinstance(image, np.ndarray):
#             raise ValueError("Processed image must be a numpy array.")
    
#         # Encode the processed image to base64
#         base64_image = self.encode_image(image)
        
#         # Construct payload
#         payload = {
#             "model": "gpt-4-turbo",
#             "messages": [
#                 {
#                     "role": "system",
#                     "content": role_description
#                 },
#                 {
#                     "role": "user",
#                     "content": [
#                         {
#                             "type": "text",
#                             "text": image_description_directive
#                         },
#                         {
#                             "type": "image_url",
#                             "image_url": {
#                                 "url": f"data:image/jpeg;base64,{base64_image}"
#                             }
#                         }
#                     ]
#                 }
#             ],
#             "max_tokens": 300
#         }
    
#         # Send request
#         response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
#         if response.status_code != 200:
#             print("Error from API:", response.status_code, response.text)
#             return None
    
#         try:
#             description = response.json()['choices'][0]['message']['content']
#             print("Image Description:", description)
#             return description
#         except KeyError as e:
#             print("Failed to parse API response:", response.json())
#             raise e


In [57]:
# import os
# import numpy as np
# import cv2
# from PIL import Image, ImageGrab
# import base64
# import io
# from tensorflow.keras.models import Model, load_model
# from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda
# import tensorflow as tf
# from openai import OpenAI

# class ModelPredictor:
#     def __init__(self, siamese_model_path, known_good_images_folder, api_key_file='Eyball-OpenAI_key.txt', threshold=0.5, jury_size=12):
#         self.siamese_model_path = siamese_model_path
#         self.known_good_images_folder = known_good_images_folder
#         self.api_key = self.load_api_key(api_key_file)
#         self.client = OpenAI(api_key=self.api_key)
#         self.siamese_model = self.load_siamese_model()
#         self.threshold = threshold
#         self.jury_size = jury_size
#         self.known_good_images = self.preload_known_good_images()
#         self.headers = {
#             "Content-Type": "application/json",
#             "Authorization": f"Bearer {self.api_key}"
#         }

#     def load_api_key(self, filename):
#         try:
#             with open(filename, 'r') as file:
#                 return file.read().strip()
#         except FileNotFoundError:
#             raise Exception(f"API key file not found: {filename}")

#     def preload_known_good_images(self):
#         print("Preloading known good images...")
#         image_paths = []
#         for root, dirs, files in os.walk(self.known_good_images_folder):
#             for file in files:
#                 if file.lower().endswith(('.png', '.jpg', '.jpeg')):
#                     full_path = os.path.join(root, file)
#                     image_paths.append(full_path)
#         return image_paths

#     def load_siamese_model(self):
#         input_shape = (152, 152, 1)
#         input_a = Input(shape=input_shape)
#         input_b = Input(shape=input_shape)
#         base_network = self.initialize_base_network(input_shape)
#         processed_a = base_network(input_a)
#         processed_b = base_network(input_b)
#         distance = Lambda(lambda tensors: tf.sqrt(tf.reduce_sum(tf.square(tensors[0] - tensors[1]), axis=1)))([processed_a, processed_b])
#         model = Model(inputs=[input_a, input_b], outputs=distance)
#         model.load_weights(self.siamese_model_path)
#         print("Siamese model loaded successfully.")
#         return model

#     def initialize_base_network(self, input_shape):
#         input = Input(shape=input_shape)
#         x = Conv2D(64, (3, 3), activation='relu')(input)
#         x = MaxPooling2D((2, 2))(x)
#         x = Conv2D(128, (3, 3), activation='relu')(x)
#         x = MaxPooling2D((2, 2))(x)
#         x = Flatten()(x)
#         x = Dense(128, activation='relu')(x)
#         return Model(inputs=input, outputs=x)

#     def preprocess_image(self, image=None, image_path=None):
#         if image_path is not None:
#             # Load image from file path
#             try:
#                 image = Image.open(image_path)
#                 image = image.convert('L')  # Convert to grayscale
#             except FileNotFoundError as e:
#                 print(f"Failed to open image at {image_path}: {e}")
#                 return None
#             except Exception as e:
#                 print(f"Error processing image at {image_path}: {e}")
#                 return None
#         elif image is None:
#             print("No image provided for preprocessing")
#             return None
    
#         # Resize and normalize the image
#         target_size = (152, 152)
#         image = image.resize(target_size)
#         image = np.array(image, dtype=np.uint8)
#         image = image / 255.0  # Normalize to [0, 1]
    
#         if image.ndim == 2:  # Ensure image has 3 dimensions
#             image = np.expand_dims(image, -1)
    
#         return image


#     def capture_screen(self, coordinates):
#         screenshot = ImageGrab.grab(bbox=coordinates)
#         return np.array(screenshot, dtype=np.uint8)  # Ensure dtype is uint8

#     def predict_siamese(self, image_path=None, coordinates=None):
#         if coordinates:
#             print("Capturing screen...")
#             captured_image = self.capture_screen(coordinates)
#             if captured_image is None:
#                 print("Failed to capture screen")
#                 return None
#             image = self.preprocess_image(captured_image)
#         elif image_path:
#             image = self.preprocess_image(image_path)
#             if image is None:
#                 print("Failed to preprocess image from path")
#                 return None
#         else:
#             raise ValueError("Either image_path or coordinates must be provided.")
        
#         if image is None:
#             print("No image to process")
#             return None
        
#         image = np.expand_dims(image, axis=0)  # Adjust as necessary for the model input

#         print("Image loaded and processed, predicting...")
#         votes = []
#         for known_good_image_path in random.sample(self.known_good_images, min(self.jury_size, len(self.known_good_images))):
#             known_good_image = self.preprocess_image(known_good_image_path)
#             if known_good_image is None:
#                 continue  # Skip if image can't be processed
#             known_good_image = np.expand_dims(known_good_image, axis=0)

#             # Prepare the pair
#             image_pair = [image, known_good_image]

#             # Make prediction
#             prediction_distance = self.siamese_model.predict(image_pair)
#             is_similar = prediction_distance < self.threshold  # Threshold to determine similarity
#             print(f"Comparing {image_path if image_path else 'screen capture'} with {known_good_image_path}: Distance = {prediction_distance}, Similar = {is_similar}")
#             votes.append(is_similar)

#         # Calculate the majority vote
#         num_similar = sum(votes)
#         majority_similar = num_similar > len(votes) / 2
#         print(f"Total votes for 'Similar': {num_similar}/{len(votes)}. Final verdict: {'Normal' if majority_similar else 'Anomalous'}")

#         return 'Normal' if majority_similar else 'Anomalous'


#     def send_image_to_api(self, base64_image, role_description, image_description_directive):
#         print("Sending image to API...")
#         payload = {
#             "model": "gpt-4-turbo",
#             "messages": [
#                 {
#                     "role": "system",
#                     "content": role_description
#                 },
#                 {
#                     "role": "user",
#                     "content": [
#                         {
#                             "type": "text",
#                             "text": image_description_directive
#                         },
#                         {
#                             "type": "image_url",
#                             "image_url": {
#                                 "url": f"data:image/jpeg;base64,{base64_image}"
#                             }
#                         }
#                     ]
#                 }
#             ],
#             "max_tokens": 300
#         }
#         response = requests.post("https://api.openai.com/v1/chat/completions", headers=self.headers, json=payload)
#         if response.status_code != 200:
#             print("Error from API:", response.status_code, response.text)
#             return None

#         try:
#             description = response.json()['choices'][0]['message']['content']
#             return description
#         except KeyError as e:
#             print("Failed to parse API response:", response.json())
#             raise e


#     def describe_image(self, image_path=None, coordinates=None, role_description="User", image_description_directive="Describe the image"):
#         try:
#             if coordinates:
#                 print("Capturing screen for description...")
#                 image = self.capture_screen(coordinates)
#             elif image_path:
#                 print("Loading image from path for description...")
#                 image = self.preprocess_image(image_path)
#             else:
#                 raise ValueError("Either image_path or coordinates must be provided.")

#             if image is None:
#                 raise ValueError("Failed to load or process image.")

#             print("Encoding image for API request...")
#             base64_image = self.encode_image(image)
#             description = self.send_image_to_api(base64_image, role_description, image_description_directive)
#             print("Description received.")
#             return description
#         except Exception as e:
#             print(f"Error in describe_image: {str(e)}")
#             return None

#     def compare_images(self, test_image):
#         print("Comparing images...")
#         # Similar implementation as previously described for Siamese model prediction
#         pass

#     def encode_image(self, image):
#         """Converts a numpy array image to JPEG base64."""
#         try:
#             if image.ndim == 3 and image.shape[2] == 1:  # Check if it's single-channel
#                 image = image.squeeze(-1)  # Remove the last dimension if it's single-channel
#             if isinstance(image, np.ndarray):
#                 # Ensure the data type is uint8
#                 image = (image * 255).clip(0, 255).astype(np.uint8)
#                 # Convert numpy array to PIL Image
#                 if image.ndim == 2:  # Grayscale
#                     image = Image.fromarray(image, 'L')
#                 else:
#                     image = Image.fromarray(image, 'RGB')
#             buffer = io.BytesIO()
#             image.save(buffer, format="JPEG")
#             encoded_string = base64.b64encode(buffer.getvalue()).decode('utf-8')
#             return encoded_string
#         except Exception as e:
#             raise ValueError(f"Error encoding image: {str(e)}")

#     def preprocess_image_from_array(self, image_array):
#         """Preprocess an image given as a NumPy array."""
#         if image_array.ndim == 3 and image_array.shape[-1] == 3:  # Assuming RGB input
#             image = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)
#         image = cv2.resize(image, (152, 152))
#         image = np.array(image, dtype=np.float32) / 255.0
#         image = np.expand_dims(image, axis=-1)  # Ensure it has a single channel if needed
#         return image

In [92]:
import os
import numpy as np
import cv2
from PIL import Image, ImageGrab
import base64
import io
import requests
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda
import tensorflow as tf
from openai import OpenAI

class ModelPredictor:
    def __init__(self, siamese_model_path, known_good_images_folder, api_key_file='Eyball-OpenAI_key.txt', threshold=0.5, jury_size=12):
        self.siamese_model_path = siamese_model_path
        self.known_good_images_folder = known_good_images_folder
        self.api_key = self.load_api_key(api_key_file)
        self.client = OpenAI(api_key=self.api_key)
        self.siamese_model = self.load_siamese_model()
        self.threshold = threshold
        self.jury_size = jury_size
        self.known_good_images = self.preload_known_good_images()
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

    def load_api_key(self, filename):
        try:
            with open(filename, 'r') as file:
                return file.read().strip()
        except FileNotFoundError:
            raise Exception(f"API key file not found: {filename}")

    def preload_known_good_images(self):
        print("Preloading known good images...")
        image_paths = []
        for root, dirs, files in os.walk(self.known_good_images_folder):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    full_path = os.path.join(root, file)
                    image_paths.append(full_path)
        return image_paths

    def load_siamese_model(self):
        input_shape = (152, 152, 1)
        input_a = Input(shape=input_shape)
        input_b = Input(shape=input_shape)
        base_network = self.initialize_base_network(input_shape)
        processed_a = base_network(input_a)
        processed_b = base_network(input_b)
        distance = Lambda(lambda tensors: tf.sqrt(tf.reduce_sum(tf.square(tensors[0] - tensors[1]), axis=1)))([processed_a, processed_b])
        model = Model(inputs=[input_a, input_b], outputs=distance)
        model.load_weights(self.siamese_model_path)
        print("Siamese model loaded successfully.")
        return model

    def initialize_base_network(self, input_shape):
        input = Input(shape=input_shape)
        x = Conv2D(64, (3, 3), activation='relu')(input)
        x = MaxPooling2D((2, 2))(x)
        x = Conv2D(128, (3, 3), activation='relu')(x)
        x = MaxPooling2D((2, 2))(x)
        x = Flatten()(x)
        x = Dense(128, activation='relu')(x)
        return Model(inputs=input, outputs=x)

    
    def preprocess_image(self, image=None, image_path=None):
        if image_path:
            # Load image from file path
            try:
                image = Image.open(image_path)
            except FileNotFoundError as e:
                print(f"Failed to open image at {image_path}: {e}")
                return None
            except Exception as e:
                print(f"Error processing image at {image_path}: {e}")
                return None
        if isinstance(image, np.ndarray):
            # Convert to PIL Image for consistent processing
            image = Image.fromarray(image.astype('uint8'))
    
        if image is None:
            print("No image provided for preprocessing")
            return None
    
        # Convert to grayscale and resize
        image = image.convert('L')  # Convert to grayscale
        target_size = (152, 152)
        image = image.resize(target_size)
        image = np.array(image, dtype=np.float32) / 255.0  # Normalize to [0, 1]
    
        if image.ndim == 2:  # Ensure image has 3 dimensions if it's still 2D
            image = np.expand_dims(image, -1)
    
        return image





    def capture_screen(self, coordinates):
        screenshot = ImageGrab.grab(bbox=coordinates)
        return np.array(screenshot, dtype=np.uint8)  # Ensure dtype is uint8

    def predict_siamese(self, image_path=None, coordinates=None):
        if coordinates:
            print("Capturing screen...")
            captured_image = self.capture_screen(coordinates)
            if captured_image is None:
                print("Failed to capture screen")
                return None
            image = self.preprocess_image(image=captured_image)
        elif image_path:
            image = self.preprocess_image(image_path=image_path)
            if image is None:
                print("Failed to preprocess image from path")
                return None
        else:
            raise ValueError("Either image_path or coordinates must be provided.")

    def predict_siamese(self, image_path=None, coordinates=None):
        if coordinates:
            print("Capturing screen...")
            captured_image = self.capture_screen(coordinates)
            if captured_image is None:
                print("Failed to capture screen")
                return None
            image = self.preprocess_image(image=captured_image)
        elif image_path:
            image = self.preprocess_image(image_path=image_path)
        else:
            raise ValueError("Either image_path or coordinates must be provided.")
    
        if image is None:
            print("No image to process")
            return None
    
        image = np.expand_dims(image, axis=0)  # Adjust as necessary for the model input
    
        print("Image loaded and processed, predicting...")

        votes = []
        for known_good_image_path in random.sample(self.known_good_images, min(self.jury_size, len(self.known_good_images))):
            known_good_image = self.preprocess_image(image_path=known_good_image_path)
            if known_good_image is None:
                continue  # Skip if image can't be processed
            known_good_image = np.expand_dims(known_good_image, axis=0)

            # Prepare the pair
            image_pair = [image, known_good_image]

            # Make prediction
            prediction_distance = self.siamese_model.predict(image_pair)
            is_similar = prediction_distance < self.threshold  # Threshold to determine similarity
            print(f"Comparing {image_path if image_path else 'screen capture'} with {known_good_image_path}: Distance = {prediction_distance}, Similar = {is_similar}")
            votes.append(is_similar)

        # Calculate the majority vote
        num_similar = sum(votes)
        majority_similar = num_similar > len(votes) / 2
        print(f"Total votes for 'Similar': {num_similar}/{len(votes)}. Final verdict: {'Normal' if majority_similar else 'Anomalous'}")

        return 'Normal' if majority_similar else 'Anomalous'

    def describe_image(self, image_path=None, coordinates=None, role_description="User", image_description_directive="Describe the image"):
        try:
            if coordinates:
                print("Capturing screen for description...")
                image = self.capture_screen(coordinates)
            elif image_path:
                print("Loading image from path for description...")
                image = self.preprocess_image(image_path=image_path)
            else:
                raise ValueError("Either image_path or coordinates must be provided.")
    
            if image is None:
                raise ValueError("Failed to load or process image.")
    
            print("Encoding image for API request...")
            base64_image = self.encode_image(image)
            description = self.send_image_to_api(base64_image, role_description, image_description_directive)
            print("Description received.")
            return description
        except Exception as e:
            print(f"Error in describe_image: {str(e)}")
            return None

    def encode_image(self, image):
        """Encode an image as a base64 JPEG string.

        Args:
            image: Either a numpy array or an image object exposing a
                PIL-style ``save(buffer, format=...)`` method.

                * Arrays with a trailing single channel ``(H, W, 1)`` are
                  squeezed to 2-D.
                * Non-integer arrays are multiplied by 255 before being
                  clipped to 0..255 (assumes values normalised to 0..1 --
                  TODO confirm against ``preprocess_image``).
                * Integer arrays are assumed to already hold 0..255 values
                  and are only clipped; multiplying them by 255 would wrap
                  around for uint8 data.

        Returns:
            str: The JPEG bytes encoded as base64 text.

        Raises:
            ValueError: If the image cannot be converted or saved; the
                underlying exception is chained as ``__cause__``.
        """
        try:
            # Type check first: non-array inputs have no ``ndim`` attribute,
            # so probing it up front made the pass-through path unreachable.
            if isinstance(image, np.ndarray):
                if image.ndim == 3 and image.shape[2] == 1:
                    image = image.squeeze(-1)  # (H, W, 1) -> (H, W)
                if not np.issubdtype(image.dtype, np.integer):
                    image = image * 255
                image = image.clip(0, 255).astype(np.uint8)
                # Let Pillow infer the mode from shape/dtype (2-D -> L,
                # 3 channels -> RGB, 4 channels -> RGBA).
                image = Image.fromarray(image)

            # Only L and RGB are written as-is; other modes (e.g. RGBA, P)
            # are flattened to RGB before saving as JPEG.
            if getattr(image, "mode", "RGB") not in ("L", "RGB"):
                image = image.convert("RGB")

            buffer = io.BytesIO()
            image.save(buffer, format="JPEG")
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
            raise ValueError(f"Error encoding image: {str(e)}") from e

    def send_image_to_api(self, base64_image, role_description, image_description_directive,
                          model="gpt-4-turbo", max_tokens=300, timeout=60):
        """POST a base64 JPEG to the OpenAI chat-completions endpoint.

        Args:
            base64_image: Base64 text of a JPEG image (embedded as a data URL).
            role_description: Content of the system message.
            image_description_directive: Text part of the user message.
            model: Model name placed in the request payload.
            max_tokens: Upper bound on generated tokens placed in the payload.
            timeout: Seconds passed to ``requests.post``; without it a stalled
                connection would block the caller indefinitely.

        Returns:
            The ``choices[0].message.content`` value of the response, or
            ``None`` when the API answers with a non-200 status (the status
            and body are printed).

        Raises:
            KeyError, IndexError, TypeError, ValueError: If a 200 response
                does not have the expected JSON structure.
            requests.RequestException: On transport failures, including
                timeouts.
        """
        print("Sending image to API...")
        payload = {
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": role_description
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": image_description_directive
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": max_tokens
        }
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=timeout,
        )
        if response.status_code != 200:
            print("Error from API:", response.status_code, response.text)
            return None

        try:
            return response.json()['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError, ValueError):
            # Print the raw body: re-parsing it here could fail a second time
            # and mask the original error.
            print("Failed to parse API response:", response.text)
            raise



In [93]:
# Build the shared predictor used by every cell below. Per the cell output,
# construction loads the Siamese model and preloads the known-good images.
predictor = ModelPredictor(siamese_model_path, known_good_images_folder, api_key_file, threshold, jury_size)

Siamese model loaded successfully.
Preloading known good images...


In [94]:
# Evaluate a region of the live screen instead of a saved image file.
# The region is passed as a (left, top, right, bottom) tuple.
left, top = 10, 10
right, bottom = 500, 200
capture_region = (left, top, right, bottom)

siamese_result = predictor.predict_siamese(coordinates=capture_region)
print("Siamese result", siamese_result)

# Kept as the final expression so the notebook displays the returned description.
predictor.describe_image(
    coordinates=capture_region,
    role_description=role,
    image_description_directive=image_description_directive,
)

Capturing screen...
Image loaded and processed, predicting...
Comparing screen capture with D:\training_images\test\valid\zoomed\randomized_wl\59924dfb-9fba-44ad-b88a-1438253724f9_1.png: Distance = [1.1311841], Similar = [False]
Comparing screen capture with D:\training_images\test\valid\zoomed\randomized_wl\1a274c9a-0959-4a2b-9ab7-8842fc2dee70_0.png: Distance = [1.1311841], Similar = [False]
Comparing screen capture with D:\training_images\test\valid\randomized_wl\cropped\2e8223fe-01e0-42dc-bdb1-0aaa74bdff9a_1.png: Distance = [1.1311841], Similar = [False]
Comparing screen capture with D:\training_images\test\valid\dummy_class\9b92280d-d250-4cef-9538-311449bee364.png: Distance = [1.1311841], Similar = [False]
Comparing screen capture with D:\training_images\test\valid\zoomed\randomized_wl\067663c0-bb3b-426e-938c-6b4eb7c59e88_0.png: Distance = [1.097731], Similar = [False]
Comparing screen capture with D:\training_images\test\valid\randomized_wl\cropped\bf20a085-6196-4bfe-84bb-5b6a82fd

'*** ANOMALOUS ***'

In [97]:
# From here on, evaluate an image that was captured earlier and saved to disk.
test_image_path = r'C:\temp\engineer_typing3.png'

siamese_verdict = predictor.predict_siamese(image_path=test_image_path)
print("Model predicts", siamese_verdict)

actual_description = predictor.describe_image(
    image_path=test_image_path,
    role_description=role,
    image_description_directive=image_description_directive,
)
print("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing C:\temp\engineer_typing3.png with D:\training_images\test\valid\randomized_wl\cropped\8d86ad60-582d-4409-990b-9f6d15ef6374_0.png: Distance = [0.9324705], Similar = [False]
Comparing C:\temp\engineer_typing3.png with D:\training_images\test\valid\dummy_class\d4c9283f-bee6-4d8b-9a37-86fd1dc377b9.png: Distance = [1.0572661], Similar = [False]
Comparing C:\temp\engineer_typing3.png with D:\training_images\test\valid\zoomed\randomized_wl\4782ecdd-98b0-479b-8e07-512a009783d3_1.png: Distance = [1.0572661], Similar = [False]
Comparing C:\temp\engineer_typing3.png with D:\training_images\test\valid\randomized_wl\cropped\234985a4-b400-4dd8-85a4-9885251dee0c_1.png: Distance = [1.0566574], Similar = [False]
Comparing C:\temp\engineer_typing3.png with D:\training_images\test\valid\zoomed\randomized_wl\7f0d8a84-1c00-4d94-b90d-c441dbb82e38_1.png: Distance = [0.3892048], Similar = [ True]
Comparing C:\temp\engineer_typing3.png with D:\training_images

In [99]:
test_image_path = r'D:\Custom_invalid\cat.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\dummy_class\7b786536-f33f-4f61-b29a-f9f8aad246ea.png: Distance = [0.7493696], Similar = [False]
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\randomized_wl\cropped\5d8c80cb-9ae9-409c-9d20-cff63c7ba7c4_1.png: Distance = [0.9431377], Similar = [False]
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\randomized_wl\cropped\bad4fb22-813b-48b8-b28a-6f3818632dc6_0.png: Distance = [1.0546803], Similar = [False]
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\zoomed\randomized_wl\e2ef633b-06a5-4bce-b2ee-b4e97a666fe4_1.png: Distance = [1.1204376], Similar = [False]
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\randomized_wl\cropped\7c04ad64-9d77-4d95-9409-552780caa4a1_1.png: Distance = [1.0154984], Similar = [False]
Comparing D:\Custom_invalid\cat.jpg with D:\training_images\test\valid\rando

In [100]:
test_image_path = r'D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg with D:\training_images\test\valid\zoomed\randomized_wl\6fd235e4-47ea-4877-be14-5c6b4b88bf7e_1.png: Distance = [0.], Similar = [ True]
Comparing D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg with D:\training_images\test\valid\zoomed\randomized_wl\68e72bde-439f-4b9d-81ef-423d45f5b736_1.png: Distance = [0.], Similar = [ True]
Comparing D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg with D:\training_images\test\valid\zoomed\randomized_wl\b82f984b-65f5-410f-be06-dbd910375dd1_0.png: Distance = [0.], Similar = [ True]
Comparing D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg with D:\training_images\test\valid\randomized_wl\cropped\f491fabe-538b-470e-9218-ee084b603fce_1.png: Distance = [0.], Similar = [ True]
Comparing D:\custom_test_valid\internet_27f6574b96deb965217cff1aac35fc_gallery.jpg wi

In [101]:
test_image_path = r'D:\custom_test_valid\istockphoto-493741910-612x612.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\custom_test_valid\istockphoto-493741910-612x612.jpg with D:\training_images\test\valid\zoomed\randomized_wl\e9406de0-2d76-44a3-91bd-195efd0badd6_0.png: Distance = [0.14590386], Similar = [ True]
Comparing D:\custom_test_valid\istockphoto-493741910-612x612.jpg with D:\training_images\test\valid\dummy_class\3b5992f1-dfbf-486d-9281-14e4c5aa5947.png: Distance = [0.14590386], Similar = [ True]
Comparing D:\custom_test_valid\istockphoto-493741910-612x612.jpg with D:\training_images\test\valid\randomized_wl\cropped\e989e96d-4cf2-4923-a414-3f737f1ce3bd_0.png: Distance = [0.14590386], Similar = [ True]
Comparing D:\custom_test_valid\istockphoto-493741910-612x612.jpg with D:\training_images\test\valid\randomized_wl\cropped\dfae2e78-9e8f-4ab8-afd0-a3a63d024d6c_1.png: Distance = [0.14590386], Similar = [ True]
Comparing D:\custom_test_valid\istockphoto-493741910-612x612.jpg with D:\training_images\test\valid\randomized_wl\cropped\aaae8e98-5b03

In [102]:
test_image_path = r'D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung-nodules.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung-nodules.jpg with D:\training_images\test\valid\zoomed\randomized_wl\9041adb7-47b1-45df-8836-52a3efb3163e_0.png: Distance = [0.71923923], Similar = [False]
Comparing D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung-nodules.jpg with D:\training_images\test\valid\randomized_wl\cropped\44028529-7778-4fa4-910f-60feeddbedfc_1.png: Distance = [0.71923923], Similar = [False]
Comparing D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung-nodules.jpg with D:\training_images\test\valid\dummy_class\de93f13d-70a0-4bdb-b7d1-38ce241e002c.png: Distance = [0.71923923], Similar = [False]
Comparing D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung-nodules.jpg with D:\training_images\test\valid\dummy_class\3308f48d-ce94-408f-9e75-c6446c24da61.png: Distance = [0.54152286], Similar = [False]
Comparing D:\custom_test_valid\low-dose-lung-cancer-screening-with-lung

In [103]:
test_image_path = r'D:\custom_invalid\istockphoto-with_arrow.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\custom_invalid\istockphoto-with_arrow.jpg with D:\training_images\test\valid\randomized_wl\cropped\a3d34351-62ea-4373-8b67-9ec90f55f10b_0.png: Distance = [0.8484544], Similar = [False]
Comparing D:\custom_invalid\istockphoto-with_arrow.jpg with D:\training_images\test\valid\dummy_class\0825e22d-4170-403e-9632-daca06b5313a.png: Distance = [0.8484544], Similar = [False]
Comparing D:\custom_invalid\istockphoto-with_arrow.jpg with D:\training_images\test\valid\randomized_wl\cropped\b569d88a-0bba-4191-8556-74742cc13cfb_0.png: Distance = [0.8484544], Similar = [False]
Comparing D:\custom_invalid\istockphoto-with_arrow.jpg with D:\training_images\test\valid\randomized_wl\cropped\af506049-53cb-4068-9e5c-bff01f5d4798_0.png: Distance = [0.72347397], Similar = [False]
Comparing D:\custom_invalid\istockphoto-with_arrow.jpg with D:\training_images\test\valid\randomized_wl\cropped\c9128f12-639d-4024-8f74-fd7ffb95357c_1.png: Distance = [0.8199599

In [104]:
test_image_path = r'D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg with D:\training_images\test\valid\zoomed\randomized_wl\e3826313-8744-4378-8796-536b7db7ed98_0.png: Distance = [1.1311321], Similar = [False]
Comparing D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg with D:\training_images\test\valid\dummy_class\f9c4529a-d2cf-4c73-8beb-d5502eeafce4.png: Distance = [1.1311321], Similar = [False]
Comparing D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg with D:\training_images\test\valid\randomized_wl\cropped\1bcb29aa-92e6-415c-8b9a-cb8477aaf506_1.png: Distance = [1.1311321], Similar = [False]
Comparing D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg with D:\training_images\test\valid\zoomed\randomized_wl\7297dff2-4fc1-4782-ac30-3643220591e2_1.png: Distance = [1.1311321], Similar = [False]
Comparing D:\custom_invalid\Lung_abscess_-_CT_with_overlay.jpg with D:\training_images\test\valid\zoomed\randomized_wl\df13097c-cf2f-4866-82b9-3fb58

In [105]:
test_image_path = r'D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png with D:\training_images\test\valid\randomized_wl\cropped\00df59d8-dde0-469e-8fb8-792b2ef69778_1.png: Distance = [0.58764404], Similar = [False]
Comparing D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png with D:\training_images\test\valid\zoomed\randomized_wl\3c142d11-80f8-4c46-9ea8-1b438e4d2a08_1.png: Distance = [0.6476503], Similar = [False]
Comparing D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png with D:\training_images\test\valid\dummy_class\7be10dbf-0923-43ef-8257-48fd80882876.png: Distance = [0.6476503], Similar = [False]
Comparing D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png with D:\training_images\test\valid\randomized_wl\cropped\2db820b6-658a-47b8-964b-2ee047c88e84_1.png: Distance = [0.6476503], Similar = [False]
Comparing D:\Custom_invalid\augmented_0abe42cc-623a-46f2-91ee-be4f339ff73b.png wit

In [106]:
test_image_path = r'C:\temp\medical_image_zoomed_more_resized_modified_aspect_ratio_hairlines.png'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing C:\temp\medical_image_zoomed_more_resized_modified_aspect_ratio_hairlines.png with D:\training_images\test\valid\dummy_class\733dc5ac-9793-4a7f-afef-bd4abd186d8d.png: Distance = [0.98369527], Similar = [False]
Comparing C:\temp\medical_image_zoomed_more_resized_modified_aspect_ratio_hairlines.png with D:\training_images\test\valid\dummy_class\35d5f4c2-091d-475d-be7a-49054211db11.png: Distance = [0.98369527], Similar = [False]
Comparing C:\temp\medical_image_zoomed_more_resized_modified_aspect_ratio_hairlines.png with D:\training_images\test\valid\zoomed\randomized_wl\8d1dd3a0-a116-4c86-a2f4-104a6408f61c_0.png: Distance = [0.6074642], Similar = [False]
Comparing C:\temp\medical_image_zoomed_more_resized_modified_aspect_ratio_hairlines.png with D:\training_images\test\valid\zoomed\randomized_wl\3b12928e-9db7-4491-b1ff-032a074eacc2_1.png: Distance = [0.6223738], Similar = [False]
Comparing C:\temp\medical_image_zoomed_more_resized_modifi

In [108]:
test_image_path = r'D:\Custom_invalid\internet-gettyimages-1320918955-612x612_small_label.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)

Image loaded and processed, predicting...
Comparing D:\Custom_invalid\internet-gettyimages-1320918955-612x612_small_label.jpg with D:\training_images\test\valid\randomized_wl\cropped\b35deb13-2b88-48e2-8073-76d0ab060d04_0.png: Distance = [0.77528584], Similar = [False]
Comparing D:\Custom_invalid\internet-gettyimages-1320918955-612x612_small_label.jpg with D:\training_images\test\valid\randomized_wl\cropped\21909e1b-e959-4682-ad49-4b7f7d51d043_1.png: Distance = [0.77528584], Similar = [False]
Comparing D:\Custom_invalid\internet-gettyimages-1320918955-612x612_small_label.jpg with D:\training_images\test\valid\dummy_class\76553407-3d82-43bb-b2e2-077e8b38cd12.png: Distance = [0.77528584], Similar = [False]
Comparing D:\Custom_invalid\internet-gettyimages-1320918955-612x612_small_label.jpg with D:\training_images\test\valid\dummy_class\b3fbf5e3-7e02-4605-a81a-5a21f26740e6.png: Distance = [0.77528584], Similar = [False]
Comparing D:\Custom_invalid\internet-gettyimages-1320918955-612x612_sm

In [109]:
test_image_path = r'D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg'
print("Model predicts", predictor.predict_siamese(test_image_path))

# Query the vision API once and reuse the answer; calling describe_image a
# second time only to print it doubles the API cost and latency.
actual_description = predictor.describe_image(test_image_path, None, role, image_description_directive)
print ("LLM description", actual_description)


Image loaded and processed, predicting...
Comparing D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg with D:\training_images\test\valid\dummy_class\e77a1fc0-79ca-448f-b40a-48422ddb97e2.png: Distance = [0.45146817], Similar = [ True]
Comparing D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg with D:\training_images\test\valid\randomized_wl\cropped\d2e9071f-9d08-443b-8f34-364ee11d9105_0.png: Distance = [0.45649433], Similar = [ True]
Comparing D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg with D:\training_images\test\valid\randomized_wl\cropped\7da50805-7bb4-445e-bcb7-e3e07fab64d6_1.png: Distance = [0.45649433], Similar = [ True]
Comparing D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg with D:\training_images\test\valid\zoomed\randomized_wl\de93f13d-70a0-4bdb-b7d1-38ce241e002c_1.png: Distance = [0.45649433], Similar = [ True]
Comparing D:\Custom_test_valid\internet-gettyimages-1322138871-612x612.jpg with D:\training_ima

In [110]:
def evaluate_methods_simplified(base_folder, sample_size=40, jury_size=12, role="User", image_description_directive="Describe the image"):
    """Score the Siamese verdict and the GPT description on labelled images.

    Up to ``sample_size`` files are drawn at random from each of
    ``<base_folder>/valid`` (label 1, normal) and ``<base_folder>/invalid``
    (label 0, anomalous). Every sampled image is run through the module-level
    ``predictor`` twice: ``predict_siamese`` and ``describe_image``.

    Args:
        base_folder: Directory containing the ``valid`` and ``invalid``
            sub-folders.
        sample_size: Maximum number of images drawn from each sub-folder.
        jury_size: Accepted for interface compatibility; not used by this
            function (the jury size is whatever ``predictor`` was built with).
        role: System-role text forwarded to ``describe_image``.
        image_description_directive: Prompt forwarded to ``describe_image``.

    Returns:
        dict: ``{"siamese": {...}, "gpt": {...}}`` where each inner dict holds
        ``accuracy``, ``precision``, ``recall`` and ``f1``.

    Raises:
        ValueError: If either sub-folder is missing.
    """
    valid_folder = os.path.join(base_folder, 'valid')
    invalid_folder = os.path.join(base_folder, 'invalid')

    # Ensure directories exist
    if not os.path.isdir(valid_folder) or not os.path.isdir(invalid_folder):
        raise ValueError("One or more image directories do not exist.")

    def sample_paths(folder):
        # List the folder once, keep regular files only, then sample.
        names = [name for name in os.listdir(folder)
                 if os.path.isfile(os.path.join(folder, name))]
        chosen = random.sample(names, min(sample_size, len(names)))
        return [os.path.join(folder, name) for name in chosen]

    # Pair each path with its ground-truth label so the folder is never
    # re-derived from the bare filename (names may repeat across folders).
    labelled_paths = [(path, 1) for path in sample_paths(valid_folder)]
    labelled_paths += [(path, 0) for path in sample_paths(invalid_folder)]
    actuals = [label for _, label in labelled_paths]  # 1 for normal, 0 for anomalous

    predictions_siamese = []
    predictions_gpt = []

    for image_path, _ in labelled_paths:
        print ("Filename", os.path.basename(image_path))

        # Siamese Network Prediction: the verdict is the string 'Normal' or
        # 'Anomalous'; anything other than 'Normal' is scored as anomalous.
        siamese_result = predictor.predict_siamese(image_path)
        predictions_siamese.append(int(siamese_result == 'Normal'))
        print ("Siamese result", siamese_result)

        # GPT Vision Direct Analysis Prediction: describe_image returns None
        # on failure, which is scored as anomalous instead of crashing the run.
        description_result = predictor.describe_image(image_path, None, role, image_description_directive)
        predictions_gpt.append(int(isinstance(description_result, str) and 'NORMAL' in description_result))
        print ("API result", description_result)

    def summarize(predictions):
        # Same four metrics for either method, measured against the labels.
        return {
            'accuracy': accuracy_score(actuals, predictions),
            'precision': precision_score(actuals, predictions),
            'recall': recall_score(actuals, predictions),
            'f1': f1_score(actuals, predictions),
        }

    results = {
        "siamese": summarize(predictions_siamese),
        "gpt": summarize(predictions_gpt),
    }

    print('Evaluation Results - Siamese Model:', results["siamese"])
    print('Evaluation Results - GPT Model:', results["gpt"])

    return results




In [None]:
# Example run: compare both methods on images sampled from the labelled test
# folder, reporting any failure as a single printed line.
base_folder = r'D:\model_comparison_test'
sample_size = 40
try:
    results = evaluate_methods_simplified(
        base_folder=base_folder,
        sample_size=sample_size,
        jury_size=jury_size,
        role=role,
        image_description_directive=image_description_directive,
    )
    print("Evaluation Results:", results)
except Exception as error:
    print("Error during evaluation:", str(error))


Filename 00df59d8-dde0-469e-8fb8-792b2ef69778.png
Image loaded and processed, predicting...
Comparing D:\model_comparison_test\valid\00df59d8-dde0-469e-8fb8-792b2ef69778.png with D:\training_images\test\valid\randomized_wl\cropped\aa61583b-497d-43c1-b998-729a9af42a90_0.png: Distance = [0.14239636], Similar = [ True]
Comparing D:\model_comparison_test\valid\00df59d8-dde0-469e-8fb8-792b2ef69778.png with D:\training_images\test\valid\zoomed\randomized_wl\7a688174-453f-4e22-b068-37ad4cf8c3f9_0.png: Distance = [0.], Similar = [ True]
Comparing D:\model_comparison_test\valid\00df59d8-dde0-469e-8fb8-792b2ef69778.png with D:\training_images\test\valid\zoomed\randomized_wl\21d5125d-5dae-48f4-a71e-62a9e551a954_1.png: Distance = [0.], Similar = [ True]
Comparing D:\model_comparison_test\valid\00df59d8-dde0-469e-8fb8-792b2ef69778.png with D:\training_images\test\valid\zoomed\randomized_wl\b2df743e-0f8d-4fe0-8fd9-80500a138c5c_0.png: Distance = [0.], Similar = [ True]
Comparing D:\model_comparison_t