In [1]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        self.root.geometry("800x700")  # Increased width for Gemini output
        self.root.config(bg="#ADD8E6")  # Light blue color

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None # Initialize Gemini client later

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # Replace with your actual API key
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the analyze button (initially hidden)
        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()  # Initially hide the analyze button

        # Create and pack the save button (initially hidden)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()  # Initially hide the save button

        # Create a Text widget to display Gemini's response
        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED) # Make it read-only

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def update_feed(self):
        """Update the camera feed continuously."""
        ret, frame = self.cap.read()
        if ret:
            frame = cv2.flip(frame, 1)  # Flip horizontally for mirror effect
            self.frame = frame
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            imgtk = ImageTk.PhotoImage(image=img)

            # Store the image reference to prevent garbage collection
            self.canvas.imgtk = imgtk
            self.canvas.configure(image=imgtk)

        # Update the feed every 10 ms
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame from the camera feed."""
        self.captured_image = self.frame.copy()
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)

        # Display the captured image on the canvas
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide the capture button and show the analyze and save buttons
        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)

        # Reset Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured. Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal") # Enable analyze button after save

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis."""
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.capture_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        is_success, buffer = cv2.imencode(".jpg", self.captured_image)
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons()
            return

        image_bytes = BytesIO(buffer)

        # Prepare the image for Gemini API
        # The genai.upload_file method now handles BytesIO directly, or you can send it as a part
        # within the generate_content call directly if you use the new Parts structure.
        # For this example, we'll send it directly as part of the contents.

        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image."

        try:
            # Create a GenerativeModel instance
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            # Send image and prompt to Gemini
            response = model.generate_content(
                contents=[
                    genai.upload_file(image_bytes.getvalue(), mime_type="image/jpeg"),
                    prompt
                ]
            )

            # Display the response
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
        finally:
            self.reset_buttons()

    def reset_buttons(self):
        """Resets the state of the buttons after an action."""
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.capture_button.pack(side=tk.LEFT, padx=10) # Show capture button again
        self.analyze_button.pack_forget() # Hide analyze button
        self.save_button.pack_forget() # Hide save button


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

  from .autonotebook import tqdm as notebook_tqdm


You can get one from https://makers.generativeai.google/key
Photo saved as captured_image_20250608_142347.jpg
Exiting...


In [None]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        self.root.geometry("800x700")  # Increased width for Gemini output
        self.root.config(bg="#ADD8E6")  # Light blue color

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Flag to control camera feed updates

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        # It's highly recommended to load this from an environment variable for security:
        # self.api_key = os.getenv("GEMINI_API_KEY")
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # Replace with your actual API key
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the analyze button (initially hidden)
        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()  # Initially hide the analyze button

        # Create and pack the save button (initially hidden)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()  # Initially hide the save button

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget() # Initially hide the retake button

        # Create a Text widget to display Gemini's response
        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED) # Make it read-only

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def update_feed(self):
        """Update the camera feed continuously if self.is_capturing is True."""
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)  # Flip horizontally for mirror effect
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        # Update the feed every 10 ms regardless of whether it's live or frozen
        # This allows the GUI to remain responsive.
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame and freeze the feed."""
        self.is_capturing = False # Stop updating the live feed
        self.captured_image = self.frame.copy() # Use the last live frame

        # Display the captured image on the canvas (freezing the image)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show analyze, save, and retake buttons
        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        # Reset Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured. Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True # Resume live feed
        self.captured_image = None # Clear captured image

        # Hide analyze, save, retake buttons, show capture button
        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Clear Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal") # Enable analyze button after save

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and display result."""
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        is_success, buffer = cv2.imencode(".jpg", self.captured_image)
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes = BytesIO(buffer.tobytes()) # .tobytes() is crucial here

        # Prepare the prompt for Gemini
        # Adjusted prompt to focus on medication names or general description
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            # Send image and prompt to Gemini
            response = model.generate_content(
                contents=[
                    {"mime_type": "image/jpeg", "data": image_bytes.getvalue()}, # Direct data sending
                    {"text": prompt}
                ]
            )

            # Display the response
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Error: image "pyimage2175" doesn't exist


: 

In [2]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        self.root.geometry("720x1280")  # Increased width for Gemini output
        self.root.config(bg="#ADD8E6")  # Light blue color

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Flag to control camera feed updates

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        # It's highly recommended to load this from an environment variable for security:
        # self.api_key = os.getenv("GEMINI_API_KEY")
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # Replace with your actual API key
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Bind the window close protocol to a custom method
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the analyze button (initially hidden)
        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()  # Initially hide the analyze button

        # Create and pack the save button (initially hidden)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()  # Initially hide the save button

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget() # Initially hide the retake button

        # Create a Text widget to display Gemini's response
        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED) # Make it read-only

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        """Handle the window closing event (e.g., clicking the 'X' button)."""
        print("Closing window...")
        self.release_camera()
        self.root.destroy() # Properly closes the Tkinter window

    def update_feed(self):
        """Update the camera feed continuously if self.is_capturing is True."""
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)  # Flip horizontally for mirror effect
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        # Update the feed every 10 ms regardless of whether it's live or frozen
        # This allows the GUI to remain responsive.
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame and freeze the feed."""
        self.is_capturing = False # Stop updating the live feed
        self.captured_image = self.frame.copy() # Use the last live frame

        # Display the captured image on the canvas (freezing the image)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show analyze, save, and retake buttons
        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        # Reset Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured. Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True # Resume live feed
        self.captured_image = None # Clear captured image

        # Hide analyze, save, retake buttons, show capture button
        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Clear Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal") # Enable analyze button after save

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and display result."""
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        is_success, buffer = cv2.imencode(".jpg", self.captured_image)
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes = BytesIO(buffer.tobytes()) # .tobytes() is crucial here

        # Prepare the prompt for Gemini
        # Adjusted prompt to focus on medication names or general description
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            # Send image and prompt to Gemini
            response = model.generate_content(
                contents=[
                    {"mime_type": "image/jpeg", "data": image_bytes.getvalue()}, # Direct data sending
                    {"text": prompt}
                ]
            )

            # Display the response
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Photo saved as captured_image_20250608_144714.jpg
Closing window...


In [3]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        self.root.geometry("720x1280")  # Increased width for Gemini output
        self.root.config(bg="#ADD8E6")  # Light blue color

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Flag to control camera feed updates

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # Replace with your actual API key
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Bind the window close protocol to a custom method
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the analyze button (initially hidden)
        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()  # Initially hide the analyze button

        # Create and pack the save button (initially hidden)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()  # Initially hide the save button

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget() # Initially hide the retake button

        # Create a Text widget to display Gemini's response
        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED) # Make it read-only

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        """Handle the window closing event (e.g., clicking the 'X' button)."""
        print("Closing window...")
        self.release_camera()
        self.root.destroy() # Properly closes the Tkinter window

    def update_feed(self):
        """Update the camera feed continuously if self.is_capturing is True."""
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)  # Flip horizontally for mirror effect
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        # Update the feed every 10 ms regardless of whether it's live or frozen
        # This allows the GUI to remain responsive.
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame and freeze the feed."""
        self.is_capturing = False # Stop updating the live feed
        self.captured_image = self.frame.copy() # Use the last live frame

        # Display the captured image on the canvas (freezing the image)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show analyze, save, and retake buttons
        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        # Reset Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured. Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True # Resume live feed
        self.captured_image = None # Clear captured image

        # Hide analyze, save, retake buttons, show capture button
        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Clear Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal") # Enable analyze button after save

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and display result."""
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # --- DEBUG STEP: Save the image that will be sent to Gemini ---
        debug_filename = "debug_image_before_gemini_send.jpg"
        cv2.imwrite(debug_filename, self.captured_image)
        print(f"DEBUG: Image saved to '{debug_filename}' for manual testing with standalone script.")
        # --- END DEBUG STEP ---

        # Convert the captured image (numpy array) to bytes
        # Try different JPEG quality settings if needed (e.g., 90 instead of default 95)
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        # Prepare the prompt for Gemini
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'") # Print prompt for verification

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash') # Ensure this is the exact model used in standalone

            # Send image and prompt to Gemini using genai.upload_file
            # This is generally robust for sending image data.
            response = model.generate_content(
                contents=[
                    genai.upload_file(image_bytes_data, mime_type="image/jpeg"),
                    {"text": prompt}
                ]
            )

            # Display the response
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"DEBUG: Gemini Response: {response.text}") # Print response to console as well

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"ERROR: Gemini API call failed: {e}") # Print error to console
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Photo saved as captured_image_20250608_145558.jpg
DEBUG: Image saved to 'debug_image_before_gemini_send.jpg' for manual testing with standalone script.
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
ERROR: Gemini API call failed: argument should be a str or an os.PathLike object where __fspath__ returns a str, not 'bytes'
DEBUG: Image saved to 'debug_image_before_gemini_send.jpg' for manual testing with standalone script.
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
ERROR: Gemini API call failed: argument should be a str or an os.PathLike object where __fspath__ returns a str, not 'bytes'
Photo saved as captured_image_20250608_145622.jpg
DEBUG: Image

In [4]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        self.root.geometry("720x1820")  # Increased width for Gemini output
        self.root.config(bg="#ADD8E6")  # Light blue color

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Flag to control camera feed updates

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        # It's highly recommended to load this from an environment variable for security:
        # self.api_key = os.getenv("GEMINI_API_KEY")
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Bind the window close protocol to a custom method
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the analyze button (initially hidden)
        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()  # Initially hide the analyze button

        # Create and pack the save button (initially hidden)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()  # Initially hide the save button

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget() # Initially hide the retake button

        # Create a Text widget to display Gemini's response
        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED) # Make it read-only

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        """Handle the window closing event (e.g., clicking the 'X' button)."""
        print("Closing window...")
        self.release_camera()
        self.root.destroy() # Properly closes the Tkinter window

    def update_feed(self):
        """Update the camera feed continuously if self.is_capturing is True."""
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)  # Flip horizontally for mirror effect
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        # Update the feed every 10 ms regardless of whether it's live or frozen
        # This allows the GUI to remain responsive.
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame and freeze the feed."""
        self.is_capturing = False # Stop updating the live feed
        self.captured_image = self.frame.copy() # Use the last live frame

        # Display the captured image on the canvas (freezing the image)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show analyze, save, and retake buttons
        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        # Reset Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured. Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True # Resume live feed
        self.captured_image = None # Clear captured image

        # Hide analyze, save, retake buttons, show capture button
        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Clear Gemini response text
        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal") # Enable analyze button after save

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and display result."""
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # --- DEBUG STEP (Optional: for verifying image sent) ---
        # debug_filename = "debug_image_before_gemini_send.jpg"
        # cv2.imwrite(debug_filename, self.captured_image)
        # print(f"DEBUG: Image saved to '{debug_filename}' for manual testing with standalone script.")
        # --- END DEBUG STEP ---

        # Convert the captured image (numpy array) to bytes
        # Using a good quality setting for JPEG (95 is high quality, default is often 95)
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        # Prepare the prompt for Gemini
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'") # Print prompt for verification

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash') # Ensure this is the exact model used in standalone

            # Correct way to send in-memory bytes data to generate_content
            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",  # Specify the MIME type of the image
                        "data": image_bytes_data    # Provide the raw bytes data
                    },
                    {"text": prompt}                # Provide the text prompt
                ]
            )

            # Display the response
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"DEBUG: Gemini Response: {response.text}") # Print response to console as well

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"ERROR: Gemini API call failed: {e}") # Print error to console
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Photo saved as captured_image_20250608_145954.jpg
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
DEBUG: Gemini Response: Based on the image, here are the medications listed in the prescription:

*   Xordil
*   Telmisartan
*   Benerva

If this is not a prescription, the main objects in the image are: a prescription and hand.
Closing window...


In [5]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        self.root.geometry("720x1280")
        self.root.config(bg="#ADD8E6")

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()

        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget()

        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED)

        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)
                self.frame = frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and invert colors."""
        self.is_capturing = False
        self.captured_image = self.frame.copy()

        # --- NEW: Invert colors of the captured image ---
        self.captured_image = cv2.bitwise_not(self.captured_image)
        # --- END NEW ---

        # Display the captured image on the canvas (now inverted)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured (and inverted). Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        self.is_capturing = True
        self.captured_image = None

        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal")

    def analyze_image_with_gemini(self):
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"DEBUG: Gemini Response: {response.text}")

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")

    def release_camera(self):
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
DEBUG: Gemini Response: The image appears to be of a document with some handwriting on it. It's difficult to read clearly, but it may contain the following words: xordal, os prapet, alcallate.

Given the blurry nature of the image, it is difficult to identify specific medications, if any, with certainty.
Photo saved as captured_image_20250608_150210.jpg
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
DEBUG: Gemini Response: Based on the image provided, it's difficult to definitively identify the medications listed due to the unclear handwriting. It appears to be a prescription note. The main objects are:

*

In [8]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        self.root.geometry("800x700")
        self.root.config(bg="#ADD8E6")

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()

        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget()

        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED)

        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1)  # This is the line that mirrors the live feed
                self.frame = frame # Store the current live frame, which is already mirrored
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame and freeze the feed. The captured image is already mirrored from update_feed."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Captures the already-mirrored frame

        # NO cv2.bitwise_not() here, as we want the mirrored image, not inverted colors.

        # Display the captured image on the canvas (which is already mirrored)
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured (mirrored). Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        self.is_capturing = True
        self.captured_image = None

        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image) # Saves the mirrored image
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal")

    def analyze_image_with_gemini(self):
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        # This will convert the already mirrored captured_image
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"DEBUG: Gemini Response: {response.text}")

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")

    def release_camera(self):
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Closing window...


In [7]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        self.root.geometry("800x700")
        self.root.config(bg="#ADD8E6")

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.analyze_button = Button(self.button_frame, text="Analyze with Gemini", command=self.analyze_image_with_gemini,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.analyze_button.pack_forget()

        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack_forget()

        self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg="#ADD8E6", fg="navy")
        self.gemini_response_label.pack(pady=5)
        self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        self.gemini_response_text.pack(pady=10, padx=20)
        self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        self.gemini_response_text.config(state=tk.DISABLED)

        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                # We'll remove the live flip here to make the captured flip more explicit
                # If you want the live feed to also be mirrored, keep this line.
                # For this specific request, we'll flip AFTER capture.
                # frame = cv2.flip(frame, 1) # Removed for now, will flip after capture.
                self.frame = frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and then flip it horizontally."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (unflipped) frame

        # --- NEW: Flip the captured image horizontally ---
        self.captured_image = cv2.flip(self.captured_image, 1) # 1 for horizontal flip
        # --- END NEW ---

        # Display the captured (now flipped) image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        self.capture_button.pack_forget()
        self.analyze_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.retake_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Image captured (and flipped horizontally). Click 'Analyze with Gemini' or 'Save Photo'.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def retake_photo(self):
        self.is_capturing = True
        self.captured_image = None

        self.analyze_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Live feed active. Capture a new image.")
        self.gemini_response_text.config(state=tk.DISABLED)

    def save_photo(self):
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image) # Saves the horizontally flipped image
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            self.analyze_button.config(state="normal")

    def analyze_image_with_gemini(self):
        if self.captured_image is None:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Please capture an image first.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        if not self.gemini_client:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Gemini API is not configured. Please check your API key.")
            self.gemini_response_text.config(state=tk.DISABLED)
            return

        self.gemini_response_text.config(state=tk.NORMAL)
        self.gemini_response_text.delete(1.0, tk.END)
        self.gemini_response_text.insert(tk.END, "Analyzing image with Gemini... Please wait.")
        self.gemini_response_text.config(state=tk.DISABLED)
        self.analyze_button.config(state="disabled", text="Analyzing...")
        self.save_button.config(state="disabled")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array, already flipped) to bytes
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, "Error encoding image for Gemini API.")
            self.gemini_response_text.config(state=tk.DISABLED)
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, response.text)
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"DEBUG: Gemini Response: {response.text}")

        except Exception as e:
            self.gemini_response_text.config(state=tk.NORMAL)
            self.gemini_response_text.delete(1.0, tk.END)
            self.gemini_response_text.insert(tk.END, f"Error during Gemini analysis: {e}")
            self.gemini_response_text.config(state=tk.DISABLED)
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.reset_buttons_after_analysis()

    def reset_buttons_after_analysis(self):
        self.analyze_button.config(state="normal", text="Analyze with Gemini")
        self.save_button.config(state="normal", text="Save Photo")
        self.retake_button.config(state="normal")

    def release_camera(self):
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Photo saved as captured_image_20250608_151255.jpg
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'
DEBUG: Gemini Response: Based on the image and the cropped hints, it seems to be a partially visible handwritten document, possibly a receipt or an order, rather than a prescription.

However, within the rectangular box, some words that might be medications or related terms appear to be written:

*   "Sidomy M"
*   "teraz"
*  "Osmatine"
*  "selinog"

It's very difficult to be certain due to the illegible handwriting and lack of context. If it's not a prescription, the main objects are the paper with handwritten notes and a blue stamp-like marking in the bottom corner. There's also a hand partially visible to the right.
DEBUG: Prompt being sent: 'Identify the names of all the medications listed 

In [9]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext # scrolledtext may not be needed now
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Set the window size and background color
        # Reduced height since Gemini response box is removed
        self.root.geometry("800x600")
        self.root.config(bg="#ADD8E6")

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        # Configure Gemini API (consider loading from environment variable in production)
        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        # Bind the window close protocol to a custom method
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg="#ADD8E6")
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg="#ADD8E6")
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg="#4CAF50", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#45a049", activeforeground="white")
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the Confirm button (initially hidden, will replace Capture)
        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.confirm_and_analyze_photo,
                                     font=("Arial", 14), bg="#FF9800", fg="white", relief="flat", bd=0,
                                     padx=20, pady=10, activebackground="#F57C00", activeforeground="white")
        self.confirm_button.pack_forget() # Initially hidden

        # Create and pack the Save button (initially hidden, appears after confirm/analysis)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg="#2196F3", fg="white", relief="flat", bd=0,
                                  padx=20, pady=10, activebackground="#1976D2", activeforeground="white")
        self.save_button.pack_forget() # Initially hidden

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg="#778899", fg="white", relief="flat", bd=0,
                                    padx=20, pady=10, activebackground="#6A7788", activeforeground="white")
        self.retake_button.pack_forget() # Initially hidden

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        # Start updating the camera feed
        self.update_feed()

    def on_closing(self):
        """Handle the window closing event (e.g., clicking the 'X' button)."""
        print("Closing window...")
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        """Update the camera feed continuously if self.is_capturing is True."""
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                # Mirror the live feed for user convenience
                frame = cv2.flip(frame, 1)
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and prepare for confirmation."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (mirrored) frame

        # Display the captured image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget() # Ensure save button is hidden at this stage

        print("Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.")

    def confirm_and_analyze_photo(self):
        """Confirm the captured photo and send it to Gemini for analysis."""
        if self.captured_image is None:
            print("ERROR: No image to confirm. Please capture an image first.")
            return

        print("Analyzing image with Gemini... Please wait.")
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled")

        # Convert the captured image (numpy array) to bytes
        # Using a good quality setting for JPEG (95 is high quality, default is often 95)
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            print("ERROR: Error encoding image for Gemini API.")
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        # Prepare the prompt for Gemini
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Print the response to the Terminal
            print("\n--- Gemini Analysis Result ---")
            print(response.text)
            print("------------------------------\n")

        except Exception as e:
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.reset_buttons_after_analysis() # Reset buttons after analysis/error

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True
        self.captured_image = None

        # Hide Confirm, Save, Retake buttons, show Capture button
        self.confirm_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        print("Live feed active. Capture a new image.")

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")
            # If you want to allow re-analysis after saving, uncomment the next line
            # self.confirm_button.config(state="normal", text="Confirm")

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.save_button.pack(side=tk.LEFT, padx=10) # Show save button after analysis
        self.save_button.config(state="normal", text="Save Photo") # Re-enable save button
        self.retake_button.config(state="normal") # Enable retake button


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.
Analyzing image with Gemini... Please wait.
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'

--- Gemini Analysis Result ---
Here are the medications listed on the prescription:

*   Librax
*   Bepra 20
*   Prucasoft 2mL
------------------------------

Photo saved as captured_image_20250608_180132.jpg
Closing window...


In [None]:
import cv2
import tkinter as tk
from tkinter import Label, Button, Text, scrolledtext
from PIL import Image, ImageTk
import time
import os

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F" # Darker blue, like the sidebar
        self.medium_blue = "#3B6DAA" # A slightly lighter blue for some buttons
        self.orange_yellow = "#F2A83B" # For the Confirm button
        self.light_text_color = "white" # For text on dark backgrounds
        self.neutral_button_blue = "#8DA8C6" # For Retake button

        # Set the window size and background color
        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue) # Main window background

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg=self.dark_blue) # Canvas background matches main window
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg=self.dark_blue) # Button frame background matches main window
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the Confirm button (initially hidden, will replace Capture)
        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.confirm_and_analyze_photo,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        # Create and pack the Save button (initially hidden, appears after confirm/analysis)
        self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
                                  font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                  relief="flat", bd=0, padx=20, pady=10,
                                  activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.save_button.pack_forget()

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        # No Gemini response Text widget
        # self.gemini_response_label = Label(root, text="Gemini Analysis:", font=("Arial", 12), bg=self.dark_blue, fg="navy")
        # self.gemini_response_label.pack(pady=5)
        # self.gemini_response_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=90, height=10, font=("Arial", 10), bg="#E0F2F7", fg="black")
        # self.gemini_response_text.pack(pady=10, padx=20)
        # self.gemini_response_text.insert(tk.END, "Capture an image and click 'Analyze with Gemini' to get a response.")
        # self.gemini_response_text.config(state=tk.DISABLED)

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        # Start updating the camera feed
        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and prepare for confirmation."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (mirrored) frame

        # Display the captured image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)
        self.save_button.pack_forget() # Ensure save button is hidden at this stage

        print("Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.")

    def confirm_and_analyze_photo(self):
        """Confirm the captured photo and send it to Gemini for analysis."""
        if self.captured_image is None:
            print("ERROR: No image to confirm. Please capture an image first.")
            return

        print("Analyzing image with Gemini... Please wait.")
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled")

        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            print("ERROR: Error encoding image for Gemini API.")
            self.reset_buttons_after_analysis()
            return

        image_bytes_data = buffer.tobytes()

        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Print the response to the Terminal
            print("\n--- Gemini Analysis Result ---")
            print(response.text)
            print("------------------------------\n")

        except Exception as e:
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.reset_buttons_after_analysis() # Reset buttons after analysis/error

    def retake_photo(self):
        """Return to live camera feed."""
        self.is_capturing = True
        self.captured_image = None

        # Hide Confirm, Save, Retake buttons, show Capture button
        self.confirm_button.pack_forget()
        self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        print("Live feed active. Capture a new image.")

    def save_photo(self):
        """Save the captured photo with a unique filename."""
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"captured_image_{timestamp}.jpg"

            cv2.imwrite(filename, self.captured_image)
            print(f"Photo saved as {filename}")

            self.save_button.config(text="Saved!", state="disabled")


    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.save_button.pack(side=tk.LEFT, padx=10) # Show save button after analysis
        self.save_button.config(state="normal", text="Save Photo") # Re-enable save button
        self.retake_button.config(state="normal") # Enable retake button


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.
Analyzing image with Gemini... Please wait.
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'

--- Gemini Analysis Result ---
The image appears to be a whiteboard or large sheet of paper with handwritten notes, likely brainstorming or planning. The notes mention "Social Connect," "set-up," "Metadata," "good," "Customer Reviews," "Results," "Validation," and "Magnification." It's not a prescription, so there are no medications listed.

------------------------------

Photo saved as captured_image_20250608_182342.jpg
Live feed active. Capture a new image.
Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.


KeyboardInterrupt: 

: 

In [1]:
import cv2
import tkinter as tk
from tkinter import Label, Button
from PIL import Image, ImageTk
import time
import os
import threading # To run analysis in a separate thread

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F" # Darker blue, like the sidebar
        self.medium_blue = "#3B6DAA" # A slightly lighter blue for some buttons
        self.orange_yellow = "#F2A83B" # For the Confirm button
        self.light_text_color = "white" # For text on dark backgrounds
        self.neutral_button_blue = "#8DA8C6" # For Retake button

        # Set the window size and background color
        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue)

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True
        self.analysis_thread = None # To hold the analysis thread
        self.analysis_requested = False # Flag to track if analysis is requested

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg=self.dark_blue)
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg=self.dark_blue)
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the Confirm button (initially hidden, will replace Capture)
        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.start_analysis_thread,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        # REMOVED: Save button
        # self.save_button = Button(self.button_frame, text="Save Photo", command=self.save_photo,
        #                           font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
        #                           relief="flat", bd=0, padx=20, pady=10,
        #                           activebackground="#2D5A8A", activeforeground=self.light_text_color)
        # self.save_button.pack_forget()

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        # Start updating the camera feed
        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        # Attempt to join analysis thread if it's still running
        if self.analysis_thread and self.analysis_thread.is_alive():
            print("Waiting for analysis thread to finish...")
            # Set flag to stop analysis if it's checking it
            self.analysis_requested = False # This can be used in analyze_image_with_gemini to break early
            self.analysis_thread.join(timeout=1) # Give it a moment to finish cleanly
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and prepare for confirmation."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (mirrored) frame
        self.analysis_requested = False # Reset analysis flag for new capture

        # Display the captured image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)
        # REMOVED: save_button.pack_forget()

        print("Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.")

    def start_analysis_thread(self):
        """Starts the Gemini analysis in a separate thread to keep GUI responsive."""
        if self.analysis_thread and self.analysis_thread.is_alive():
            print("Analysis already in progress. Please wait.")
            return

        self.analysis_requested = True # Set flag indicating analysis is desired
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled") # Disable retake during analysis

        print("Starting Gemini analysis in background...")
        self.analysis_thread = threading.Thread(target=self.analyze_image_with_gemini)
        self.analysis_thread.start()

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and print result."""
        if self.captured_image is None:
            print("ERROR: No image to analyze. This should not happen after capture.")
            self.root.after(0, self.reset_buttons_after_analysis) # Use after for GUI updates from thread
            return

        if not self.gemini_client:
            print("ERROR: Gemini API is not configured. Please check your API key.")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        # Check if analysis was cancelled while waiting
        if not self.analysis_requested:
            print("Analysis cancelled by user (e.g., retake pressed).")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            print("ERROR: Error encoding image for Gemini API.")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        image_bytes_data = buffer.tobytes()
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Check if analysis was cancelled during API call (less likely but possible)
            if not self.analysis_requested:
                print("Analysis result received but ignored (cancelled by user).")
                return # Don't update GUI if cancelled

            # Print the response to the Terminal
            print("\n--- Gemini Analysis Result ---")
            print(response.text)
            print("------------------------------\n")

        except Exception as e:
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            # All GUI updates from a non-main thread must be scheduled using root.after
            self.root.after(0, self.reset_buttons_after_analysis)

    def retake_photo(self):
        """Return to live camera feed, stopping any ongoing analysis."""
        self.is_capturing = True
        self.captured_image = None
        self.analysis_requested = False # Crucial: Signal to stop/ignore any ongoing analysis

        # Hide Confirm, Save, Retake buttons, show Capture button
        self.confirm_button.pack_forget()
        # REMOVED: self.save_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        print("Live feed active. Capture a new image.")

    # REMOVED: save_photo method
    # def save_photo(self):
    #     """Save the captured photo with a unique filename."""
    #     if self.captured_image is not None:
    #         timestamp = time.strftime("%Y%m%d_%H%M%S")
    #         filename = f"captured_image_{timestamp}.jpg"
    #
    #         cv2.imwrite(filename, self.captured_image)
    #         print(f"Photo saved as {filename}")
    #
    #         self.save_button.config(text="Saved!", state="disabled")


    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.retake_button.config(state="normal") # Enable retake button
        # REMOVED: save_button related lines

    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

  from .autonotebook import tqdm as notebook_tqdm


You can get one from https://makers.generativeai.google/key
Image captured. Press 'Confirm' to analyze or 'Retake' to capture again.
Starting Gemini analysis in background...
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'

--- Gemini Analysis Result ---
Based on the image, I am unable to decipher the exact medications written. The handwriting is unclear and the image quality makes it difficult to read the names accurately. The image seems to contain a framed handwritten note.

------------------------------

Closing window...


In [2]:
import cv2
import tkinter as tk
from tkinter import Label, Button
from PIL import Image, ImageTk
import time
import os
import threading # To run analysis in a separate thread

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F" # Darker blue, like the sidebar
        self.medium_blue = "#3B6DAA" # A slightly lighter blue for some buttons
        self.orange_yellow = "#F2A83B" # For the Confirm button
        self.light_text_color = "white" # For text on dark backgrounds
        self.neutral_button_blue = "#8DA8C6" # For Retake button

        # Set the window size and background color
        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue)

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True
        self.analysis_thread = None # To hold the analysis thread
        self.analysis_requested = False # Flag to track if analysis is requested

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Create and pack the canvas to display the camera feed
        self.canvas = Label(root, bg=self.dark_blue)
        self.canvas.pack(pady=10)

        # Create a frame for buttons
        self.button_frame = tk.Frame(root, bg=self.dark_blue)
        self.button_frame.pack(pady=10)

        # Create and pack the capture button
        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        # Create and pack the Confirm button (initially hidden, will replace Capture)
        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.start_analysis_thread,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        # Create and pack the retake button (initially hidden)
        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        # Bind the 'q' key to quit the application
        self.root.bind('<q>', self.quit_program)

        # Start updating the camera feed
        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        # Attempt to join analysis thread if it's still running
        if self.analysis_thread and self.analysis_thread.is_alive():
            print("Waiting for analysis thread to finish...")
            self.analysis_requested = False # This can be used in analyze_image_with_gemini to break early
            self.analysis_thread.join(timeout=1) # Give it a moment to finish cleanly
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and then flip it horizontally."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (mirrored live) frame

        # --- NEW: Explicitly flip the captured image horizontally ---
        # This ensures the captured_image itself is horizontally flipped.
        self.captured_image = cv2.flip(self.captured_image, 1)
        # --- END NEW ---

        self.analysis_requested = False # Reset analysis flag for new capture

        # Display the captured (now flipped) image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)

        print("Image captured (and horizontally flipped). Press 'Confirm' to analyze or 'Retake' to capture again.")

    def start_analysis_thread(self):
        """Starts the Gemini analysis in a separate thread to keep GUI responsive."""
        if self.analysis_thread and self.analysis_thread.is_alive():
            print("Analysis already in progress. Please wait.")
            return

        self.analysis_requested = True # Set flag indicating analysis is desired
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled") # Disable retake during analysis

        print("Starting Gemini analysis in background...")
        self.analysis_thread = threading.Thread(target=self.analyze_image_with_gemini)
        self.analysis_thread.start()

    def analyze_image_with_gemini(self):
        """Send the captured image to Gemini API for analysis and print result."""
        if self.captured_image is None:
            print("ERROR: No image to analyze. This should not happen after capture.")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        if not self.gemini_client:
            print("ERROR: Gemini API is not configured. Please check your API key.")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        # Check if analysis was cancelled while waiting
        if not self.analysis_requested:
            print("Analysis cancelled by user (e.g., retake pressed).")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        if not is_success:
            print("ERROR: Error encoding image for Gemini API.")
            self.root.after(0, self.reset_buttons_after_analysis)
            return

        image_bytes_data = buffer.tobytes()
        prompt = "Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner."
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Check if analysis was cancelled during API call
            if not self.analysis_requested:
                print("Analysis result received but ignored (cancelled by user).")
                return # Don't update GUI if cancelled

            # Print the response to the Terminal
            print("\n--- Gemini Analysis Result ---")
            print(response.text)
            print("------------------------------\n")

        except Exception as e:
            print(f"ERROR: Gemini API call failed: {e}")
        finally:
            self.root.after(0, self.reset_buttons_after_analysis)

    def retake_photo(self):
        """Return to live camera feed, stopping any ongoing analysis."""
        self.is_capturing = True
        self.captured_image = None
        self.analysis_requested = False # Crucial: Signal to stop/ignore any ongoing analysis

        # Hide Confirm, Retake buttons, show Capture button
        self.confirm_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        print("Live feed active. Capture a new image.")

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.retake_button.config(state="normal") # Enable retake button


    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.release_camera()
        self.root.quit()

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"Error: {e}")

You can get one from https://makers.generativeai.google/key
Image captured (and horizontally flipped). Press 'Confirm' to analyze or 'Retake' to capture again.
Starting Gemini analysis in background...
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription. If it's not a prescription, describe the main objects in the image in a concise manner.'

--- Gemini Analysis Result ---
Here are the medications listed in the image:

*   Librex
*   Bepra
*   Prucasoft


------------------------------

Live feed active. Capture a new image.
Image captured (and horizontally flipped). Press 'Confirm' to analyze or 'Retake' to capture again.
Live feed active. Capture a new image.
Closing window...


In [4]:
import cv2
import tkinter as tk
from tkinter import Label, Button
from PIL import Image, ImageTk
import time
import os
import threading
import concurrent.futures # For managing threads more cleanly

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F"
        self.medium_blue = "#3B6DAA"
        self.orange_yellow = "#F2A83B"
        self.light_text_color = "white"
        self.neutral_button_blue = "#8DA8C6"

        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue)

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True
        self.analysis_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) # Use an executor for cleaner thread management
        self.analysis_future = None # To hold the future result of the analysis
        self.analysis_requested = False

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc" # <<<<<<< IMPORTANT: Replace with your actual Gemini API key >>>>>>>
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg=self.dark_blue)
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg=self.dark_blue)
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.start_analysis,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        self.root.bind('<q>', self.quit_program)

        self.update_feed()

    def on_closing(self):
        print("Closing window...")
        # Shutdown the executor to stop any pending or running analysis threads
        if self.analysis_executor:
            self.analysis_executor.shutdown(wait=False, cancel_futures=True)
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and then flip it horizontally."""
        self.is_capturing = False
        self.captured_image = self.frame.copy() # Get the current (mirrored live) frame

        # Explicitly flip the captured image horizontally
        self.captured_image = cv2.flip(self.captured_image, 1)

        # --- OPTIMIZATION: Resize image before display/sending to API ---
        # Get current dimensions
        h, w = self.captured_image.shape[:2]
        # Define target width, calculate proportional height
        target_width = 800 # Common resolution for good balance
        target_height = int(target_width * h / w)
        self.captured_image = cv2.resize(self.captured_image, (target_width, target_height), interpolation=cv2.INTER_AREA)
        print(f"DEBUG: Resized image to: {self.captured_image.shape[1]}x{self.captured_image.shape[0]} pixels.")
        # --- END OPTIMIZATION ---

        self.analysis_requested = False # Reset analysis flag for new capture

        # Display the captured (now flipped and resized) image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)

        print("Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze or 'Retake' to capture again.")

    def start_analysis(self):
        """Initiates the Gemini analysis in a background thread."""
        if self.analysis_future and self.analysis_future.running():
            print("Analysis already in progress. Please wait.")
            return

        if self.captured_image is None:
            print("ERROR: No image to analyze. Please capture an image first.")
            return

        self.analysis_requested = True
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled")

        print("Starting Gemini analysis in background...")
        # Submit the analysis task to the thread pool
        self.analysis_future = self.analysis_executor.submit(self._run_analysis_task)
        # Set a callback to handle the result (or error) when the task is done
        self.analysis_future.add_done_callback(self._analysis_done_callback)


    def _run_analysis_task(self):
        """The actual task to be run in a separate thread for Gemini analysis."""
        if not self.gemini_client:
            return {"status": "error", "message": "Gemini API not configured."}
        
        # Check if cancellation was requested before starting
        if not self.analysis_requested:
            return {"status": "cancelled", "message": "Analysis cancelled before start."}

        # Use a BytesIO object for encoding to avoid saving to disk
        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 90]) # Lowered quality slightly for potential speed
        if not is_success:
            return {"status": "error", "message": "Error encoding image for Gemini API."}

        image_bytes_data = buffer.tobytes()

        # Prompt for Gemini, specifying desired output format for better accuracy
        prompt = (
            "Identify the names of all the medications listed in this prescription, as a concise comma-separated list. "
            "If it's not a prescription, describe the main objects in the image concisely. "
            "Example for prescription: 'Medication A, Medication B, Medication C'. "
            "Example for non-prescription: 'A red car parked on the street'."
        )
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash') # gemini-2.0-flash is faster

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Check if cancellation was requested during API call
            if not self.analysis_requested:
                return {"status": "cancelled", "message": "Analysis cancelled during API call."}

            return {"status": "success", "text": response.text}

        except Exception as e:
            return {"status": "error", "message": f"Gemini API call failed: {e}"}

    def _analysis_done_callback(self, future):
        """Callback executed when the analysis task is complete."""
        # This callback runs on the main thread
        try:
            result = future.result() # Get the result from the thread
            if result["status"] == "success":
                print("\n--- Gemini Analysis Result ---")
                print(result["text"])
                print("------------------------------\n")
            elif result["status"] == "cancelled":
                print(f"Analysis was cancelled: {result['message']}")
            else: # error
                print(f"ERROR: {result['message']}")
        except concurrent.futures.CancelledError:
            print("Analysis task was explicitly cancelled (e.g., by shutdown).")
        except Exception as e:
            print(f"An unexpected error occurred in analysis callback: {e}")
        finally:
            self.reset_buttons_after_analysis()

    def retake_photo(self):
        """Return to live camera feed, stopping any ongoing analysis."""
        # Attempt to cancel any pending analysis
        if self.analysis_future:
            self.analysis_future.cancel()
            print("Attempted to cancel ongoing analysis.")
        self.analysis_requested = False # Crucial: Signal to stop/ignore any ongoing analysis

        self.is_capturing = True
        self.captured_image = None

        # Hide Confirm, Retake buttons, show Capture button
        self.confirm_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)

        print("Live feed active. Capture a new image.")

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis or error."""
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.retake_button.config(state="normal") # Enable retake button
        # Re-enable Confirm button if we want to allow re-analysis of the same image (if not retaken)
        # self.confirm_button.config(state="normal", text="Confirm") # Optional: uncomment if you want to re-analyze same pic

    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.root.quit() # This will trigger on_closing, which handles thread shutdown

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"An error occurred: {e}")

You can get one from https://makers.generativeai.google/key
DEBUG: Resized image to: 800x600 pixels.
Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze or 'Retake' to capture again.
Starting Gemini analysis in background...
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription, as a concise comma-separated list. If it's not a prescription, describe the main objects in the image concisely. Example for prescription: 'Medication A, Medication B, Medication C'. Example for non-prescription: 'A red car parked on the street'.'

--- Gemini Analysis Result ---
Librax, Bepra, Prucasoft
------------------------------

Attempted to cancel ongoing analysis.
Live feed active. Capture a new image.
DEBUG: Resized image to: 800x600 pixels.
Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze or 'Retake' to capture again.
Closing window...


In [1]:
##worked one merged 


import cv2
import tkinter as tk
from tkinter import Label, Button
from PIL import Image, ImageTk
import time
import os
import threading
import concurrent.futures

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F"
        self.medium_blue = "#3B6DAA"
        self.orange_yellow = "#F2A83B"
        self.light_text_color = "white"
        self.neutral_button_blue = "#8DA8C6"

        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue)

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Controls if live feed is active
        self.analysis_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        self.analysis_future = None
        self.analysis_requested = False # Flag to indicate if an analysis has been confirmed by user

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc"
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg=self.dark_blue)
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg=self.dark_blue)
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.start_analysis,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        self.root.bind('<q>', self.quit_program)

        self.update_feed() # Start the live camera feed

    def on_closing(self):
        print("Closing window...")
        # Shutdown the executor to stop any pending or running analysis threads
        if self.analysis_executor:
            self.analysis_executor.shutdown(wait=False, cancel_futures=True)
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and then flip it horizontally."""
        self.is_capturing = False # Stop live feed
        self.captured_image = self.frame.copy() # Get the current (mirrored live) frame

        # Explicitly flip the captured image horizontally
        self.captured_image = cv2.flip(self.captured_image, 1)

        # --- OPTIMIZATION: Resize image before display/sending to API ---
        h, w = self.captured_image.shape[:2]
        target_width = 800
        target_height = int(target_width * h / w)
        self.captured_image = cv2.resize(self.captured_image, (target_width, target_height), interpolation=cv2.INTER_AREA)
        print(f"DEBUG: Resized image to: {self.captured_image.shape[1]}x{self.captured_image.shape[0]} pixels.")
        # --- END OPTIMIZATION ---

        self.analysis_requested = False # No analysis requested yet for this new image

        # Display the captured (now flipped and resized) image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake and Confirm buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.config(state="normal", text="Confirm") # Ensure Confirm button is enabled for new analysis

        print("Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze or 'Retake' to capture again.")

    def start_analysis(self):
        """Initiates the Gemini analysis in a background thread."""
        if self.analysis_future and self.analysis_future.running():
            print("Analysis already in progress. Please wait.")
            return

        if self.captured_image is None:
            print("ERROR: No image to analyze. Please capture an image first.")
            return

        self.analysis_requested = True # Set flag to true as analysis is initiated
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled") # Disable retake during analysis

        print("Starting Gemini analysis in background...")
        self.analysis_future = self.analysis_executor.submit(self._run_analysis_task)
        self.analysis_future.add_done_callback(self._analysis_done_callback)

    def _run_analysis_task(self):
        """The actual task to be run in a separate thread for Gemini analysis."""
        if not self.gemini_client:
            return {"status": "error", "message": "Gemini API not configured."}

        # Crucial check: if analysis was cancelled by retake, stop early
        if not self.analysis_requested:
            print("DEBUG: Analysis task detected cancellation request (before API call).")
            return {"status": "cancelled", "message": "Analysis cancelled before API call."}

        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
        if not is_success:
            return {"status": "error", "message": "Error encoding image for Gemini API."}

        image_bytes_data = buffer.tobytes()

        prompt = (
            "Identify the names of all the medications listed in this prescription, as a concise comma-separated list. "
            "If it's not a prescription, describe the main objects in the image concisely. "
            "Example for prescription: 'Medication A, Medication B, Medication C'. "
            "Example for non-prescription: 'A red car parked on the street'."
        )
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Check again after API call
            if not self.analysis_requested:
                print("DEBUG: Analysis task detected cancellation request (after API call).")
                return {"status": "cancelled", "message": "Analysis cancelled after API call."}

            return {"status": "success", "text": response.text}

        except Exception as e:
            return {"status": "error", "message": f"Gemini API call failed: {e}"}

    def _analysis_done_callback(self, future):
        """Callback executed on the main thread when the analysis task is complete."""
        try:
            result = future.result() # Get the result from the thread
            if result["status"] == "success":
                print("\n--- Gemini Analysis Result ---")
                print(result["text"])
                print("------------------------------\n")
            elif result["status"] == "cancelled":
                print(f"Analysis was cancelled: {result['message']}")
            else: # error
                print(f"ERROR: {result['message']}")
        except concurrent.futures.CancelledError:
            print("Analysis task was explicitly cancelled (e.g., by retake or shutdown).")
        except Exception as e:
            print(f"An unexpected error occurred in analysis callback: {e}")
        finally:
            # Only reset buttons if the analysis was actually requested for the *current* state.
            # This prevents a late analysis result from overriding a "Retake" state.
            if self.analysis_requested: # This condition is key for preventing stale resets
                 self.reset_buttons_after_analysis()


    def retake_photo(self):
        """Resets the application to the initial live capture state."""
        print("Retake button pressed. Resetting to live feed.")

        # 1. Stop/Cancel any ongoing analysis
        self.analysis_requested = False # This is crucial: signal background task to stop/ignore
        if self.analysis_future and self.analysis_future.running():
            self.analysis_future.cancel() # Attempt to cancel the future
            print("DEBUG: Attempted to cancel ongoing analysis future.")
        self.analysis_future = None # Clear the future

        # 2. Clear captured image
        self.captured_image = None

        # 3. Reset GUI buttons to initial state
        self.confirm_button.pack_forget()
        self.retake_button.pack_forget()
        self.capture_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.config(state="normal", text="Confirm") # Re-enable for next cycle

        # 4. Restart live camera feed
        self.is_capturing = True
        # The update_feed() method handles displaying the live feed
        # (It's already called via root.after, so it will pick up self.is_capturing = True)

        print("Application reset to live capture mode.")

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis completes or fails."""
        # This function is called by the callback.
        # It ensures buttons are re-enabled only if the state is still 'analysis_requested'.
        # If 'retake_photo' was pressed during analysis, analysis_requested would be False,
        # preventing a stale re-enable of buttons.
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.retake_button.config(state="normal") # Enable retake button
        # If you want to enable a "Confirm" button *after* analysis on the same image,
        # you would uncomment a line here like self.confirm_button.config(state="normal", text="Re-Analyze")

    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.root.quit() # This will trigger on_closing, which handles thread shutdown

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"An error occurred: {e}")

  from .autonotebook import tqdm as notebook_tqdm


You can get one from https://makers.generativeai.google/key
DEBUG: Resized image to: 800x600 pixels.
Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze or 'Retake' to capture again.
Starting Gemini analysis in background...
DEBUG: Prompt being sent: 'Identify the names of all the medications listed in this prescription, as a concise comma-separated list. If it's not a prescription, describe the main objects in the image concisely. Example for prescription: 'Medication A, Medication B, Medication C'. Example for non-prescription: 'A red car parked on the street'.'

--- Gemini Analysis Result ---
Two people in a room, one holding a phone.
------------------------------

Retake button pressed. Resetting to live feed.
Application reset to live capture mode.
Closing window...


In [None]:
import cv2
import tkinter as tk
from tkinter import Label, Button, messagebox # Import messagebox for user feedback
from PIL import Image, ImageTk
import time
import os
import threading
import concurrent.futures

# Import Gemini API libraries
import google.generativeai as genai
from io import BytesIO

class CameraApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Camera GUI with Gemini Vision")

        # Define the new color palette based on the image
        self.dark_blue = "#2C4F7F"
        self.medium_blue = "#3B6DAA"
        self.orange_yellow = "#F2A83B"
        self.light_text_color = "white"
        self.neutral_button_blue = "#8DA8C6"

        self.root.geometry("800x600")
        self.root.config(bg=self.dark_blue)

        self.cap = cv2.VideoCapture(0)
        self.captured_image = None
        self.gemini_client = None
        self.is_capturing = True # Controls if live feed is active
        self.analysis_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        self.analysis_future = None
        self.analysis_requested = False # Flag to indicate if an analysis has been confirmed by user

        # --- NEW: Define a directory to save captured images ---
        self.save_directory = "captured_images"
        if not os.path.exists(self.save_directory):
            os.makedirs(self.save_directory)
            print(f"Created directory: {self.save_directory}")
        # --- END NEW ---

        if not self.cap.isOpened():
            raise Exception("Could not access the camera")

        self.api_key = "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc"
        if self.api_key == "AIzaSyDJHOLmL3qdQ22Vfy6Td2oPHDAcBkggcXc":
            print("WARNING: Please replace 'YOUR_GEMINI_API_KEY' with your actual Gemini API key.")
            print("You can get one from https://makers.generativeai.google/key")
        try:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai
        except Exception as e:
            print(f"Error configuring Gemini API: {e}")
            self.gemini_client = None

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        self.canvas = Label(root, bg=self.dark_blue)
        self.canvas.pack(pady=10)

        self.button_frame = tk.Frame(root, bg=self.dark_blue)
        self.button_frame.pack(pady=10)

        self.capture_button = Button(self.button_frame, text="Capture", command=self.capture_photo,
                                     font=("Arial", 14), bg=self.medium_blue, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#2D5A8A", activeforeground=self.light_text_color)
        self.capture_button.pack(side=tk.LEFT, padx=10)

        self.confirm_button = Button(self.button_frame, text="Confirm", command=self.start_analysis,
                                     font=("Arial", 14), bg=self.orange_yellow, fg=self.light_text_color,
                                     relief="flat", bd=0, padx=20, pady=10,
                                     activebackground="#D18F2E", activeforeground=self.light_text_color)
        self.confirm_button.pack_forget()

        self.retake_button = Button(self.button_frame, text="Retake", command=self.retake_photo,
                                    font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                    relief="flat", bd=0, padx=20, pady=10,
                                    activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.retake_button.pack_forget()

        # --- NEW: Save Image Button ---
        self.save_image_button = Button(self.button_frame, text="Save Image", command=self.save_captured_image,
                                        font=("Arial", 14), bg=self.neutral_button_blue, fg=self.light_text_color,
                                        relief="flat", bd=0, padx=20, pady=10,
                                        activebackground="#7D99B5", activeforeground=self.light_text_color)
        self.save_image_button.pack_forget() # Initially hidden
        # --- END NEW ---

        self.root.bind('<q>', self.quit_program)

        self.update_feed() # Start the live camera feed

    def on_closing(self):
        print("Closing window...")
        # Shutdown the executor to stop any pending or running analysis threads
        if self.analysis_executor:
            self.analysis_executor.shutdown(wait=False, cancel_futures=True)
        self.release_camera()
        self.root.destroy()

    def update_feed(self):
        if self.is_capturing:
            ret, frame = self.cap.read()
            if ret:
                frame = cv2.flip(frame, 1) # Mirror the live feed for user convenience
                self.frame = frame # Store the current live frame
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                imgtk = ImageTk.PhotoImage(image=img)

                self.canvas.imgtk = imgtk
                self.canvas.configure(image=imgtk)
        self.root.after(10, self.update_feed)

    def capture_photo(self):
        """Capture the current frame, freeze the feed, and then flip it horizontally."""
        self.is_capturing = False # Stop live feed
        self.captured_image = self.frame.copy() # Get the current (mirrored live) frame

        # Explicitly flip the captured image horizontally to get the actual view
        self.captured_image = cv2.flip(self.captured_image, 1)

        # --- OPTIMIZATION: Resize image before display/sending to API ---
        h, w = self.captured_image.shape[:2]
        target_width = 800
        target_height = int(target_width * h / w)
        self.captured_image = cv2.resize(self.captured_image, (target_width, target_height), interpolation=cv2.INTER_AREA)
        print(f"DEBUG: Resized image to: {self.captured_image.shape[1]}x{self.captured_image.shape[0]} pixels.")
        # --- END OPTIMIZATION ---

        self.analysis_requested = False # No analysis requested yet for this new image

        # Display the captured (now flipped and resized) image on the canvas
        img = cv2.cvtColor(self.captured_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.canvas.imgtk = imgtk
        self.canvas.configure(image=imgtk)

        # Hide capture button, show Retake, Confirm, and Save buttons
        self.capture_button.pack_forget()
        self.retake_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.pack(side=tk.LEFT, padx=10)
        self.save_image_button.pack(side=tk.LEFT, padx=10) # --- NEW: Show Save button ---
        self.confirm_button.config(state="normal", text="Confirm") # Ensure Confirm button is enabled for new analysis

        print("Image captured (and horizontally flipped/resized). Press 'Confirm' to analyze, 'Retake' to capture again, or 'Save Image' to save.")

    # --- NEW FUNCTION: Save captured image ---
    def save_captured_image(self):
        if self.captured_image is not None:
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            filename = os.path.join(self.save_directory, f"capture_{timestamp}.jpg")
            try:
                cv2.imwrite(filename, self.captured_image)
                print(f"Image saved successfully to: {filename}")
                messagebox.showinfo("Image Saved", f"Image saved to:\n{filename}")
            except Exception as e:
                print(f"ERROR: Could not save image: {e}")
                messagebox.showerror("Save Error", f"Failed to save image:\n{e}")
        else:
            print("No image captured to save.")
            messagebox.showwarning("No Image", "Please capture an image first before trying to save.")
    # --- END NEW FUNCTION ---

    def start_analysis(self):
        """Initiates the Gemini analysis in a background thread."""
        if self.analysis_future and self.analysis_future.running():
            print("Analysis already in progress. Please wait.")
            return

        if self.captured_image is None:
            print("ERROR: No image to analyze. Please capture an image first.")
            return

        self.analysis_requested = True # Set flag to true as analysis is initiated
        self.confirm_button.config(state="disabled", text="Analyzing...")
        self.retake_button.config(state="disabled") # Disable retake during analysis
        self.save_image_button.config(state="disabled") # --- NEW: Disable Save button during analysis ---

        print("Starting Gemini analysis in background...")
        self.analysis_future = self.analysis_executor.submit(self._run_analysis_task)
        self.analysis_future.add_done_callback(self._analysis_done_callback)

    def _run_analysis_task(self):
        """The actual task to be run in a separate thread for Gemini analysis."""
        if not self.gemini_client:
            return {"status": "error", "message": "Gemini API not configured."}

        # Crucial check: if analysis was cancelled by retake, stop early
        if not self.analysis_requested:
            print("DEBUG: Analysis task detected cancellation request (before API call).")
            return {"status": "cancelled", "message": "Analysis cancelled before API call."}

        is_success, buffer = cv2.imencode(".jpg", self.captured_image, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
        if not is_success:
            return {"status": "error", "message": "Error encoding image for Gemini API."}

        image_bytes_data = buffer.tobytes()

        prompt = (
            "Identify the names of all the medications listed in this prescription, as a concise comma-separated list. "
            "If it's not a prescription, describe the main objects in the image concisely. "
            "Example for prescription: 'Medication A, Medication B, Medication C'. "
            "Example for non-prescription: 'A red car parked on the street'."
        )
        print(f"DEBUG: Prompt being sent: '{prompt}'")

        try:
            model = self.gemini_client.GenerativeModel('gemini-2.0-flash')

            response = model.generate_content(
                contents=[
                    {
                        "mime_type": "image/jpeg",
                        "data": image_bytes_data
                    },
                    {"text": prompt}
                ]
            )

            # Check again after API call
            if not self.analysis_requested:
                print("DEBUG: Analysis task detected cancellation request (after API call).")
                return {"status": "cancelled", "message": "Analysis cancelled after API call."}

            return {"status": "success", "text": response.text}

        except Exception as e:
            return {"status": "error", "message": f"Gemini API call failed: {e}"}

    def _analysis_done_callback(self, future):
        """Callback executed on the main thread when the analysis task is complete."""
        try:
            result = future.result() # Get the result from the thread
            if result["status"] == "success":
                print("\n--- Gemini Analysis Result ---")
                print(result["text"])
                print("------------------------------\n")
            elif result["status"] == "cancelled":
                print(f"Analysis was cancelled: {result['message']}")
            else: # error
                print(f"ERROR: {result['message']}")
        except concurrent.futures.CancelledError:
            print("Analysis task was explicitly cancelled (e.g., by retake or shutdown).")
        except Exception as e:
            print(f"An unexpected error occurred in analysis callback: {e}")
        finally:
            # Only reset buttons if the analysis was actually requested for the *current* state.
            # This prevents a late analysis result from overriding a "Retake" state.
            if self.analysis_requested: # This condition is key for preventing stale resets
                self.reset_buttons_after_analysis()


    def retake_photo(self):
        """Resets the application to the initial live capture state."""
        print("Retake button pressed. Resetting to live feed.")

        # 1. Stop/Cancel any ongoing analysis
        self.analysis_requested = False # This is crucial: signal background task to stop/ignore
        if self.analysis_future and self.analysis_future.running():
            self.analysis_future.cancel() # Attempt to cancel the future
            print("DEBUG: Attempted to cancel ongoing analysis future.")
        self.analysis_future = None # Clear the future

        # 2. Clear captured image
        self.captured_image = None

        # 3. Reset GUI buttons to initial state
        self.confirm_button.pack_forget()
        self.retake_button.pack_forget()
        self.save_image_button.pack_forget() # --- NEW: Hide Save button ---
        self.capture_button.pack(side=tk.LEFT, padx=10)
        self.confirm_button.config(state="normal", text="Confirm") # Re-enable for next cycle

        # 4. Restart live camera feed
        self.is_capturing = True
        # The update_feed() method handles displaying the live feed
        # (It's already called via root.after, so it will pick up self.is_capturing = True)

        print("Application reset to live capture mode.")

    def reset_buttons_after_analysis(self):
        """Resets the state of buttons after an analysis completes or fails."""
        # This function is called by the callback.
        # It ensures buttons are re-enabled only if the state is still 'analysis_requested'.
        # If 'retake_photo' was pressed during analysis, analysis_requested would be False,
        # preventing a stale re-enable of buttons.
        self.confirm_button.pack_forget() # Hide confirm button after it's done its job
        self.retake_button.config(state="normal") # Enable retake button
        self.save_image_button.config(state="normal") # --- NEW: Enable Save button after analysis ---
        # If you want to enable a "Confirm" button *after* analysis on the same image,
        # you would uncomment a line here like self.confirm_button.config(state="normal", text="Re-Analyze")

    def release_camera(self):
        """Release the camera resources."""
        if self.cap.isOpened():
            self.cap.release()

    def quit_program(self, event=None):
        """Exit the program when 'q' is pressed."""
        print("Exiting...")
        self.root.quit() # This will trigger on_closing, which handles thread shutdown

if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = CameraApp(root)
        root.mainloop()
    except Exception as e:
        print(f"An error occurred: {e}")