In [None]:
import os
import cv2
import numpy as np
import sounddevice as sd
import speech_recognition as sr
import pickle
from PIL import Image
import pyttsx3
from transformers import GenerationConfig

# 📦 Load processor and model from pickle
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so processor.pkl and model.pkl must only ever come from a trusted source.
with open('processor.pkl', 'rb') as processor_file:
    processor = pickle.load(processor_file)

with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Replace the unpickled model's generation config with a default
# GenerationConfig (workaround for issues seen after unpickling the model).
model.generation_config = GenerationConfig()

# 🗣️ Text-to-speech
def speak(text):
    """Say *text* aloud with a new pyttsx3 engine (rate 150, volume 1)."""
    tts = pyttsx3.init()
    for prop, value in (('rate', 150), ('volume', 1)):
        tts.setProperty(prop, value)
    tts.say(text)
    tts.runAndWait()

# 📸 Take a picture using the webcam (no GUI functions)
def take_picture(camera_index=1, output_path='captured_image.jpg'):
    """Capture a single webcam frame and write it to disk.

    No OpenCV GUI functions are used, so this also works in headless builds.

    Args:
        camera_index: OpenCV capture device index. Defaults to 1, the index
            this script has always used. NOTE(review): the default camera is
            usually index 0 - confirm for the target machine.
        output_path: File the captured frame is written to.

    Returns:
        True if a frame was captured and saved, False otherwise.
    """
    print("üé• Attempting to access webcam...")
    cap = cv2.VideoCapture(camera_index)

    # try/finally guarantees the device is released on every exit path,
    # including an exception raised while reading or writing the frame.
    try:
        if not cap.isOpened():
            print('‚ùå Could not open webcam')
            return False

        print("‚úÖ Webcam opened successfully. Capturing frame...")
        ret, frame = cap.read()
        if not ret:
            print('‚ùå Failed to capture image')
            return False

        # imwrite reports failure through its return value; do not claim
        # the image was saved unless the write actually succeeded.
        if not cv2.imwrite(output_path, frame):
            print(f'‚ùå Failed to save image to {output_path}')
            return False

        print(f'üì∑ Image saved as {output_path}')
        return True
    finally:
        cap.release()

# 🎙️ Listen for command and trigger capture
def listen_for_command():
    """Listen for one spoken command and, if it asks for a photo, capture and caption it.

    Records a single phrase from the default microphone, transcribes it with
    the Google recognizer, and when the transcript contains one of the
    trigger phrases calls take_picture() followed by
    generate_caption_from_file(). Recognition failures are printed, not raised.
    """
    # Both the terse and the natural wording are accepted. Longer requests
    # such as "take a picture from my webcam" match via 'take a picture'.
    trigger_phrases = (
        'take picture',
        'take a picture',
        'click picture',
        'click a picture',
    )

    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print('üé§ Listening for command...')
        audio = recognizer.listen(source)
    # The microphone is released here, before the network request and the
    # capture/caption/speech steps, so it is not held open while they run.

    try:
        command = recognizer.recognize_google(audio).lower()
    except sr.UnknownValueError:
        print('‚ùå Sorry, I did not understand that.')
        return
    except sr.RequestError as e:
        print(f'‚ùå Could not request results: {e}')
        return

    print(f'‚úÖ Command received: {command}')
    if any(phrase in command for phrase in trigger_phrases):
        take_picture()
        generate_caption_from_file()
    else:
        # Tell the user why nothing happened instead of exiting silently.
        print(f'Command not recognized as a capture request: {command}')

# 🧠 Generate caption from captured image
def _caption_and_speak(bgr_image, error_label):
    """Caption an image array, then print and speak the caption.

    Shared pipeline for both caption entry points.

    Args:
        bgr_image: Image array as returned by cv2.imread (converted from BGR
            to RGB before being handed to the processor).
        error_label: Text placed in the error message if generation,
            decoding or speech raises.

    Returns:
        The caption string, or None if generation, decoding or speech failed.
    """
    rgb_image = Image.fromarray(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
    pixel_values = processor(images=rgb_image, return_tensors='pt')["pixel_values"]

    try:
        out = model.generate(
            pixel_values=pixel_values,
            max_length=16,
            num_beams=4,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
        caption = processor.decode(out[0], skip_special_tokens=True)
        print(f'üßæ Generated Caption: {caption}')
        speak(caption)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and carry on.
        print(f'‚ùå {error_label}: {e}')
        return None
    return caption


def generate_caption_from_file(image_path='captured_image.jpg'):
    """Caption the most recently captured image and speak the result.

    Args:
        image_path: Image file to caption. Defaults to 'captured_image.jpg'.

    Returns:
        The caption string, or None if the image could not be read or
        captioning failed.
    """
    print("üñºÔ∏è Generating caption for captured image...")
    img = cv2.imread(image_path)
    if img is None:
        print('‚ùå No image found!')
        return None
    return _caption_and_speak(img, 'Error while generating caption')


# Generate caption from an uploaded (user-supplied) file
def generate_caption_from_upload(file_path):
    """Caption a user-supplied image file and speak the result.

    Args:
        file_path: Path to the image file to caption.

    Returns:
        The caption string, or None if the path is not a file, the image
        could not be read, or captioning failed.
    """
    print(f"üì§ Loading image from: {file_path}")
    if not os.path.isfile(file_path):
        print("‚ùå Invalid file path.")
        return None

    img = cv2.imread(file_path)
    if img is None:
        print("‚ùå Unable to read the image.")
        return None
    return _caption_and_speak(img, 'Error generating caption')

# 🔁 Main entry
if __name__ == '__main__':
    # Ask which input mode to run, then dispatch; anything else is rejected.
    mode = input("Type 'voice' to use voice command or 'upload' to upload an image: ").strip().lower()
    if mode not in ('voice', 'upload'):
        print("‚ùå Invalid choice. Please type 'voice' or 'upload'.")
    elif mode == 'upload':
        image_location = input("Enter full path to the image: ").strip()
        generate_caption_from_upload(image_location)
    else:
        listen_for_command()


  from .autonotebook import tqdm as notebook_tqdm


üé§ Listening for command...
‚úÖ Command received: take a picture
