# In [None]:
import cv2
import pyttsx3
import threading
import numpy as np
from ultralytics import YOLO  # For YOLO object detection

# Create the shared text-to-speech engine, then apply each voice setting in turn
tts_engine = pyttsx3.init()
for setting, value in (
    ('rate', 150),    # Adjust speed
    ('volume', 0.9),  # Adjust volume
):
    tts_engine.setProperty(setting, value)

# Guards the shared TTS engine so only one thread at a time can drive it.
tts_lock = threading.Lock()


# Function to speak detected objects
def speak_objects(detected_objects):
    """Speak the names of the detected objects through the shared TTS engine.

    Safe to call from several threads: only one call at a time is allowed to
    drive ``tts_engine``. If another call is still speaking, this one returns
    immediately and its announcement is dropped rather than queued, so stale
    detections are never read out late.

    Args:
        detected_objects: Iterable of object-name strings. Empty or ``None``
            means there is nothing to announce.

    Returns:
        None.
    """
    if not detected_objects:
        return

    # Non-blocking acquire: calling runAndWait() while the engine's loop is
    # already running is rejected by pyttsx3 ("run loop already started"),
    # so a busy engine means this announcement is skipped.
    if not tts_lock.acquire(blocking=False):
        return
    try:
        sentence = "I detected: " + ", ".join(detected_objects)
        tts_engine.say(sentence)
        tts_engine.runAndWait()
    finally:
        # Always free the engine, even if speech synthesis raised.
        tts_lock.release()

# Initialize YOLO model
MODEL_WEIGHTS = 'yolov8n.pt'  # Replace with path to your YOLO model
model = YOLO(MODEL_WEIGHTS)

# Initialize webcam feed
CAMERA_INDEX = 0  # Device index passed to OpenCV; change it to use another camera
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    # Fail loudly here: otherwise the first read() just returns ret=False and
    # the script ends without any hint that the camera was the problem.
    cap.release()
    raise RuntimeError(f"Could not open webcam at device index {CAMERA_INDEX}")

# Minimum confidence a detection needs before it is drawn and announced
CONFIDENCE_THRESHOLD = 0.5

# Most recently started speech thread (None until the first announcement)
speech_thread = None

# Main loop: grab a frame, detect objects, annotate the frame, announce the
# detections, and repeat until the feed ends or the user presses 'q'.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform object detection
        results = model(frame)
        class_names = results[0].names
        detections = results[0].boxes.data.cpu().numpy()

        # Single pass over the detections: collect the names to announce and
        # draw the box and label for every detection above the threshold.
        # Each row is indexed as [x1, y1, x2, y2, confidence, class_id].
        detected_objects = []
        for detection in detections:
            confidence = detection[4]
            if confidence > CONFIDENCE_THRESHOLD:
                x1, y1, x2, y2 = map(int, detection[:4])
                object_name = class_names[int(detection[5])]
                detected_objects.append(object_name)

                label = f"{object_name}: {confidence:.2f}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        # Show the video feed
        cv2.imshow('Object Detection', frame)

        # Use a separate thread for speech synthesis to avoid blocking.
        # Start one only when the previous announcement has finished:
        # spawning a thread per frame floods the single shared TTS engine
        # with concurrent calls and leaves threads piling up.
        speech_idle = speech_thread is None or not speech_thread.is_alive()
        if detected_objects and speech_idle:
            speech_thread = threading.Thread(target=speak_objects, args=(detected_objects,))
            speech_thread.start()

        # Exit on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
