In [1]:
import tkinter as tk
import cv2
import numpy as np
import pyttsx3
import speech_recognition as sr
import threading

# Load YOLOv3 model
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []

with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Initialize speech recognition
recognizer = sr.Recognizer()

# Global variable to control the detection loop
detecting = False

def detect_objects(frame):
    height, width, channels = frame.shape

    # Preprocess the frame for YOLO
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    detected_objects = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.5:
                label = str(classes[class_id])
                detected_objects.append(label)

    return detected_objects

def speak_detected_objects(objects):
    if len(objects) == 0:
        engine.say("No objects detected.")
    else:
        object_str = ", ".join(objects)
        engine.say(f"I see {object_str}")
    engine.runAndWait()

def voice_command():
    with sr.Microphone() as source:
        print("Listening for commands...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

        try:
            command = recognizer.recognize_google(audio).lower()
            print("Command:", command)
            return command
        except sr.UnknownValueError:
            print("Could not understand audio.")
            return None
        except sr.RequestError as e:
            print(f"Google Speech Recognition request failed: {e}")
            return None

def object_detection_loop():
    global detecting
    # Initialize video capture (replace 0 with your desired camera index)
    cap = cv2.VideoCapture(0)

    # Check if the camera opened successfully
    if not cap.isOpened():
        print("Error: Unable to open video capture.")
        engine.say("Error: Unable to open video capture.")
        engine.runAndWait()
        return

    engine.say("Object detection started. Say 'stop' to stop detection.")
    engine.runAndWait()

    while detecting:
        ret, frame = cap.read()
        if not ret:
            print("Error: Unable to capture frame.")
            engine.say("Error: Unable to capture frame.")
            engine.runAndWait()
            break

        # Perform object detection
        new_objects = detect_objects(frame)

        # Speak out detected objects
        speak_detected_objects(new_objects)

        # Wait a bit before the next detection
        cv2.waitKey(1000)

    # Release the video capture and close the text-to-speech engine
    cap.release()
    engine.say("Object detection stopped.")
    engine.runAndWait()
    engine.stop()

def start_detection():
    global detecting
    if not detecting:
        detecting = True
        detection_thread = threading.Thread(target=object_detection_loop)
        detection_thread.start()
        print("Detection started")
    else:
        print("Detection already running")

def stop_detection():
    global detecting
    detecting = False
    print("Detection stopped")

def voice_control_loop():
    while True:
        command = voice_command()
        if command == "start":
            start_detection()
        elif command == "stop":
            stop_detection()
            break

# Create the main window
root = tk.Tk()
root.title("Object Detection Application")

# Label to show instructions
label = tk.Label(root, text="Use voice commands 'start' and 'stop' to control object detection.", font=("Arial", 14))
label.pack(pady=20)

# Start the voice control loop in a separate thread
voice_thread = threading.Thread(target=voice_control_loop)
voice_thread.start()

# Run the application
root.mainloop()


Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...


In [None]:
import tkinter as tk
import cv2
import numpy as np
import pyttsx3
import speech_recognition as sr
import threading

# Load YOLOv3 model
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []

with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Initialize speech recognition
recognizer = sr.Recognizer()

# Global variable to control the detection loop
detecting = False

def detect_objects(frame):
    height, width, channels = frame.shape

    # Preprocess the frame for YOLO
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    detected_objects = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.5:
                label = str(classes[class_id])
                detected_objects.append(label)

    return detected_objects

def speak_detected_objects(objects):
    if len(objects) == 0:
        engine.say("No objects detected.")
    else:
        object_str = ", ".join(objects)
        engine.say(f"I see {object_str}")
    engine.runAndWait()

def voice_command():
    with sr.Microphone() as source:
        print("Listening for commands...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

        try:
            command = recognizer.recognize_google(audio).lower()
            print("Command:", command)
            return command
        except sr.UnknownValueError:
            print("Could not understand audio.")
            return None
        except sr.RequestError as e:
            print(f"Google Speech Recognition request failed: {e}")
            return None

def object_detection_loop():
    global detecting
    # Initialize video capture (replace 0 with your desired camera index)
    cap = cv2.VideoCapture(0)

    # Check if the camera opened successfully
    if not cap.isOpened():
        print("Error: Unable to open video capture.")
        engine.say("Error: Unable to open video capture.")
        engine.runAndWait()
        return

    engine.say("Object detection started. Say 'stop' to stop detection.")
    engine.runAndWait()

    while detecting:
        ret, frame = cap.read()
        if not ret:
            print("Error: Unable to capture frame.")
            engine.say("Error: Unable to capture frame.")
            engine.runAndWait()
            break

        # Perform object detection
        new_objects = detect_objects(frame)

        # Speak out detected objects
        speak_detected_objects(new_objects)

        # Wait a bit before the next detection
        cv2.waitKey(1000)

    # Release the video capture and close the text-to-speech engine
    cap.release()
    engine.say("Object detection stopped.")
    engine.runAndWait()
    engine.stop()

def start_detection():
    global detecting
    if not detecting:
        detecting = True
        detection_thread = threading.Thread(target=object_detection_loop)
        detection_thread.start()
        print("Detection started")
        engine.say("Detection started")
        engine.runAndWait()
    else:
        print("Detection already running")
        engine.say("Detection already running")
        engine.runAndWait()

def stop_detection():
    global detecting
    if detecting:
        detecting = False
        print("Detection stopped")
        engine.say("Detection stopped")
        engine.runAndWait()
    else:
        print("Detection is not running")
        engine.say("Detection is not running")
        engine.runAndWait()

def voice_control_loop():
    while True:
        command = voice_command()
        if command == "start":
            start_detection()
        elif command == "stop":
            stop_detection()
            break

# Create the main window
root = tk.Tk()
root.title("Object Detection Application")

# Label to show instructions
label = tk.Label(root, text="Use voice commands 'start' and 'stop' to control object detection.", font=("Arial", 14))
label.pack(pady=20)

# Start the voice control loop in a separate thread
voice_thread = threading.Thread(target=voice_control_loop)
voice_thread.start()

# Run the application
root.mainloop()


Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Could not understand audio.
Listening for commands...
Listening for commands...
Could not understand audio.
Could not understand audio.
Listening for commands...
Listening for commands...
Could not understand audio.
Could not understand audio.
Listening for commands...
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Could not understand audio.
Listening for commands...
Listening for commands...
Could not understand audio.
Could not understand audio.
Listening for commands...
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Co

Exception in thread Thread-48:
Traceback (most recent call last):
  File "D:\Users\Admin\anaconda3\lib\threading.py", line 980, in _bootstrap_inner
    self.run()
  File "D:\Users\Admin\anaconda3\lib\threading.py", line 917, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_23836\2707426348.py", line 87, in object_detection_loop
  File "D:\Users\Admin\anaconda3\lib\site-packages\pyttsx3\engine.py", line 177, in runAndWait
    raise RuntimeError('run loop already started')
RuntimeError: run loop already started


Listening for commands...
Command: exception is
Listening for commands...
Command: extension in canada
Listening for commands...
Command: 47
Command: 47
Listening for commands...
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Command: set timer for
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
Could not understand audio.
Listening for commands...
