# Object Detection without GUI

This project is an object detection system that can detect objects in a video stream or webcam feed and record the video with highlighted objects. It utilizes the OpenCV library and a pre-trained deep learning model for object detection.

To use this code:

1. **Process the Video File:** Use the `process_video()` method to detect objects in a video file. After setting `video_path` and the other constructor parameters (model, configuration, class-label and output paths), call `object_detector.process_video()` to start the object detection process. The processed video will be displayed with bounding boxes around the detected objects, and the frames per second (FPS) will be shown in the top left corner. The annotated frames are also written to the configured output path. Press the 'q' key to stop early.

2. **Process the Webcam Feed:** Use the process_webcam() method to detect objects in the webcam feed. Call object_detector.process_webcam() to initiate the object detection process. The webcam feed will be displayed with bounding boxes around the detected objects. To stop the webcam feed, press the 'q' key or close the window.

Please note that the object detection model used in this project is trained on the COCO dataset, which encompasses a wide variety of objects. Detected objects will be labeled with their class names and confidence scores.


In [59]:
import cv2
import numpy as np
import time

In [60]:
# Fix NumPy's global RNG seed so the random per-class colours drawn in
# ObjectDetector.read_classes() are the same on every run.
np.random.seed(20)

In [61]:
class ObjectDetector:
    """
    Detects objects in a video file or webcam feed, displays the annotated
    frames and records them to a video file.
    """

    # Frame rate given to the video writer when the capture source does not
    # report a usable one (a reported rate of 0 or NaN would otherwise be
    # passed straight to the writer).
    DEFAULT_FPS = 30.0

    def __init__(self, video_path, config_path, model_path, classes_path, output_path):
        """
        Initializes the ObjectDetector class with the given parameters.

        Args:
        - video_path: The path to the input video file or the webcam index (int) for live video.
        - config_path: The path to the model configuration file.
        - model_path: The path to the model weights file.
        - classes_path: The path to the file containing class labels.
        - output_path: The path to save the output video.
        """
        self.video_path = video_path
        self.config_path = config_path
        self.model_path = model_path
        self.classes_path = classes_path
        self.output_path = output_path

        # Initialize the object detection model: 320x320 input, pixel values
        # rescaled as (value - 127.5) / 127.5, red and blue channels swapped.
        self.net = cv2.dnn_DetectionModel(self.model_path, self.config_path)
        self.net.setInputSize(320, 320)
        self.net.setInputScale(1.0/127.5)
        self.net.setInputMean((127.5, 127.5, 127.5))
        self.net.setInputSwapRB(True)

        # Read the class labels and assign random colors to each class
        self.read_classes()

    def read_classes(self):
        """
        Reads the class labels from the file and assigns random colors to each class.

        Sets ``self.classes_list`` (labels, with '__Background__' inserted at
        index 0) and ``self.color_list`` (one random 3-value color per label).
        """
        with open(self.classes_path, 'r') as f:
            self.classes_list = f.read().splitlines()

        # Index 0 is given to a background entry, shifting the labels from the
        # file up by one position.
        self.classes_list.insert(0, '__Background__')

        self.color_list = np.random.uniform(low=0, high=255, size=(len(self.classes_list), 3))

    def detect_objects(self, image):
        """
        Detects objects in the given image and draws bounding boxes around them.

        The image is modified in place; nothing is returned.

        Args:
        - image: The input image to detect objects in.
        """
        class_label_ids, confidences, bboxes = self.net.detect(image, confThreshold=0.4)

        # Flatten the detector output to plain sequences; an empty result
        # simply yields empty sequences and the loop below does nothing.
        bboxes = list(bboxes)
        confidences = [float(c) for c in np.array(confidences).reshape(-1)]
        class_label_ids = np.array(class_label_ids).reshape(-1)

        # Non-maximum suppression drops overlapping boxes for the same object.
        bbox_indices = cv2.dnn.NMSBoxes(bboxes, confidences, score_threshold=0.5, nms_threshold=0.2)

        # The indices may come back flat or as a column depending on the
        # OpenCV version, so flatten before iterating.
        for index in np.array(bbox_indices).reshape(-1):
            index = int(index)
            class_label_id = int(class_label_ids[index])
            class_label = self.classes_list[class_label_id].upper()
            class_color = [int(c) for c in self.color_list[class_label_id]]

            display_text = "{}: {:.2f}".format(class_label, confidences[index])

            x, y, w, h = (int(v) for v in bboxes[index])

            cv2.rectangle(image, (x, y), (x + w, y + h), color=class_color, thickness=1)
            cv2.putText(image, display_text, (x, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, class_color, 2)

    def process_video(self):
        """
        Processes the input video file and saves the output with the detected objects.

        ``self.video_path`` may be a file path, a webcam index (int) or a
        string holding a webcam index. Stops at the end of the stream, when
        'q' is pressed or when the display window is closed.
        """
        self._run(
            self._resolve_source(self.video_path),
            window_name="Result",
            error_message="Error opening video file...",
            show_fps=True,
        )

    def process_webcam(self):
        """
        Processes the live video from the webcam and saves the output with the detected objects.

        Uses capture device 0. Stops when 'q' is pressed or when the display
        window is closed.
        """
        self._run(
            0,
            window_name="Webcam",
            error_message="Error opening webcam...",
            show_fps=False,
        )

    @staticmethod
    def _resolve_source(video_path):
        """Return an int device index for ints and digit-only strings, else the path unchanged."""
        if isinstance(video_path, int):
            return video_path
        text = str(video_path)
        return int(text) if text.isdecimal() else video_path

    @classmethod
    def _writer_fps(cls, reported_fps):
        """Return the reported frame rate, or DEFAULT_FPS when it is 0, negative or NaN."""
        if reported_fps and reported_fps > 0:
            return reported_fps
        return cls.DEFAULT_FPS

    @staticmethod
    def _instant_fps(elapsed_seconds):
        """Return frames per second for one frame interval, or 0.0 for a non-positive interval."""
        return 1 / elapsed_seconds if elapsed_seconds > 0 else 0.0

    def _run(self, source, window_name, error_message, show_fps):
        """Read frames from source, annotate, display and record them until the stream ends or the user stops."""
        cap = cv2.VideoCapture(source)

        if not cap.isOpened():
            cap.release()
            print(error_message)
            return

        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        writer_fps = self._writer_fps(cap.get(cv2.CAP_PROP_FPS))

        # NOTE(review): 'XVID' is usually paired with an .avi container; confirm
        # the chosen output extension works with the local OpenCV backend.
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(self.output_path, fourcc, writer_fps, frame_size)

        # Start the clock now so the first FPS reading is a real interval
        # rather than one measured from time zero.
        previous_time = time.time()

        try:
            while True:
                success, frame = cap.read()

                if not success:
                    break

                current_time = time.time()
                fps = self._instant_fps(current_time - previous_time)
                previous_time = current_time

                self.detect_objects(frame)

                if show_fps:
                    cv2.putText(frame, "FPS: " + str(int(fps)), (20, 70), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
                cv2.imshow(window_name, frame)

                out.write(frame)  # Write the frame to the video writer

                # Stop on 'q' or when the window has been closed by the user
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q") or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
                    break
        finally:
            # Release video capture, video writer, and destroy windows even
            # if detection or display raised.
            cap.release()
            out.release()
            cv2.destroyAllWindows()

In [62]:
# Inputs and output for the ObjectDetector constructor. The relative paths are
# resolved against the current working directory.
video_path = "street.mp4"  # input video to analyse
config_path = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"  # model configuration file
model_path = "frozen_inference_graph.pb"  # model weights file
classes_path = "coco.names"  # class labels, one per line
output_path = "output.mp4"  # where the annotated video is written

In [63]:
# Create an instance of the ObjectDetector class; the constructor loads the
# detection model and reads the class labels.
object_detector = ObjectDetector(video_path, config_path, model_path, classes_path, output_path)

In [64]:
# Process the video file: shows the annotated frames and writes them to
# output_path until the video ends or 'q' is pressed.
object_detector.process_video()

In [65]:
# Process the webcam feed (capture device 0) until 'q' is pressed.
# NOTE: this writes to the same output_path as process_video() above, so the
# earlier recording is presumably replaced - use a different path to keep both.
object_detector.process_webcam()

## Object Detection with GUI

This project is an object detection system that can detect objects in a video stream or webcam feed and record the video with highlighted objects. It uses the OpenCV library and a pre-trained deep learning model to perform object detection.

### User Guide:

1. **Select Video Path**: Click the "Select Video Path" button to choose a video file (in MP4 format) that you want to process for object detection.

2. **Process Video**: Once you have selected the video file, the "Process Video" button will become active. Click it to start the object detection process. The processed video will be displayed in a new window with bounding boxes around detected objects. The FPS (frames per second) of the video will be shown in the top left corner.

3. **Select Output Path**: If you want to save the processed video, click the "Select Output Path" button to choose the output file location. The processed video will be saved in MP4 format.

4. **Process Webcam**: The "Process Webcam" button will become active after selecting an output path. Click it to use the webcam feed for object detection. The webcam feed will be displayed in a new window with bounding boxes around detected objects. You can press the 'q' key or close the window to stop the webcam feed.

Please note that the object detection model used in this project is based on the COCO dataset, which includes a wide variety of objects. The detected objects will be labeled with their class names and confidence scores.

Additionally, please note that the "Process Video" and "Process Webcam" buttons will only be active after selecting appropriate paths for them.

In [66]:
import cv2
import numpy as np
import time
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox

In [67]:
# Fix NumPy's global RNG seed so the random per-class colours drawn in
# ObjectDetector.read_classes() are the same on every run.
np.random.seed(20)

In [68]:
class ObjectDetector:
    """
    Class for detecting objects in a video stream and recording the video with highlighted objects.
    """

    # Frame rate given to the video writer when the capture source does not
    # report a usable one (a reported rate of 0 or NaN would otherwise be
    # passed straight to the writer).
    DEFAULT_FPS = 30.0

    def __init__(self, video_path, config_path, model_path, classes_path, output_path):
        """
        Initialize the ObjectDetector object.

        Parameters:
            - video_path (str or int): Path to the video file or webcam index.
            - config_path (str): Path to the model's configuration file.
            - model_path (str): Path to the model's weight file.
            - classes_path (str): Path to the file containing class labels.
            - output_path (str or None): Path to save the output video file.
              When empty or None, nothing is recorded.
        """
        self.video_path = video_path
        self.config_path = config_path
        self.model_path = model_path
        self.classes_path = classes_path
        self.output_path = output_path

        # 320x320 input, pixel values rescaled as (value - 127.5) / 127.5,
        # red and blue channels swapped.
        self.net = cv2.dnn_DetectionModel(self.model_path, self.config_path)
        self.net.setInputSize(320, 320)
        self.net.setInputScale(1.0 / 127.5)
        self.net.setInputMean((127.5, 127.5, 127.5))
        self.net.setInputSwapRB(True)

        self.read_classes()

    def read_classes(self):
        """
        Read the class labels from the file and assign a random color to each.

        Sets ``self.classes_list`` (labels, with '__Background__' inserted at
        index 0) and ``self.color_list`` (one random 3-value color per label).
        """
        with open(self.classes_path, 'r') as f:
            self.classes_list = f.read().splitlines()

        # Index 0 is given to a background entry, shifting the labels from the
        # file up by one position.
        self.classes_list.insert(0, '__Background__')

        self.color_list = np.random.uniform(low=0, high=255, size=(len(self.classes_list), 3))

    def detect_objects(self, image):
        """
        Detect objects in the given image and draw bounding boxes around them.

        The image is modified in place; nothing is returned.

        Parameters:
            - image (numpy.ndarray): Input image array.
        """
        class_label_ids, confidences, bboxes = self.net.detect(image, confThreshold=0.4)

        # Flatten the detector output to plain sequences; an empty result
        # simply yields empty sequences and the loop below does nothing.
        bboxes = list(bboxes)
        confidences = [float(c) for c in np.array(confidences).reshape(-1)]
        class_label_ids = np.array(class_label_ids).reshape(-1)

        # Non-maximum suppression drops overlapping boxes for the same object.
        bbox_indices = cv2.dnn.NMSBoxes(bboxes, confidences, score_threshold=0.5, nms_threshold=0.2)

        # The indices may come back flat or as a column depending on the
        # OpenCV version, so flatten before iterating.
        for index in np.array(bbox_indices).reshape(-1):
            index = int(index)
            class_label_id = int(class_label_ids[index])
            class_label = self.classes_list[class_label_id].upper()
            class_color = [int(c) for c in self.color_list[class_label_id]]

            display_text = "{}: {:.2f}".format(class_label, confidences[index])
            x, y, w, h = (int(v) for v in bboxes[index])
            cv2.rectangle(image, (x, y), (x + w, y + h), color=class_color, thickness=1)
            cv2.putText(image, display_text, (x, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, class_color, 2)

    @classmethod
    def _writer_fps(cls, reported_fps):
        """Return the reported frame rate, or DEFAULT_FPS when it is 0, negative or NaN."""
        if reported_fps and reported_fps > 0:
            return reported_fps
        return cls.DEFAULT_FPS

    @staticmethod
    def _instant_fps(elapsed_seconds):
        """Return frames per second for one frame interval, or 0.0 for a non-positive interval."""
        return 1 / elapsed_seconds if elapsed_seconds > 0 else 0.0

    def process_video(self):
        """
        Process the video source given by ``self.video_path``.

        Each frame is annotated, stamped with the current FPS, shown in the
        'Object Detection' window and, when ``self.output_path`` is set,
        written to that file. Stops at the end of the stream, when 'q' is
        pressed or when the window is closed.
        """
        window_name = 'Object Detection'

        cap = cv2.VideoCapture(self.video_path)
        if not cap.isOpened():
            cap.release()
            print("Error opening video source...")
            return

        # Only create a writer when there is somewhere to write to.
        out = None
        if self.output_path:
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer_fps = self._writer_fps(cap.get(cv2.CAP_PROP_FPS))
            out = cv2.VideoWriter(self.output_path, fourcc, writer_fps, frame_size)

        # Start the clock now so the first FPS reading is a real interval
        # rather than one measured from time zero.
        previous_time = time.time()

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                self.detect_objects(frame)
                current_time = time.time()
                fps = self._instant_fps(current_time - previous_time)
                previous_time = current_time
                cv2.putText(frame, "FPS: " + str(int(fps)), (20, 70), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)

                if out is not None:
                    out.write(frame)

                cv2.imshow(window_name, frame)
                key = cv2.waitKey(1) & 0xFF

                # Check if the 'q' key or the window close button (X) is pressed
                if key == ord('q') or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
                    break
        finally:
            # Release everything even if detection or display raised.
            cap.release()
            if out is not None:
                out.release()
            cv2.destroyAllWindows()

In [69]:
def select_video():
    """
    Ask the user for an MP4 file and, if one is chosen, show its path in the
    video entry and enable the Process Video button.
    """
    chosen = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4")])
    if not chosen:
        # Dialog dismissed without a selection: leave the form as it is.
        return

    video_entry.delete(0, tk.END)
    video_entry.insert(tk.END, chosen)
    enable_process_video_button()

In [70]:
def select_output():
    """
    Callback function for selecting the output file.

    Opens a "save as" dialog (default extension .mp4). When a path is chosen
    it replaces the contents of the output entry and the Process Webcam button
    is enabled. When the dialog is cancelled nothing changes, so a previously
    chosen path is kept and the button is not enabled without a path.
    """
    output_path = filedialog.asksaveasfilename(defaultextension=".mp4", filetypes=[("Video Files", "*.mp4")])
    if not output_path:
        # The dialog returns an empty value on cancel; treat that as "no change".
        return

    output_entry.delete(0, tk.END)
    output_entry.insert(tk.END, output_path)
    enable_process_webcam_button()

In [71]:
def enable_process_video_button():
    """
    Switch the Process Video button to its clickable state.
    """
    process_video_button.configure(state='normal')

In [72]:
def enable_process_webcam_button():
    """
    Switch the Process Webcam button to its clickable state.
    """
    process_webcam_button.configure(state='normal')

In [73]:
def process_video():
    """
    Callback function for processing the video file.

    Reads the video path from the video entry and runs object detection on
    it. If an output path has been selected, the annotated video is also
    saved there; otherwise the result is only displayed.
    """
    video_path = video_entry.get()
    if not video_path:
        # The entry can be emptied by hand after the button was enabled.
        messagebox.showwarning("Object Detector", "Please select a video file first.")
        return

    config_path = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
    model_path = "frozen_inference_graph.pb"
    classes_path = "coco.names"
    # Honor the selected output path; an empty entry means "do not record".
    output_path = output_entry.get() or None

    object_detector = ObjectDetector(video_path, config_path, model_path, classes_path, output_path)
    object_detector.process_video()

In [74]:
def process_webcam():
    """
    Run object detection on the webcam feed, recording to the path currently
    shown in the output entry.
    """
    detector = ObjectDetector(
        video_path=0,  # capture device 0 selects the webcam instead of a file
        config_path="ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt",
        model_path="frozen_inference_graph.pb",
        classes_path="coco.names",
        output_path=output_entry.get(),
    )
    detector.process_video()

In [75]:
# Create the main window; every widget below is attached to it.
window = tk.Tk()
window.title("Object Detector")

''

In [76]:
# "Select Video Path" button: opens the file dialog through select_video().
select_video_button = tk.Button(window, text="Select Video Path", command=select_video)
select_video_button.pack()

# Entry that shows the chosen video path; the process_video() callback reads it.
video_entry = tk.Entry(window, width=50)
video_entry.pack()

In [77]:
# "Process Video" button: starts disabled and is enabled by select_video()
# (via enable_process_video_button) once a video file has been chosen.
process_video_button = tk.Button(window, text="Process Video", command=process_video, state='disabled')
process_video_button.pack()

In [78]:
# "Select Output Path" button: opens the save dialog through select_output().
select_output_button = tk.Button(window, text="Select Output Path", command=select_output)
select_output_button.pack()
# Entry that shows the chosen output path; the process_webcam() callback reads it.
output_entry = tk.Entry(window, width=50)
output_entry.pack()

In [79]:
# "Process Webcam" button: starts disabled and is enabled by select_output()
# (via enable_process_webcam_button).
process_webcam_button = tk.Button(window, text="Process Webcam", command=process_webcam, state='disabled')
process_webcam_button.pack()

In [80]:
# Destroy the main window when the user clicks its close button.
window.protocol("WM_DELETE_WINDOW", window.destroy)

''

In [None]:
# Start the main event loop; this call blocks until the main window is destroyed.
window.mainloop()