In [1]:
import cv2                                                      # For importing OpenCV

In [2]:
import matplotlib.pyplot as plt                                 # For importing matplot library

In [3]:
import time                                                     # For importing time module

In [4]:
# Pre-trained TensorFlow model (frozen inference graph) used by the detector
frozen_model = 'frozen_inference_graph.pb'
# Configuration file that accompanies the frozen model
config_file = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'

In [5]:
# Build the OpenCV detection model from the frozen graph and its configuration file.
# Note the argument order: the model file comes first, the configuration file second.
model = cv2.dnn_DetectionModel(frozen_model, config_file)       # Loads both files into an in-memory detection model

In [6]:
file_name = 'labels.txt'                                        # Text file whose lines are the class names
classLabels = []                                                # Start from an empty list; replaced below once the file is read
with open(file_name, 'rt') as labels_file:
    raw_text = labels_file.read()                               # Whole file as a single string
    # Drop trailing newlines first so the split does not yield empty trailing entries,
    # then turn every remaining line into one list element.
    classLabels = raw_text.rstrip('\n').split('\n')

In [7]:
# Configure the preprocessing the detection model applies to each input frame.
model.setInputSize(320, 320)                                    # Network input size: 320x320
model.setInputScale(1.0 / 127.5)                                # Scale factor for pixel values (1/127.5, i.e. half of the 0-255 range)
model.setInputMean((127.5, 127.5, 127.5))                       # Per-channel mean of 127.5; presumably subtracted before scaling so 0-255 maps to [-1, 1] - confirm against the OpenCV docs
model.setInputSwapRB(True)                                      # Swap the red and blue channels (OpenCV frames are BGR, the model expects RGB)

< cv2.dnn.Model 000002072A894890>

In [8]:
# Interactive object-detection viewer.
#
# Asks the user to choose between the webcam (mode 1) and a sample video (mode 2),
# runs the detection model held in `model` on every frame, draws the boxes and labels
# from `classLabels` on a letterboxed copy of the frame, and overlays FPS figures.
# Keys: 'q' quits, 'f' toggles fullscreen. The capture and all windows are always
# released on exit, even when an error or interrupt stops the loop.

sample_video = "original_sample_video.mp4"                      # Name of the sample video used in mode 2
window_name = 'Object Detection Tutorial'                       # Window title shared by every GUI call below
max_class_id = 90                                               # Highest class id accepted from the detector


def _read_positive_int(prompt):
    """Prompt repeatedly until the user enters a positive whole number; return it as int."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            value = 0                                           # Treat non-numeric input like any other invalid value
        if value > 0:
            return value
        print("Please enter a positive whole number.")


# Prompt for mode selection with numeric input
print("1. Real-Time Object Detection through Webcam")
print("2. Non-Real-Time Object Detection through Sample Video")

mode = input("Select detection mode (1 or 2): ").strip()

# Validate the input
while mode not in ['1', '2']:
    print("Invalid option!")
    mode = input("Select detection mode (1 or 2): ").strip()

if mode == '1':                                                 # Real-Time Object Detection through Webcam
    webcam_width = _read_positive_int("Enter webcam width (e.g., 640): ")
    webcam_height = _read_positive_int("Enter webcam height (e.g., 480): ")
    cap = cv2.VideoCapture(0)                                   # Open webcam
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width)             # A request only: the driver may pick another size
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height)
else:                                                           # Non-Real-Time Detection through Sample Video
    cap = cv2.VideoCapture(sample_video)                        # Load the video

if not cap.isOpened():                                          # Check if the video or webcam opened correctly
    cap.release()
    raise IOError("Cannot open video or webcam!")

# Initial size as reported by the capture; refreshed from every decoded frame inside the loop,
# because the frame itself is the only reliable source for the size the detector sees.
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

font_scale = 2
font = cv2.FONT_HERSHEY_PLAIN

# Variables for FPS calculation
frame_count = 0                                                 # Not used in this cell; kept in case other cells read it
start_time = time.time()                                        # Not used in this cell; kept in case other cells read it
avg_fps = 0
fps_window = 30                                                 # Number of frames to average over for smooth FPS calculation
frame_times = []

fullscreen = True                                               # Flag to toggle fullscreen mode

try:
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)             # Create a resizable window
    cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("End of video or cannot fetch frame.")
            break                                               # Break if the video ends or frame cannot be read

        # Timing covers resizing, detection and drawing; it excludes capture and display.
        frame_start_time = cv2.getTickCount()

        # Use the real frame size so box scaling stays correct even when the webcam
        # ignored the requested resolution.
        video_height, video_width = frame.shape[:2]
        aspect_ratio = video_width / video_height

        # Size of the window's drawable area (queried once per frame).
        _, _, screen_width, screen_height = cv2.getWindowImageRect(window_name)
        if screen_width <= 0 or screen_height <= 0:
            # No usable window size reported: fall back to the frame's own size.
            screen_width, screen_height = video_width, video_height

        # Fit the frame inside the window while preserving its aspect ratio.
        if screen_width / screen_height > aspect_ratio:
            display_height = screen_height
            display_width = max(1, int(display_height * aspect_ratio))
        else:
            display_width = screen_width
            display_height = max(1, int(display_width / aspect_ratio))

        resized_frame = cv2.resize(frame, (display_width, display_height))

        # Center the frame; any odd leftover pixel goes to the bottom/right so the
        # padded image matches the window size exactly.
        top_padding = (screen_height - display_height) // 2
        bottom_padding = screen_height - display_height - top_padding
        left_padding = (screen_width - display_width) // 2
        right_padding = screen_width - display_width - left_padding

        bordered_frame = cv2.copyMakeBorder(
            resized_frame,
            top_padding, bottom_padding, left_padding, right_padding,
            cv2.BORDER_CONSTANT, value=[0, 0, 0],
        )

        try:
            # Detect objects in the original (unscaled) frame
            ClassIndex, confidence, bbox = model.detect(frame, confThreshold=0.55)
            if len(ClassIndex) != 0:
                x_scale = display_width / video_width
                y_scale = display_height / video_height
                # Class ids are 1-based indexes into classLabels; never index past the list.
                label_limit = min(max_class_id, len(classLabels))

                for ClassInd, conf, boxes in zip(ClassIndex.flatten(), confidence.flatten(), bbox):
                    if not 1 <= ClassInd <= label_limit:
                        continue                                # Unknown class id: skip it, keep drawing the rest

                    # Map the box (x, y, w, h) from frame coordinates to the padded display image
                    adjusted_box = [
                        int(boxes[0] * x_scale) + left_padding,
                        int(boxes[1] * y_scale) + top_padding,
                        int(boxes[2] * x_scale),
                        int(boxes[3] * y_scale)
                    ]

                    cv2.rectangle(bordered_frame, adjusted_box, (255, 0, 0), 2)

                    # Label text: "Class Name: score", score shown with 2 decimal places
                    label = f"{classLabels[ClassInd - 1]}: {conf:.2f}"

                    # Get text size to avoid cutting off text
                    (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, 2)

                    # Default position: slightly inside the box's top-left corner
                    x, y = adjusted_box[0] + 10, adjusted_box[1] + 40

                    # Pull the text back inside the image if it would overflow, without
                    # letting the adjusted position leave the image on the opposite side.
                    if x + text_width > screen_width:
                        x = max(0, screen_width - text_width - 10)
                    if y + text_height > screen_height:
                        y = max(text_height, screen_height - text_height - 10)

                    cv2.putText(bordered_frame, label, (x, y), font, fontScale=font_scale, color=(0, 255, 0), thickness=2)

        except Exception as e:
            # Best effort: a failed detection must not stop the video loop.
            print(f"Error in detection: {e}")

        # Calculate FPS for current frame
        frame_end_time = cv2.getTickCount()
        time_per_frame = (frame_end_time - frame_start_time) / cv2.getTickFrequency()
        real_time_fps = 1 / time_per_frame if time_per_frame > 0 else 0.0

        # Sliding window of the most recent frame times
        frame_times.append(time_per_frame)
        if len(frame_times) > fps_window:
            frame_times.pop(0)

        # Average over the frames actually collected; dividing by fps_window would
        # overstate the figure until the window has filled up.
        total_time = sum(frame_times)
        avg_fps = len(frame_times) / total_time if total_time > 0 else 0.0

        # Display FPS on the frame
        fps_text = f"Real-time FPS: {real_time_fps:.2f} | Avg FPS: {avg_fps:.2f}"
        cv2.putText(bordered_frame, fps_text, (10, 35), font, fontScale=font_scale, color=(0, 0, 255), thickness=2)

        cv2.imshow(window_name, bordered_frame)                 # Show the frame with FPS

        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):                                     # Exit when 'q' is pressed
            break
        elif key == ord('f'):                                   # Toggle fullscreen mode when 'f' is pressed
            fullscreen = not fullscreen
            if fullscreen:
                cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            else:
                cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_NORMAL)
finally:
    # Release the video capture and destroy all OpenCV windows, whatever ended the loop
    cap.release()
    cv2.destroyAllWindows()

1. Real-Time Object Detection through Webcam
2. Non-Real-Time Object Detection through Sample Video


Select detection mode (1 or 2):  1
Enter webcam width (e.g., 640):  1920
Enter webcam height (e.g., 480):  1080
