In [1]:
import cv2                                                    # For importing OpenCV

In [2]:
import matplotlib.pyplot as plt                               # For importing matplot library

In [3]:
# Files that together describe the detector (both are expected next to the notebook):
#   frozen_model - the pre-trained TensorFlow graph (binary weights)
#   config_file  - the text graph configuration that accompanies those weights
frozen_model = 'frozen_inference_graph.pb'
config_file = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'

In [4]:
# Build the detection model from the frozen graph (weights) and its text configuration.
model = cv2.dnn_DetectionModel(
    frozen_model,
    config_file,
)

In [5]:
# Load the class names, one per line, from 'labels.txt'.
# Trailing newlines are stripped first so the file's final line break does not
# produce an empty last label; blank lines inside the file are preserved so that
# list positions keep matching the line numbers.
file_name = 'labels.txt'
classLabels = []
with open(file_name, mode='rt') as fpt:
    classLabels.extend(fpt.read().rstrip('\n').split('\n'))

In [6]:
# Preprocessing applied to every frame before it is passed to the network.
# The value returned by the last call is what the notebook echoes as this cell's output.
model.setInputSize(320,320)                                   # Network input is 320x320 (presumably what this SSD MobileNet config expects - confirm against the .pbtxt)
model.setInputScale(1.0/127.5)                                # Scale factor 1/127.5; combined with the mean below this maps 0-255 pixel values to roughly [-1, 1]
model.setInputMean((127.5,127.5,127.5))                       # Per-channel mean of 127.5 (half of 255), subtracted to centre the pixel values around zero
model.setInputSwapRB(True)                                    # Swap the red and blue channels (OpenCV frames are BGR; the network presumably expects RGB)

< cv2.dnn.Model 00000238FD595290>

In [10]:
# Run the detector on a video (falling back to the default webcam) and show the
# annotated frames letterboxed inside a resizable / fullscreen window.
#   'q' quits, 'f' toggles fullscreen.
# Relies on `model` and `classLabels` created in the earlier cells.

WINDOW_NAME = 'Object Detection Tutorial'
CONF_THRESHOLD = 0.55                                         # Detections below this confidence are ignored
BOX_COLOR = (255, 0, 0)                                       # BGR
TEXT_COLOR = (0, 255, 0)                                      # BGR
LINE_THICKNESS = 2
TEXT_MARGIN = 10                                              # Gap kept between a clamped label and the canvas edge

font_scale = 2
font = cv2.FONT_HERSHEY_PLAIN


def _fit_inside(src_w, src_h, dst_w, dst_h):
    """Return the (width, height) of the largest rectangle with the source's
    aspect ratio that fits inside a dst_w x dst_h area.

    Integer arithmetic is used so no pixel is lost to float rounding, and each
    dimension is clamped to [1, dst] so the result is always a valid resize
    target that never exceeds the destination. All arguments must be positive.
    """
    if dst_w * src_h > dst_h * src_w:                         # Destination is relatively wider: height is the limit
        fit_h = dst_h
        fit_w = fit_h * src_w // src_h
    else:                                                     # Destination is relatively taller (or equal): width is the limit
        fit_w = dst_w
        fit_h = fit_w * src_h // src_w
    return max(1, min(fit_w, dst_w)), max(1, min(fit_h, dst_h))


cap = cv2.VideoCapture("original_sample_video.mp4")           # Load the video

if not cap.isOpened():                                        # Fall back to the default webcam
    cap.release()                                             # Drop the failed handle before replacing it
    cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise IOError("Cannot open video")

fullscreen = True                                             # Flag to toggle fullscreen mode

# Everything from here on runs under try/finally so the capture device and the
# window are released even when a frame fails to process.
try:
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))      # Resolution as reported by the capture backend
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Video Resolution: {video_width}x{video_height}")

    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)           # Resizable window, started in fullscreen
    cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("End of video or cannot fetch frame.")
            break                                             # Break if the video ends or frame cannot be read

        # Use the real frame size for all scaling: the backend-reported size
        # can be 0 (or differ from the decoded frame) for some sources.
        frame_height, frame_width = frame.shape[:2]

        # Current drawable area of the window; fall back to the frame size if
        # the backend reports a non-positive size (e.g. window not mapped yet).
        _, _, screen_width, screen_height = cv2.getWindowImageRect(WINDOW_NAME)
        if screen_width <= 0 or screen_height <= 0:
            screen_width, screen_height = frame_width, frame_height

        # Scale video to fit inside the window while maintaining aspect ratio
        display_width, display_height = _fit_inside(frame_width, frame_height, screen_width, screen_height)
        resized_frame = cv2.resize(frame, (display_width, display_height))

        # Center the frame; the right/bottom padding absorbs any odd leftover
        # pixel so the canvas is exactly screen_width x screen_height.
        left_padding = (screen_width - display_width) // 2
        top_padding = (screen_height - display_height) // 2
        right_padding = screen_width - display_width - left_padding
        bottom_padding = screen_height - display_height - top_padding

        bordered_frame = cv2.copyMakeBorder(
            resized_frame,
            top_padding, bottom_padding, left_padding, right_padding,
            cv2.BORDER_CONSTANT, value=[0, 0, 0],
        )

        # Detection is best-effort: a failure on one frame is reported and the
        # frame is still displayed (without annotations).
        try:
            # Detect objects in the original (unscaled) frame
            ClassIndex, confidence, bbox = model.detect(frame, confThreshold=CONF_THRESHOLD)
            if len(ClassIndex) != 0:                          # Guard before calling .flatten() on the results
                for ClassInd, conf, boxes in zip(ClassIndex.flatten(), confidence.flatten(), bbox):
                    # Class ids are used as 1-based positions in classLabels;
                    # skip ids with no label instead of raising (or wrapping
                    # to the last label for id 0) and losing the other boxes.
                    if not 1 <= ClassInd <= len(classLabels):
                        continue

                    # Map the (x, y, w, h) box from frame coordinates to canvas coordinates
                    adjusted_box = (
                        int(boxes[0] * display_width / frame_width) + left_padding,
                        int(boxes[1] * display_height / frame_height) + top_padding,
                        int(boxes[2] * display_width / frame_width),
                        int(boxes[3] * display_height / frame_height),
                    )
                    cv2.rectangle(bordered_frame, adjusted_box, BOX_COLOR, LINE_THICKNESS)

                    # Label text: "Class Name: confidence" (0-1 fraction, 2 decimals)
                    label = f"{classLabels[ClassInd - 1]}: {conf:.2f}"

                    (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, LINE_THICKNESS)

                    # putText anchors the text at its bottom-left corner, so
                    # the glyphs extend upwards from y and `baseline` below it.
                    x, y = adjusted_box[0] + 10, adjusted_box[1] + 40
                    if x + text_width > screen_width:         # Overflows on the right
                        x = screen_width - text_width - TEXT_MARGIN
                    if y + baseline > screen_height:          # Overflows at the bottom
                        y = screen_height - baseline - TEXT_MARGIN
                    x = max(x, 0)                             # Never start left of the canvas
                    y = max(y, text_height)                   # Never let the glyphs rise above the canvas

                    cv2.putText(bordered_frame, label, (x, y), font, fontScale=font_scale, color=TEXT_COLOR, thickness=LINE_THICKNESS)

        except Exception as e:
            print(f"Error in detection: {e}")

        cv2.imshow(WINDOW_NAME, bordered_frame)

        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):                                   # Exit when 'q' is pressed
            break
        elif key == ord('f'):                                 # Toggle fullscreen mode when 'f' is pressed
            fullscreen = not fullscreen
            window_mode = cv2.WINDOW_FULLSCREEN if fullscreen else cv2.WINDOW_NORMAL
            cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN, window_mode)
finally:
    # Release the video capture and destroy all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


Video Resolution: 1920x1080
End of video or cannot fetch frame.
