In [2]:
import os
import time

import cv2
import numpy as np

from sort import Sort  # Sort tracking algorithm

In [3]:
# Load the detection model
# Both files are given as bare file names, so they are resolved relative to the
# notebook's working directory.
# NOTE(review): judging by the names, the .pbtxt is the network description and
# the .pb holds the frozen weights -- confirm against the files actually shipped.
config_file = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
frozen_model = 'frozen_inference_graph.pb'
# The weights file is passed first, the configuration file second.
model = cv2.dnn_DetectionModel(frozen_model, config_file)

In [4]:
# Class names are read from a plain-text file holding one label per line.
file_name = 'labels.txt'
classLabels = []
with open(file_name, 'rt') as fpt:
    label_text = fpt.read()
    # Drop trailing newlines first so the split does not yield empty tail entries.
    classLabels = label_text.rstrip('\n').split('\n')

In [5]:
# Configure model input
# Frames are resized to 320x320 before being fed to the network.
model.setInputSize(320, 320)
# Scale 1/127.5 combined with mean 127.5 looks like the usual
# "(pixel - 127.5) / 127.5" normalisation to roughly [-1, 1].
# NOTE(review): confirm in the OpenCV docs how mean and scale are combined.
model.setInputScale(1.0 / 127.5)
model.setInputMean((127.5, 127.5, 127.5))
# Swap the red and blue channels of each frame.
# NOTE(review): presumably because OpenCV decodes frames as BGR while the
# network was trained on RGB -- confirm for this model.
model.setInputSwapRB(True)


< cv2.dnn.Model 000002C385C62670>

In [6]:
# Function to draw text with background
def draw_text_with_background(img, text, x, y, font=cv2.FONT_HERSHEY_PLAIN, font_scale=2, thickness=2):
    """Render white ``text`` on top of a filled black box drawn onto ``img``.

    Args:
        img: Image passed to ``cv2.rectangle`` / ``cv2.putText``.
        text: String to render.
        x: Left edge of the background box and of the text.
        y: Bottom edge of the background box; the text origin is 10 px above it.
        font: OpenCV font identifier.
        font_scale: Scale factor forwarded as ``fontScale``.
        thickness: Stroke thickness used both for measuring and drawing the text.

    Returns:
        None. The drawing calls are issued against ``img`` itself.
    """
    text_size, _baseline = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)
    text_width, text_height = text_size

    # The box is 10 px wider than the text and reaches 10 px above its height.
    box_top_left = (x, y - text_height - 10)
    box_bottom_right = (x + text_width + 10, y)
    cv2.rectangle(img, box_top_left, box_bottom_right, (0, 0, 0), -1)

    text_origin = (x, y - 10)
    cv2.putText(img, text, text_origin, font, fontScale=font_scale, color=(255, 255, 255), thickness=thickness)


In [7]:
# Bounding box colors for different classes
# One row of three random values drawn from [0, 255) per entry in classLabels.
# No seed is set here, so the colors differ from run to run.
# NOTE(review): bbox_colors is not referenced by any other cell visible in this
# notebook (boxes are drawn in fixed red/green) -- confirm whether it is still needed.
bbox_colors = np.random.uniform(0, 255, size=(len(classLabels), 3))

In [8]:
# FPS calculation variables
# Start timestamp of the current measurement window and the number of frames
# seen in it; both start at zero and are updated by the processing loop.
fps_start_time, fps_frame_counter = 0, 0

In [9]:
# Object counting variables
# Maps every class label to a running count, all starting at zero.
object_counts = dict.fromkeys(classLabels, 0)

In [11]:
# Detect, track and count objects in "Traffic.mp4".
#
# For every frame the cell runs the detector, feeds the detections to SORT,
# draws tracked boxes (red) and raw detections (green), overlays the processing
# FPS and per-class counts, saves annotated frames that contain detections,
# appends the frame to "output.avi" and shows it in a window.
# Keys: 'p' pauses/resumes, 'q' quits (also while paused).


def _best_matching_detection(track_box, detection_corners):
    """Return the index of the detection with the highest IoU against ``track_box``.

    Args:
        track_box: Sequence ``(x1, y1, x2, y2)`` of one tracked box.
        detection_corners: Float array of shape (N, 4) holding ``(x1, y1, x2, y2)`` rows.

    Returns:
        Index into ``detection_corners`` of the best overlap, or ``None`` when
        there are no detections or none of them overlaps the track.
    """
    if len(detection_corners) == 0:
        return None
    inter_w = np.minimum(track_box[2], detection_corners[:, 2]) - np.maximum(track_box[0], detection_corners[:, 0])
    inter_h = np.minimum(track_box[3], detection_corners[:, 3]) - np.maximum(track_box[1], detection_corners[:, 1])
    intersection = np.clip(inter_w, 0, None) * np.clip(inter_h, 0, None)
    track_area = (track_box[2] - track_box[0]) * (track_box[3] - track_box[1])
    detection_area = (detection_corners[:, 2] - detection_corners[:, 0]) * (detection_corners[:, 3] - detection_corners[:, 1])
    union = track_area + detection_area - intersection
    iou = np.where(union > 0, intersection / np.maximum(union, 1e-9), 0.0)
    best = int(np.argmax(iou))
    return best if iou[best] > 0 else None


# Read the video file
cap = cv2.VideoCapture("Traffic.mp4")
if not cap.isOpened():
    raise IOError("Cannot open video file")

# Video writer for output
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep fractional rates (e.g. 29.97) and fall back when the container reports
# no usable rate (0 or NaN), which would otherwise be handed to the writer.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, (video_width, video_height))

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('detected_frames', exist_ok=True)

# Initialize SORT tracker
tracker = Sort()

# Reset per-run state so that re-running this cell starts from a clean slate.
for label in object_counts:
    object_counts[label] = 0
counted_track_ids = set()   # track IDs that already contributed to object_counts
frame_index = 0             # running frame number, used for saved file names
measured_fps = None         # processing rate; unknown until the first window completes
fps_frame_counter = 0
fps_start_time = time.time()

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_index += 1

        # Object detection
        ClassIndex, confidence, bbox = model.detect(frame, confThreshold=0.55)

        # The results are empty sequences when nothing is found; normalise
        # everything to flat arrays so the code below has a single shape to handle.
        class_ids = np.asarray(ClassIndex, dtype=int).reshape(-1)
        scores = np.asarray(confidence, dtype=float).reshape(-1)
        det_boxes = np.asarray(bbox, dtype=int).reshape(-1, 4)   # rows of (x, y, w, h)

        # Convert (x, y, w, h) to (x1, y1, x2, y2) and append the score.
        # NOTE(review): this is the detection layout documented by the reference
        # SORT implementation -- confirm against the local sort.py.
        det_corners = det_boxes.astype(float)
        det_corners[:, 2:] += det_corners[:, :2]
        if len(det_corners) > 0:
            dets_for_sort = np.column_stack((det_corners, scores))
        else:
            dets_for_sort = np.empty((0, 5))

        # Track objects using SORT
        track_bboxes = tracker.update(dets_for_sort)

        for row_index, track in enumerate(track_bboxes):
            if len(track) < 4:
                continue
            left, top, right, bottom = np.asarray(track[:4]).astype(int).tolist()
            if right <= left or bottom <= top:
                continue

            # NOTE(review): the fifth column is assumed to be SORT's track ID;
            # fall back to the row index for display if it is missing.
            track_id = int(track[4]) if len(track) > 4 else None
            shown_id = row_index if track_id is None else track_id

            # Draw tracked bounding box in red
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            cv2.putText(frame, f'Tracked {shown_id}', (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            # Count every track once, using the class of the detection that
            # overlaps it most (track rows are not ordered like detections).
            if track_id is None or track_id in counted_track_ids:
                continue
            det_index = _best_matching_detection(track[:4], det_corners)
            if det_index is None:
                continue
            class_id = int(class_ids[det_index])
            if 1 <= class_id <= len(classLabels):
                object_counts[classLabels[class_id - 1]] += 1
                counted_track_ids.add(track_id)

        # Draw detected bounding boxes in green
        for class_id, score, (x, y, w, h) in zip(class_ids.tolist(), scores.tolist(), det_boxes.tolist()):
            # Class ids are 1-based; rejecting 0 avoids wrapping to the last label.
            if 1 <= class_id <= len(classLabels):
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                draw_text_with_background(frame, f'{classLabels[class_id - 1]}: {score * 100:.2f}%', x, y)

        # Calculate FPS over windows of 10 frames and show the latest value on
        # every frame instead of only on the frame that closes a window.
        fps_frame_counter += 1
        if fps_frame_counter >= 10:
            now = time.time()
            elapsed = now - fps_start_time
            if elapsed > 0:
                measured_fps = fps_frame_counter / elapsed
            fps_frame_counter = 0
            fps_start_time = now
        if measured_fps is not None:
            draw_text_with_background(frame, f"FPS: {measured_fps:.2f}", 10, 50, font_scale=2, thickness=2)

        # Display object counts in multiple lines if necessary
        count_texts = []
        line_text = ""
        for label, count in object_counts.items():
            if count > 0:  # Only include non-zero counts
                new_text = f"{label}: {count}, "
                if len(line_text) + len(new_text) > 60:  # Adjust the line length as needed
                    count_texts.append(line_text)
                    line_text = new_text
                else:
                    line_text += new_text
        if line_text:  # Skip the last line when it is empty (no stray black box)
            count_texts.append(line_text)

        y_offset = 100
        for text in count_texts:
            draw_text_with_background(frame, text.strip(", "), 10, y_offset, font_scale=1.5, thickness=2)
            y_offset += 40  # Adjust the vertical spacing as needed

        # Save frames with detections; the running frame number keeps names unique.
        if len(class_ids) != 0:
            cv2.imwrite(f'detected_frames/frame_{frame_index}.jpg', frame)

        # Write frame to output video
        out.write(frame)

        # Display video feed
        cv2.imshow('Object Detection', frame)

        # Keyboard controls
        key = cv2.waitKey(1) & 0xFF
        if key == ord('p'):  # Pause when 'p' is pressed
            while True:
                key = cv2.waitKey(30) & 0xFF
                if key in (ord('p'), ord('q')):  # 'p' resumes, 'q' quits
                    break
            # Do not let the time spent paused distort the FPS measurement.
            fps_frame_counter = 0
            fps_start_time = time.time()
        if key == ord('q'):  # Exit when 'q' is pressed (also from pause)
            break
finally:
    # Release video capture and close windows, even if the loop raised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()