In [1]:
from ultralytics import YOLO

import time
import torch
import cv2
import torch.backends.cudnn as cudnn
from PIL import Image
import colorsys
import numpy as np


In [2]:
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort
from deep_sort.sort.tracker import Tracker

deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'
tracker = DeepSort(model_path=deep_sort_weights, max_age=70)

In [3]:
# Define the video path
import cv2
import torch


video_path = 'input_videos\Surveillance_ Thieves rip open ATM in Shelbyville.mp4'

cap = cv2.VideoCapture(video_path)

# Get the video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_path = 'output.mp4'
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


  video_path = 'input_videos\Surveillance_ Thieves rip open ATM in Shelbyville.mp4'


In [4]:
frames = []

unique_track_ids = set()

In [5]:
import cv2
import numpy as np
import time

# Initialize the YOLO model
model = YOLO("yolov8n.pt")  # Load the model once before the loop

# Open the webcam
cap = cv2.VideoCapture(0)  # 0 is the default camera, change if needed

# Initialize variables
counter, fps, elapsed = 0, 0, 0
start_time = time.perf_counter()
unique_track_ids = set()
track_labels = {}
track_times = {}
fps = cap.get(cv2.CAP_PROP_FPS)  # Get the frame rate of the video

while cap.isOpened():
    ret, frame = cap.read()

    if ret:
        # Flip the frame horizontally to correct the mirrored effect
        frame = cv2.flip(frame, 1)

        og_frame = frame
        frame = og_frame.copy()

        results = model(frame, device=0, classes=0, conf=0.5)

        class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

        active_track_ids = set()

        for result in results:
            boxes = result.boxes
            cls = boxes.cls.tolist()
            xyxy = boxes.xyxy
            conf = boxes.conf
            xywh = boxes.xywh

            for class_index in cls:
                class_name = class_names[int(class_index)]
                print("Class:", class_name)

        pred_cls = np.array(cls)
        conf = conf.detach().cpu().numpy()
        xyxy = xyxy.detach().cpu().numpy()
        bboxes_xywh = xywh.cpu().numpy()

        tracks = tracker.update(bboxes_xywh, conf, og_frame)

        for track in tracker.tracker.tracks:
            track_id = track.track_id
            hits = track.hits
            x1, y1, x2, y2 = track.to_tlbr()
            w = x2 - x1
            h = y2 - y1

            red_color = (0, 0, 255)
            blue_color = (255, 0, 0)
            green_color = (0, 255, 0)

            color_id = track_id % 3
            color = red_color if color_id == 0 else blue_color if color_id == 1 else green_color

            cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

            # Track the label and start time for the detected object
            if track_id not in track_labels:
                track_labels[track_id] = class_names[int(cls[0])]  # Assuming each track belongs to one class
                track_times[track_id] = 0  # Initialize the time spent

            # Update the time spent (in frames)
            track_times[track_id] += 1

            # Calculate time spent in minutes and seconds
            total_seconds = track_times[track_id] / fps
            minutes = int(total_seconds // 60)
            seconds = int(total_seconds % 60)

            text_color = (0, 0, 0)
            cv2.putText(og_frame, f"{track_labels[track_id]} {minutes:02}:{seconds:02}", 
                        (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1, cv2.LINE_AA)

            active_track_ids.add(track_id)

        lost_track_ids = unique_track_ids - active_track_ids
        for lost_track_id in lost_track_ids:
            track_labels.pop(lost_track_id, None)
            track_times.pop(lost_track_id, None)

        unique_track_ids.intersection_update(active_track_ids)
        unique_track_ids.update(active_track_ids)

        person_count = len(unique_track_ids)

        current_time = time.perf_counter()
        elapsed = (current_time - start_time)
        counter += 1
        if elapsed > 1:
            fps = counter / elapsed
            counter = 0
            start_time = current_time

        cv2.putText(og_frame, f"Person Count: {person_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.namedWindow("Video", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Video", 960, 540)
        cv2.imshow("Video", og_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


0: 480x640 (no detections), 71.2ms
Speed: 16.1ms preprocess, 71.2ms inference, 79.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 55.7ms
Speed: 7.5ms preprocess, 55.7ms inference, 337.6ms postprocess per image at shape (1, 3, 480, 640)
Class: person

0: 480x640 1 person, 56.8ms
Speed: 15.9ms preprocess, 56.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
Class: person

0: 480x640 1 person, 72.8ms
Speed: 0.0ms preprocess, 72.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
Class: person

0: 480x640 1 person, 12.5ms
Speed: 0.0ms preprocess, 12.5ms inference, 15.4ms postprocess per image at shape (1, 3, 480, 640)
Class: person

0: 480x640 1 person, 10.9ms
Speed: 0.0ms preprocess, 10.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
Class: person

0: 480x640 (no detections), 14.1ms
Speed: 0.0ms preprocess, 14.1ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections),