In [2]:
import torch
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

# --- Configuration -----------------------------------------------------------
# Path to the YOLOv8 weights file to load.
MODEL_PATH = '/home/dineshkrishna/Dinesh_seneca/Sem_2/CV/Project/yolo-project/yolov8_combined_modelV2.pt'
# Index of the capture device handed to OpenCV (0 is normally the default webcam).
CAMERA_INDEX = 0

# Load YOLOv8 model
model = YOLO(MODEL_PATH)

# Initialize Deep SORT tracker with the same settings as before
# (max_age=30, n_init=2).
tracker = DeepSort(max_age=30, n_init=2)

# Try accessing the webcam
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    print("Cannot open camera")
    # `exit()` is a convenience helper injected by the `site` module for
    # interactive use and, called without arguments, reports success (status 0).
    # Raise SystemExit with a non-zero status so the failure is visible to the
    # caller and the script does not depend on `site` being loaded.
    raise SystemExit(1)

# Running totals shown in the overlay, and the track IDs used to compute them.
phone_count = 0
person_count = 0
phone_ids = set()
person_ids = set()

# Main loop: grab a frame, detect phones/persons with YOLO, feed the detections
# to Deep SORT, draw the confirmed tracks and keep a running count of distinct
# track IDs per class. Runs until the stream ends or 'q' is pressed.

# Classes we care about, mapped to the BGR colour used to draw them.
TRACKED_CLASSES = {
    'cell phone': (0, 255, 0),
    'person': (255, 0, 0),
}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Perform detection
        results = model(frame)

        # Prepare detections for the tracker as
        # ([left, top, width, height], confidence, class_name) tuples.
        # The class name travels with the detection so it can be read back
        # from the track it gets associated with.
        detections = []
        for result in results:
            for box in result.boxes:
                class_name = model.names[int(box.cls.item())]
                if class_name not in TRACKED_CLASSES:
                    continue
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                detections.append(
                    ([x1, y1, x2 - x1, y2 - y1], box.conf.item(), class_name)
                )

        # Update tracker
        tracks = tracker.update_tracks(detections, frame=frame)

        # Collect the IDs of confirmed tracks in this frame and draw them.
        # NOTE: the tracker's output list is NOT aligned with `detections`
        # (it also contains tracks carried over from earlier frames), so the
        # class must come from the track itself, not from a positional zip.
        current_phone_ids = set()
        current_person_ids = set()
        for track in tracks:
            if not track.is_confirmed():
                continue

            class_name = track.get_det_class()
            colour = TRACKED_CLASSES.get(class_name)
            if colour is None:
                # Track carries no class, or one we do not display.
                continue

            if class_name == 'cell phone':
                current_phone_ids.add(track.track_id)
            else:
                current_person_ids.add(track.track_id)

            x1, y1, x2, y2 = map(int, track.to_ltrb())
            cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 2)
            cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, colour, 2)

        # Update phone and person counts. IDs are accumulated over the whole
        # run so a track that is briefly absent is not counted a second time.
        phone_count += len(current_phone_ids - phone_ids)
        person_count += len(current_person_ids - person_ids)
        phone_ids |= current_phone_ids
        person_ids |= current_person_ids

        # Display the counts in the top left corner
        cv2.putText(frame, f'Phones: {phone_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f'Persons: {person_count}', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Display the frame
        cv2.imshow('Detection', frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if detection,
    # tracking or drawing raised (or the run was interrupted).
    cap.release()
    cv2.destroyAllWindows()



0: 480x640 2 persons, 62.8ms
Speed: 2.3ms preprocess, 62.8ms inference, 266.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 2.4ms
Speed: 1.0ms preprocess, 2.4ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 6.0ms
Speed: 1.6ms preprocess, 6.0ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 5.3ms
Speed: 1.1ms preprocess, 5.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 4.4ms
Speed: 0.7ms preprocess, 4.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 2.8ms
Speed: 0.7ms preprocess, 2.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 2.7ms
Speed: 0.9ms preprocess, 2.7ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 2.8ms
Speed: 0.8ms preprocess, 2.8ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 6