In [1]:
import cv2 as cv
import time
import os
from yolo_detector import YoloDetector
from tracker import Tracker

In [19]:
# Model location handed to YoloDetector as `model_path` in main().
MODEL_PATH = "models/yolo11n"
# Video source used by main(): an int (or a digit-only string) is opened as a
# camera index; any other string is opened as a video file path.
VIDEO_PATH = 2
# VIDEO_PATH = "data/test/traffic.mp4"

def main():
    """Detect and track objects on VIDEO_PATH, display and save the result.

    Reads frames from a camera (when VIDEO_PATH is an int or a digit-only
    string) or from a video file, runs the detector on a downscaled copy of
    each frame, feeds the rescaled detections to the tracker, draws the
    tracked boxes with class name and track ID, shows the annotated frame in
    a window and writes it to an MP4 file under ``data/output``.

    The loop ends when the source runs out of frames or the user presses
    ``q`` or ``Esc``. The capture, the writer and the window are always
    released, even if processing raises.
    """
    # Single threshold shared by the detector and the drawing filter.
    min_confidence = 0.3

    # Initialize the detector and the tracker
    detector = YoloDetector(model_path=MODEL_PATH, confidence=min_confidence)
    print(detector.model.names)

    tracker = Tracker()

    # Video input source: webcam if integer, video file otherwise
    is_webcam = isinstance(VIDEO_PATH, int) or VIDEO_PATH.isdigit()
    cap = cv.VideoCapture(int(VIDEO_PATH)) if is_webcam else cv.VideoCapture(VIDEO_PATH)

    # Set webcam resolution (only applies to webcam)
    if is_webcam:
        cap.set(cv.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, 720)

    if not cap.isOpened():
        print("Error: Unable to open video source.")
        cap.release()
        return

    out = None
    try:
        # Output filename
        if is_webcam:
            output_filename = "data/output/webcam_result.mp4"
        else:
            input_name = os.path.splitext(os.path.basename(VIDEO_PATH))[0]
            output_filename = f"data/output/{input_name}_output.mp4"

        # The writer cannot create missing directories; without this the
        # output would silently be dropped on a fresh checkout.
        os.makedirs(os.path.dirname(output_filename), exist_ok=True)

        # Get video properties (fall back to 25 fps when the file reports 0)
        fps = 5 if is_webcam else int(cap.get(cv.CAP_PROP_FPS)) or 25
        width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv.VideoWriter_fourcc(*'mp4v')
        out = cv.VideoWriter(output_filename, fourcc, fps, (width, height))

        if out.isOpened():
            print(f"\n[INFO] Saving output to: {output_filename}")
        else:
            # Keep going so the live preview still works, but say so.
            print(f"\n[WARN] Unable to open video writer for: {output_filename}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            start_time = cv.getTickCount()

            # Detection runs on a smaller copy of the frame.
            frame_resized = cv.resize(frame, (540, 480), interpolation=cv.INTER_LINEAR)

            detections = detector.detect(frame_resized)

            # Map detection boxes back to the original frame's coordinates.
            scale_x = frame.shape[1] / frame_resized.shape[1]
            scale_y = frame.shape[0] / frame_resized.shape[0]

            scaled_detections = [
                (
                    [
                        int(x * scale_x),
                        int(y * scale_y),
                        int(w * scale_x),
                        int(h * scale_y),
                    ],
                    class_number,
                    conf,
                )
                for (x, y, w, h), class_number, conf in detections
            ]

            tracking_ids, boxes = tracker.track(scaled_detections, frame)

            # NOTE(review): pairing by position assumes the tracker returns
            # tracks in the same order as the detections it was given --
            # confirm against Tracker.track, otherwise class names may be
            # attached to the wrong track.
            for detection, tracking_id, bounding_box in zip(
                scaled_detections, tracking_ids, boxes
            ):
                _, class_number, conf = detection
                if conf < min_confidence:
                    continue

                class_name = detector.model.names[class_number]

                left = int(bounding_box[0])
                top = int(bounding_box[1])
                right = int(bounding_box[2])
                bottom = int(bounding_box[3])

                cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
                # Anchor the label to the box that is actually drawn so the
                # ID always sits on its own rectangle.
                cv.putText(frame, f"{class_name} ID:{tracking_id}", (left, top - 10),
                           cv.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            elapsed_ticks = cv.getTickCount() - start_time
            # Guard against a zero tick delta on coarse timers.
            fps_value = cv.getTickFrequency() / elapsed_ticks if elapsed_ticks > 0 else 0.0

            cv.putText(frame, f"FPS: {fps_value:.2f}", (20, 30),
                       cv.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

            cv.imshow("Detection", frame)
            out.write(frame)

            key = cv.waitKey(1) & 0xFF
            if key == ord("q") or key == 27:  # 27 is the Esc key code
                break
    finally:
        # Always free the camera/file handle and finalize the output video.
        cap.release()
        if out is not None:
            out.release()
        cv.destroyAllWindows()

    print("[INFO] Video processing complete.")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw