In [1]:
# Cell 1: Imports and confirm CUDA
import cv2
import numpy as np
import time
import pandas as pd
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

# Confirm GPU
print("PyTorch:", torch.__version__)
print("Built with CUDA:", torch.version.cuda)
print("cuda available?", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


PyTorch: 2.5.1
Built with CUDA: 12.1
cuda available? True
GPU: NVIDIA GeForce GTX 1650


In [2]:
# Cell 2: Model and tracker setup

# 1) Load the YOLOv8x model
model = YOLO("yolov8x.pt")

# 2) Send it to GPU
model.to("cuda:0")

# 3) Initialize DeepSORT tracker
tracker = DeepSort(max_age=30, n_init=2)

# 4) Define which classes to track
target_classes = ['car', 'bus', 'truck', 'motorcycle']

# 5) Prepare storage for results
vehicle_info = {}
last_positions = {}
last_times = {}

print("Model loaded on", next(model.model.parameters()).device)
print("Tracker ready.")


Model loaded on cuda:0
Tracker ready.


In [3]:
# Cell 3: Speed estimation helper

# Calibrate this: number of pixels that correspond to 1 meter in your video
PIXELS_PER_METER = 10  

def estimate_speed(prev_pos, curr_pos, dt):
    """
    Estimate speed (km/h) given previous and current positions (x, y) and time delta (seconds).
    """
    if prev_pos is None or curr_pos is None or dt <= 0:
        return 0.0
    dx = curr_pos[0] - prev_pos[0]
    dy = curr_pos[1] - prev_pos[1]
    dist_pixels = (dx**2 + dy**2)**0.5
    dist_m = dist_pixels / PIXELS_PER_METER
    speed_m_s = dist_m / dt
    speed_kmh = speed_m_s * 3.6
    return round(speed_kmh, 2)

print("Speed helper ready. PIXELS_PER_METER =", PIXELS_PER_METER)


Speed helper ready. PIXELS_PER_METER = 10


In [4]:
# Cell 4: Video I/O setup

video_path = "traffic_video5.mp4"  # change if needed
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Failed to open {video_path}")

# Original dimensions & FPS
orig_width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps         = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Downscale for faster inference
proc_width  = orig_width // 2
proc_height = orig_height // 2

# Output writer at original size
output_path = "output_video5.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (orig_width, orig_height))

print(f"Video opened: {orig_width}×{orig_height} @ {fps:.1f}FPS")
print(f"Processing resolution: {proc_width}×{proc_height}")
print("Output will be saved to", output_path)


Video opened: 1280×720 @ 28.9FPS
Processing resolution: 640×360
Output will be saved to output_video5.mp4


In [5]:
# Cell 5: process_frame with exact YOLO boxes only
def process_frame(frame, tracker, t):
    # 1) Run YOLO on full-res frame
    results = model(frame, conf=0.3, iou=0.5)

    # 2) Collect raw YOLO detections
    dets = []  # each = ((x1,y1,x2,y2), confidence, class_name)
    for r in results:
        for b in r.boxes:
            x1, y1, x2, y2 = map(int, b.xyxy[0].cpu().numpy())
            conf           = float(b.conf[0].cpu().numpy())
            name           = r.names[int(b.cls[0].cpu().numpy())]
            if name in target_classes:
                dets.append(((x1, y1, x2, y2), conf, name))

    # 3) Update DeepSORT with those detections
    tracks = tracker.update_tracks(dets, frame=frame)

    # 4) Draw only the matched YOLO box for each track
    for tr in tracks:
        if not tr.is_confirmed():
            continue
        tid = tr.track_id

        # find the YOLO det that spawned this track
        matched_box = None
        for (box, conf, name) in dets:
            # compare YOLO-box top-left to track.to_ltrb() top-left
            tx, ty, _, _ = map(int, tr.to_ltrb())
            if abs(box[0] - tx) < 20 and abs(box[1] - ty) < 20:
                matched_box = (box, name)
                break

        if not matched_box:
            continue  # skip any tracks without a fresh detection

        (x1, y1, x2, y2), label = matched_box

        # speed calc
        center       = ((x1+x2)//2, (y1+y2)//2)
        prev_p       = last_positions.get(tid)
        prev_t       = last_times.get(tid)
        spd          = estimate_speed(prev_p, center, t-prev_t) if prev_t else 0.0
        last_positions[tid] = center
        last_times[tid]     = t

        # draw the exact YOLO box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
        cv2.putText(
            frame,
            f"ID {tid} {label} {spd:.1f}km/h",
            (x1, y1-10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0,255,0),
            2
        )

        # store for summary
        vehicle_info.setdefault(tid, {"label": label, "speeds": []})
        vehicle_info[tid]["speeds"].append(spd)

    # 5) Overlay count & avg speed
    total = len(vehicle_info)
    all_s = [s for info in vehicle_info.values() for s in info["speeds"]]
    avg   = round(sum(all_s)/len(all_s),1) if all_s else 0.0
    txt   = f"Count: {total}   Avg: {avg} km/h"
    cv2.rectangle(frame, (0,0), (300,30), (0,0,0), -1)
    cv2.putText(frame, txt, (10,20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 2, cv2.LINE_AA)

    return frame

print("process_frame updated — drawing only YOLO boxes.")


process_frame updated — drawing only YOLO boxes.


In [6]:
# Cell 6: Video processing loop

frame_count    = 0
skip_interval  = 2   # process every 2nd frame to lower load
start_time     = time.time()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    current_time = time.time() - start_time

    # Skip some frames if desired
    if frame_count % skip_interval != 0:
        out.write(frame)
        continue

    # Process and write
    processed = process_frame(frame, tracker, current_time)
    out.write(processed)

    # Optional display
    cv2.imshow("Traffic Detection", processed)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Cleanup
cap.release()
out.release()
cv2.destroyAllWindows()
torch.cuda.empty_cache()

print("✅ Video processing complete. Saved to:", output_path)



0: 384x640 8 persons, 12 cars, 5 motorcycles, 1 bus, 9 trucks, 105.8ms
Speed: 4.3ms preprocess, 105.8ms inference, 165.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 13 cars, 4 motorcycles, 1 bus, 11 trucks, 100.6ms
Speed: 3.0ms preprocess, 100.6ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 11 cars, 5 motorcycles, 1 bus, 7 trucks, 96.7ms
Speed: 2.8ms preprocess, 96.7ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 12 cars, 5 motorcycles, 8 trucks, 94.2ms
Speed: 2.0ms preprocess, 94.2ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 12 cars, 5 motorcycles, 8 trucks, 94.3ms
Speed: 2.7ms preprocess, 94.3ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 15 cars, 5 motorcycles, 1 bus, 7 trucks, 94.0ms
Speed: 2.0ms preprocess, 94.0ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)