In [42]:
import cv2
import pandas as pd
import numpy as np
import torch
from ultralytics import YOLO
from tracker import Tracker  

In [43]:
model = YOLO('models/yolo12s.pt')  

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print(f"Using device: {device}")


Using device: cpu


In [44]:
class_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# Filter classes to track only vehicles
my_class = ['car', 'motorcycle', 'bus', 'truck', 'rickshaw', 'lorry','bicyle']

In [45]:
tracker=Tracker()
video_path = 'videos/road_feed.mp4'
video = cv2.VideoCapture(video_path)

fps = video.get(cv2.CAP_PROP_FPS)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"Video FPS: {fps}, Resolution: {width}x{height}")

Video FPS: 24.0, Resolution: 360x640


In [46]:

vehicle_counts = {
    'car': 0,
    'motorcycle': 0,
    'bus': 0,
    'truck': 0,
    'rickshaw': 0,
    'lorry': 0
}


In [47]:
counted_ids = set()

frame_dict = {}
frame_no = 0

previous_positions = {}

In [48]:
def search_frame(current, target_id):
    for frame in range(current, 0, -1):
        for data in frame_dict[frame]:
            id, cx, cy = data
            if id == target_id:
                return [frame, cx, cy]
    return None

def distance(xs, ys, xe, ye):
    return np.sqrt((xe - xs)**2 + (ye - ys)**2)


In [49]:
counter_line_y = 540  


real_world_distance = 4.0
pixel_distance = 100
pixel_to_meter_ratio = real_world_distance / pixel_distance 

while True:
    ret, frame = video.read()
    if not ret:
        break

    frame = cv2.resize(frame, (540, 960))

    results = model.predict(frame)
    temp = results[0].boxes.data
    temp = temp.detach().cpu().numpy()
    bb_df = pd.DataFrame(temp).astype("int")


    bb_list = []
    for index, row in bb_df.iterrows():
        x1, y1, x2, y2, sc, d = row
        c = class_list[d]
        if c in my_class:
            bb_list.append([x1, y1, x2, y2, sc, c])

    bbox_ids = tracker.update(bb_list)

    # Draw bounding boxes and IDs on the frame
    for bbox in bbox_ids:
        x3, y3, x4, y4, sc, c, id = bbox
        cx = int(x3 + x4) // 2
        cy = int(y3 + y4) // 2

        # Draw bounding box and label
        cv2.putText(frame, str(c), (x3, y3 - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1)
        cv2.rectangle(frame, (x3, y3), (x4, y4), (0, 255, 0), 1)


        # Check if vehicle crosses the counter line
        offset = 5
        if counter_line_y < (cy + offset) and counter_line_y > (cy - offset):
            cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
            cv2.putText(frame, str(id), (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

            # Ensure each vehicle is counted only once
            if id not in counted_ids:
                counted_ids.add(id)
                vehicle_counts[c] += 1

        # Calculate speed if the vehicle was detected in the previous frame
        if id in previous_positions:
            prev_cx, prev_cy = previous_positions[id]
            pixel_distance_traveled = np.sqrt((cx - prev_cx)**2 + (cy - prev_cy)**2)  # Distance in pixels
            real_distance_traveled = pixel_distance_traveled * pixel_to_meter_ratio  # Distance in meters
            time_elapsed = 1 / fps  # Time in seconds
            speed = (real_distance_traveled / time_elapsed) * 3.6  # Convert to km/h

            cv2.putText(frame, f"{speed:.2f} km/h", (cx, cy + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)

        previous_positions[id] = (cx, cy)

    y_offset = 40
    for vehicle_type, count in vehicle_counts.items():
        cv2.putText(frame, f'{vehicle_type}: {count}', (60, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        y_offset += 30  # Move down for the next line

    cv2.putText(frame, f'Total Vehicles: {len(counted_ids)}', (60, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    cv2.imshow("Vehicle Detection", frame)

    if cv2.waitKey(1) & 0xFF == 27:
        break

video.release()
cv2.destroyAllWindows()


0: 640x384 10 cars, 408.8ms
Speed: 17.6ms preprocess, 408.8ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 cars, 162.8ms
Speed: 3.2ms preprocess, 162.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 cars, 141.9ms
Speed: 2.5ms preprocess, 141.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 cars, 155.2ms
Speed: 2.5ms preprocess, 155.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 151.0ms
Speed: 3.3ms preprocess, 151.0ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 cars, 147.4ms
Speed: 2.6ms preprocess, 147.4ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 cars, 148.7ms
Speed: 2.5ms preprocess, 148.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 cars, 170.0ms
Speed: 2.8ms preprocess, 170.0ms inference, 0.9ms postprocess per image at shape (1, 