In [1]:
!pip install ultralytics opencv-python numpy



In [3]:
import cv2
from ultralytics import YOLO
import datetime

# Load the YOLO11m model
model = YOLO('yolo11m.pt') 

# Video source
video_path = "Input_Video.mp4"
cap = cv2.VideoCapture(video_path)

# Class IDs for vehicles in the COCO dataset
vehicle_classes = [2, 3, 5, 7]  # Car, Motorcycle, Bus, Truck

# Map class IDs to vehicle types
vehicle_labels = {
    2: "Car",
    3: "Motorcycle",
    5: "Bus",
    7: "Truck"
}

# Get the video frame width and height for saving the output video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))


current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") #I used datatime library just for naming output video
output_video_path = f'Output_video_{current_time}.mp4'


fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4 format
out = cv2.VideoWriter(output_video_path, fourcc, 30, (frame_width, frame_height))
total_cars = 0
while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame, stream=True)

    # Process detections
    for result in results:
        for box in result.boxes:
            classId = int(box.cls[0])  
            confidence = float(box.conf[0]) 
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Check if the detected object is a vehicle
            if classId in vehicle_classes:
                vehicle_type = vehicle_labels.get(classId, "Unknown")
                
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                label = f"{vehicle_type} ({confidence:.2f})"
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Write the frame to the output video
    out.write(frame)

    # Show the frame
    cv2.imshow("Vehicle Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
out.release()  
cv2.destroyAllWindows()



0: 384x640 2 persons, 14 cars, 4 buss, 409.7ms
Speed: 30.5ms preprocess, 409.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 14 cars, 4 buss, 371.7ms
Speed: 17.7ms preprocess, 371.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 12 cars, 2 motorcycles, 4 buss, 346.7ms
Speed: 2.7ms preprocess, 346.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 13 cars, 1 motorcycle, 4 buss, 358.6ms
Speed: 4.7ms preprocess, 358.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 13 cars, 1 motorcycle, 4 buss, 365.8ms
Speed: 17.3ms preprocess, 365.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 13 cars, 4 buss, 387.2ms
Speed: 2.5ms preprocess, 387.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 13 cars, 4 buss, 381.6ms
Speed: 5.2ms preprocess, 381.6ms 


0: 384x640 1 person, 13 cars, 4 buss, 409.0ms
Speed: 7.7ms preprocess, 409.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 4 buss, 376.8ms
Speed: 5.3ms preprocess, 376.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 4 buss, 363.1ms
Speed: 2.2ms preprocess, 363.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 4 buss, 340.4ms
Speed: 5.8ms preprocess, 340.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 4 buss, 337.3ms
Speed: 6.5ms preprocess, 337.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 4 buss, 350.3ms
Speed: 4.3ms preprocess, 350.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 4 buss, 338.6ms
Speed: 2.1ms preprocess, 338.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 4 buss, 327.9ms
Speed: 4

KeyboardInterrupt: 