In [2]:
import cv2
import time
from ultralytics import YOLO  # Import YOLO model handler

# Load the YOLO model
model = YOLO("yolov8n.pt")  # Replace 'yolov8n.pt' with your desired YOLO model file

# Path to the uploaded video file
video_source = "rnsit.mp4"  # Replace with the actual file path
cap = cv2.VideoCapture(video_source)

tracked_positions = {}  # Store object IDs and their last known positions and timestamps
frame_count = 0

# Function to calculate speed (needs proper calibration)
def calculate_speed(last_position, current_position, time_elapsed):
    if time_elapsed == 0:
        return 0
    distance = ((current_position[0] - last_position[0]) ** 2 + (current_position[1] - last_position[1]) ** 2) ** 0.5
    speed = distance / time_elapsed  # This speed is in pixels per second
    return speed * 3.6  # Convert to km/h assuming proper calibration

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    # Process the frame using the YOLO model
    results = model(frame, stream=True)

    for result in results:
        for bbox in result.boxes:
            # Extract bounding box, confidence, and class
            x1, y1, x2, y2 = map(int, bbox.xyxy[0].tolist())
            confidence = bbox.conf[0]
            class_id = int(bbox.cls[0])

            # Detect vehicles, pedestrians, and pets
            if class_id in [0, 2, 3, 5, 7, 16, 17]:  # 0=person, 2/3/5/7=vehicles, 16=cat, 17=dog
                if class_id == 0:
                    object_type = "Pedestrian"
                    color = (0, 255, 255)  # Yellow
                elif class_id in [2, 3, 5, 7]:
                    object_type = "Vehicle"
                    color = (0, 255, 0)  # Green
                elif class_id == 16:
                    object_type = "Cat"
                    color = (255, 0, 255)  # Magenta
                elif class_id == 17:
                    object_type = "Dog"
                    color = (255, 0, 0)  # Blue

                object_id = hash(tuple(bbox.xyxy[0].tolist()))  # Generate a unique ID
                current_time = time.time()

                # Draw the bounding box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, f"{object_type} {object_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                # Calculate speed if the object was seen before
                if object_id in tracked_positions:
                    last_position, last_time = tracked_positions[object_id]
                    current_position = ((x1 + x2) // 2, (y1 + y2) // 2)  # Center of the bounding box
                    speed = calculate_speed(last_position, current_position, current_time - last_time)
                    cv2.putText(frame, f"Speed: {speed:.2f} km/h", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                else:
                    current_position = ((x1 + x2) // 2, (y1 + y2) // 2)

                # Update position and timestamp
                tracked_positions[object_id] = (current_position, current_time)

    # Display the video feed
    cv2.imshow("Traffic, Pedestrian, and Pet Monitor", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 384x640 9 persons, 5 cars, 1 bus, 2 trucks, 1 traffic light, 249.9ms
Speed: 15.9ms preprocess, 249.9ms inference, 13.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 6 cars, 1 bus, 2 trucks, 1 traffic light, 145.0ms
Speed: 2.7ms preprocess, 145.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 5 cars, 1 airplane, 1 bus, 2 trucks, 1 traffic light, 142.0ms
Speed: 2.4ms preprocess, 142.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 4 cars, 1 bus, 3 trucks, 1 traffic light, 134.5ms
Speed: 3.5ms preprocess, 134.5ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 3 cars, 2 buss, 3 trucks, 1 traffic light, 139.9ms
Speed: 2.8ms preprocess, 139.9ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 4 cars, 1 bus, 2 trucks, 1 traffic light, 138.2ms
Speed: 2.0ms preprocess, 138.2ms inference, 2.0ms postpro

Speed: 3.2ms preprocess, 139.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 5 cars, 3 trucks, 1 traffic light, 143.2ms
Speed: 2.0ms preprocess, 143.2ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 6 cars, 2 trucks, 1 traffic light, 143.0ms
Speed: 2.6ms preprocess, 143.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 6 cars, 1 bus, 2 trucks, 1 traffic light, 142.7ms
Speed: 2.5ms preprocess, 142.7ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 6 cars, 1 bus, 1 truck, 1 traffic light, 138.7ms
Speed: 2.5ms preprocess, 138.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 6 cars, 1 truck, 1 traffic light, 123.4ms
Speed: 4.1ms preprocess, 123.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 4 cars, 1 truck, 1 traffic light, 142.1ms
S


0: 384x640 6 persons, 1 car, 5 buss, 1 truck, 145.1ms
Speed: 2.0ms preprocess, 145.1ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 1 car, 5 buss, 1 truck, 138.2ms
Speed: 1.3ms preprocess, 138.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 3 cars, 4 buss, 1 truck, 1 traffic light, 145.5ms
Speed: 2.3ms preprocess, 145.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 1 car, 3 buss, 1 truck, 1 traffic light, 141.0ms
Speed: 2.0ms preprocess, 141.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 car, 3 buss, 1 truck, 142.7ms
Speed: 2.8ms preprocess, 142.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 car, 2 buss, 1 truck, 135.4ms
Speed: 2.3ms preprocess, 135.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 cars, 2 buss, 1 truck,

Speed: 14.1ms preprocess, 139.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 cars, 3 buss, 4 trucks, 140.1ms
Speed: 3.2ms preprocess, 140.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 car, 2 buss, 5 trucks, 142.4ms
Speed: 3.7ms preprocess, 142.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 3 cars, 2 buss, 3 trucks, 134.5ms
Speed: 2.7ms preprocess, 134.5ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 2 cars, 1 bus, 4 trucks, 137.1ms
Speed: 3.1ms preprocess, 137.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 1 car, 3 buss, 3 trucks, 135.5ms
Speed: 14.5ms preprocess, 135.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 3 cars, 3 buss, 2 trucks, 138.3ms
Speed: 2.6ms preprocess, 138.3ms inference, 2.0ms postprocess per imag

KeyboardInterrupt: 