In [11]:
import cv2
import torch
from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO(r"C:\Users\Sambhavi Roy\Downloads\Bismillah yolov8\runs\train5 10 epochs\weights\best.pt")

# Load the video feed from the dashcam
cap = cv2.VideoCapture(r"C:\Users\Sambhavi Roy\Downloads\S3 Meenambakkam signal1.mp4")

# Check video capture properties
print(f"Frame Width: {cap.get(cv2.CAP_PROP_FRAME_WIDTH)}")
print(f"Frame Height: {cap.get(cv2.CAP_PROP_FRAME_HEIGHT)}")
print(f"Total Frames: {cap.get(cv2.CAP_PROP_FRAME_COUNT)}")

# Define codec and create VideoWriter object to save the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
out = cv2.VideoWriter('s3meen1Y8output_v8.mp4', fourcc, 30.0, (640, 480))  # Adjust the FPS (30.0) and frame size (640x480) as needed

# Known parameters (you need to adjust these values)
KNOWN_HEIGHT = 0.6  # height of the traffic light in meters (example)
FOCAL_LENGTH = 2623  # Convert focal length to pixels (approx, adjust based on your setup)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Display the original frame
    cv2.imshow('Original Frame', frame)  # Show the original frame for debugging

    # Resize frame if necessary
    frame = cv2.resize(frame, (640, 480))  # Resize to a standard dimension

    # Perform inference on the current frame
    results = model(frame)

    # Loop through the detections
    for result in results:  # YOLOv8 results structure
        boxes = result.boxes  # Boxes object containing bounding box and confidence info
        for box in boxes:
            x_min, y_min, x_max, y_max = map(int, box.xyxy[0])  # Extracting the bounding box coordinates
            height_in_pixels = y_max - y_min  # Height of bounding box in pixels

            # Calculate the distance
            if height_in_pixels > 0:  # Avoid division by zero
                distance = (KNOWN_HEIGHT * FOCAL_LENGTH) / height_in_pixels
                distance_text = f"Distance: {distance:.2f} m"
            else:
                distance_text = "Distance: N/A"

            # Draw the bounding box
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)  # Bounding box

            # Calculate the position for distance text
            text_x = x_min
            text_y = y_min - 20  # Position above the bounding box

            # Draw a background rectangle for the distance text
            text_width, text_height = cv2.getTextSize(distance_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
            cv2.rectangle(frame, (text_x, text_y - text_height), (text_x + text_width, text_y + 5), (255, 255, 255), -1)  # White background for text

            # Draw the distance text
            cv2.putText(frame, distance_text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

    # Write the processed frame to the video file
    out.write(frame)  # Save the frame to the output video

    # Display the frame with YOLOv8 detections and distance estimations
    cv2.imshow('Dashcam Traffic Light Detection and Distance Estimation', frame)

    # Exit the video display when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release video capture, VideoWriter, and close display windows
cap.release()
out.release()  # Release the VideoWriter
cv2.destroyAllWindows()


Frame Width: 3840.0
Frame Height: 2160.0
Total Frames: 1537.0

0: 480x640 (no detections), 62.9ms
Speed: 5.1ms preprocess, 62.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 54.8ms
Speed: 5.0ms preprocess, 54.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 53.9ms
Speed: 5.0ms preprocess, 53.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 28.1ms
Speed: 3.5ms preprocess, 28.1ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 25.5ms
Speed: 2.5ms preprocess, 25.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 25.9ms
Speed: 2.0ms preprocess, 25.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 24.6ms
Speed: 3.0ms preprocess, 24.6ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480