In [None]:
# %pip install numpy ultralytics opencv-python pygame sort-track



In [None]:
import cv2
from ultralytics import YOLO
import time
import numpy as np               
from sort.tracker import SortTracker
import pygame

pygame 2.6.1 (SDL 2.28.4, Python 3.11.14)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:
# Build the detector once from the 'yolov8n.pt' weights file; the resulting
# `model` object is reused for every frame in the processing loop below.
print("Loading YOLOv8 model...")
model = YOLO("yolov8n.pt")
print("Model loaded.")

Loading YOLOv8 model...
Model loaded.


In [None]:
# One tracker per object category, so person tracks and vehicle tracks are
# associated independently of each other. Both use the same settings.
car_tracker = SortTracker(max_age=20, min_hits=3, iou_threshold=0.3)
person_tracker = SortTracker(max_age=20, min_hits=3, iou_threshold=0.3)

# Directional audio initialisation.
# The mixer is opened with two output channels because the alert is later
# played with separate left/right volumes.
ALERT_SOUND_FILE = 'danger.mp3'
try:
    pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=4096)
    alert_sound = pygame.mixer.Sound(ALERT_SOUND_FILE)
    print("Audio engine initialized.")
except (pygame.error, FileNotFoundError) as e:
    # pygame 2 reports a missing sound file as FileNotFoundError rather than
    # pygame.error, so both are handled here. The notebook keeps running
    # without audio: `alert_sound = None` is the "no sound available" marker
    # that play_panned_alert() checks for.
    print(f"Error initializing audio or loading file: {e}")
    print(f"### PLEASE MAKE SURE '{ALERT_SOUND_FILE}' IS IN THE FOLDER ###")
    alert_sound = None

Audio engine initialized.


In [None]:
# Open the input video.
video_path = 'sample.mp4'
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the source cannot be opened; without
# this check the processing loop (`while cap.isOpened()`) would simply do
# nothing and give no hint as to why.
if not cap.isOpened():
    print(f"Could not open video source '{video_path}'. Check that the file exists and is readable.")

In [None]:
def play_panned_alert(car_center_x, frame_width):
    """Play the alert sound, panned left or right to follow the car.

    The pan position is ``car_center_x / frame_width`` clamped to [0, 1]
    (0.0 = full left, 1.0 = full right). The left channel volume is
    ``1 - pan`` and the right channel volume is ``pan``.

    Args:
        car_center_x: Horizontal centre of the dangerous car, in the same
            units as ``frame_width``.
        frame_width: Width of the frame. If it is not positive the sound is
            played centred instead of failing on the division.

    Returns:
        None. If no alert sound was loaded a message is printed and nothing
        is played. Errors raised during playback are printed, not re-raised.
    """
    if alert_sound is None:
        print("Audio file not loaded. Cannot play alert.")
        return

    # Calculate pan: 0.0 = full left, 1.0 = full right
    if frame_width > 0:
        pan = max(0.0, min(1.0, car_center_x / frame_width))
    else:
        # No usable width to normalise against: still sound the alert, centred.
        pan = 0.5

    left_volume = 1.0 - pan
    right_volume = pan

    try:
        # force=True: always hand back a channel, even if all are in use.
        channel = pygame.mixer.find_channel(True)
        channel.set_volume(left_volume, right_volume)
        channel.play(alert_sound)
    except Exception as e:
        print(f"Error playing sound: {e}")

In [None]:
# --- Tunable danger heuristics ---------------------------------------------
# A car's bounding-box area must exceed this for the car to count as close.
CLOSE_THRESHOLD = 35_000
# Person and car centre x-coordinates must differ by less than this for the
# person to count as being in the car's path.
PATH_DANGER_THRESHOLD = 250
# A car's bounding-box area must grow by more than this between consecutive
# frames for the car to count as approaching dangerously fast.
DANGER_SPEED_THRESHOLD = 500

# --- State carried from one frame to the next ------------------------------
# time.time() value of the most recent audio alert (0 = none played yet).
last_alert_time = 0
# Minimum gap between two audio alerts, in seconds.
alert_cooldown = 3
# Bounding-box area of each tracked car in the previous frame, keyed by car id.
last_car_areas = dict()

In [None]:
MIN_CONFIDENCE = 0.4        # detections scoring below this are ignored
PERSON_CLASS_IDS = {0}      # Person
VEHICLE_CLASS_IDS = {2, 7}  # Car or Truck


def _to_tracker_input(detections):
    """Return the detection rows as an array, or an empty (0, 6) array if there are none."""
    return np.array(detections) if detections else np.empty((0, 6))


def _find_collision_threat(tracked_persons, tracked_cars, previous_areas):
    """Look for a car that is close, growing fast, and in line with a person.

    Every tracked car's bounding-box area is recorded first, independently of
    whether any person is being tracked and of whether a threat has already
    been found. This keeps the area history complete, so the growth of each
    car can be computed on the next frame.

    Args:
        tracked_persons: Rows from the person tracker; columns 0 and 2 are
            read as x1 and x2.
        tracked_cars: Rows from the car tracker; columns 0-3 are read as
            x1, y1, x2, y2 and the last column as the track id.
            NOTE(review): assumes the track id is the LAST column of the
            tracker output - confirm against the installed SortTracker.
        previous_areas: Mapping of car id -> bounding-box area in the
            previous frame.

    Returns:
        A tuple ``(is_danger, danger_center_x, speed_rate, current_areas)``:
        whether a threat was found, the threatening car's centre x (0 if
        none), the growth rate to display (the threat's rate, otherwise the
        largest rate among cars with history, or 0 if there is none), and the
        mapping of car id -> area for this frame.
    """
    current_areas = {}
    growth_rates = []
    threats = []  # (center_x, growth) of cars that are both fast and close

    for car in tracked_cars:
        car_x1, car_y1, car_x2, car_y2 = (int(value) for value in car[:4])
        car_id = int(car[-1])
        area = (car_x2 - car_x1) * (car_y2 - car_y1)
        current_areas[car_id] = area

        if car_id not in previous_areas:
            continue  # first sighting: no growth rate available yet

        # Growth of the box between consecutive frames is used as a proxy
        # for how fast the car is approaching the camera.
        growth = area - previous_areas[car_id]
        growth_rates.append(growth)
        if growth > DANGER_SPEED_THRESHOLD and area > CLOSE_THRESHOLD:
            threats.append(((car_x1 + car_x2) / 2, growth))

    for person in tracked_persons:
        person_center_x = (int(person[0]) + int(person[2])) / 2
        for car_center_x, growth in threats:
            if abs(person_center_x - car_center_x) < PATH_DANGER_THRESHOLD:
                return True, car_center_x, growth, current_areas

    return False, 0, max(growth_rates, default=0), current_areas


while cap.isOpened():
    success, frame = cap.read()
    if not success:
        print("Video finished or failed to read a frame.")
        break

    # Get frame width for audio panning
    frame_width = frame.shape[1]

    # verbose=False stops the model from printing a log line for every frame.
    results = model(frame, verbose=False)

    # Split the confident detections into persons and vehicles.
    person_detections = []
    car_detections = []
    for box in results[0].boxes:
        conf = float(box.conf[0])
        if conf < MIN_CONFIDENCE:
            continue

        cls_id = int(box.cls[0])
        if cls_id in PERSON_CLASS_IDS:
            bucket = person_detections
        elif cls_id in VEHICLE_CLASS_IDS:
            bucket = car_detections
        else:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        bucket.append([x1, y1, x2, y2, conf, cls_id])

    tracked_persons = person_tracker.update(_to_tracker_input(person_detections), frame)
    tracked_cars = car_tracker.update(_to_tracker_input(car_detections), frame)

    # The returned areas become the history used on the next frame.
    is_danger_present, danger_car_center_x, speed_rate_display, last_car_areas = (
        _find_collision_threat(tracked_persons, tracked_cars, last_car_areas)
    )

    annotated_frame = results[0].plot()

    if is_danger_present:
        cv2.putText(
            annotated_frame,
            "DANGER! COLLISION IMMINENT!",
            (50, 100), cv2.FONT_HERSHEY_SIMPLEX,
            2, (0, 0, 255), 3, cv2.LINE_AA
        )

        # Sound the alert at most once per cooldown period, and never on top
        # of a sound that is still playing.
        current_time = time.time()
        if (current_time - last_alert_time) > alert_cooldown:
            if not pygame.mixer.get_busy():
                print(f"DANGER! Panning audio based on X-coord: {danger_car_center_x}")
                last_alert_time = current_time
                play_panned_alert(danger_car_center_x, frame_width)

    cv2.putText(
        annotated_frame,
        f"Speed Rate: {int(speed_rate_display)}",
        (50, 150), cv2.FONT_HERSHEY_SIMPLEX,
        1, (0, 255, 0), 2, cv2.LINE_AA
    )

    cv2.imshow("YOLOv8 Danger Detection", annotated_frame)

    # press q to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print("'q' key pressed. Exiting...")
        break


0: 640x512 2 persons, 3 cars, 2 motorcycles, 1 truck, 1 bed, 145.0ms
Speed: 5.1ms preprocess, 145.0ms inference, 10.3ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 2 persons, 4 cars, 1 motorcycle, 1 truck, 1 bed, 14.9ms
Speed: 7.4ms preprocess, 14.9ms inference, 9.2ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 2 persons, 3 cars, 1 motorcycle, 1 bed, 15.7ms
Speed: 4.4ms preprocess, 15.7ms inference, 8.2ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 person, 4 cars, 3 motorcycles, 1 bed, 11.6ms
Speed: 4.8ms preprocess, 11.6ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 person, 3 cars, 3 motorcycles, 1 truck, 1 bed, 12.1ms
Speed: 3.8ms preprocess, 12.1ms inference, 9.5ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 person, 3 cars, 3 motorcycles, 1 bed, 13.8ms
Speed: 5.2ms preprocess, 13.8ms inference, 9.1ms postprocess per image at shape (1, 3, 640, 512)

0: 640x512 1 person, 3 cars, 3 m

In [None]:
# Tear down, in order: the video capture, any OpenCV windows, the audio mixer.
print("Cleaning up resources...")
for release_resource in (cap.release, cv2.destroyAllWindows, pygame.mixer.quit):
    release_resource()
print("Script finished.")

Cleaning up resources...


Script finished.


In [None]:
# import pyttsx3

# print("Initializing TTS engine...")
# try:
#     engine = pyttsx3.init()
# except Exception as e:
#     print(f"Error initializing engine: {e}")
#     exit()

# print("Engine initialized.")

# engine.setProperty('rate', 150)  # Speed of speech
# engine.setProperty('volume', 1.0) 

# print("Attempting to speak...")
# engine.say("Hello, this is a sound test. Can you hear me?")

# try:
#     engine.runAndWait()
#     print("Speech finished.")
# except Exception as e:
#     print(f"Error during runAndWait: {e}")

# print("Test complete.")