# In [None]:
import cv2
import dlib
import numpy as np
import sounddevice as sd
import threading

# Path to the dlib 68-point face-landmark model file; it is a relative path,
# so it is resolved against the process's current working directory.
PREDICTOR_PATH = "model/shape_predictor_68_face_landmarks.dat"
# main() compares the mean eye aspect ratio of both eyes against this value;
# a ratio below it is treated as "eyes closed".
EAR_THRESHOLD = 0.2
# Presumably the number of consecutive closed-eye frames required before an
# alert should be raised -- NOTE(review): confirm how main() uses it.
EAR_CONSEC_FRAMES = 48

# Shared dlib objects, built once when the module is imported. Constructing
# the predictor reads PREDICTOR_PATH, so importing this module needs that file.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PREDICTOR_PATH)

def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) for six eye landmark points.

    Args:
        eye: Indexable sequence of at least six ``(x, y)`` points. Points
            0 and 3 are used as the horizontal pair; points 1/5 and 2/4 as
            the two vertical pairs.

    Returns:
        The sum of the two vertical distances divided by twice the
        horizontal distance.
    """

    def span(i, j):
        # Euclidean distance between landmark i and landmark j.
        return np.linalg.norm(np.array(eye[i]) - np.array(eye[j]))

    vertical_sum = span(1, 5) + span(2, 4)
    horizontal = span(0, 3)
    return vertical_sum / (2.0 * horizontal)

def draw_polygons(frame, landmarks):
    """Outline both eyes on ``frame`` with thin green closed polylines.

    Args:
        frame: Image that is drawn on in place.
        landmarks: Landmark object exposing ``part(n)`` with ``.x`` / ``.y``;
            indices 36-41 are treated as the left eye and 42-47 as the
            right eye, following this file's naming.
    """
    green = (0, 255, 0)
    # (start, stop) landmark index ranges: left eye first, then right eye.
    for first, last in ((36, 42), (42, 48)):
        outline = np.array(
            [(landmarks.part(i).x, landmarks.part(i).y) for i in range(first, last)],
            np.int32,
        )
        cv2.polylines(frame, [outline], True, green, 1)

def play_tone(frequency, duration, amplitude=0.5, sample_rate=44100):
    """Play a sine tone on the default audio output and block until done.

    Args:
        frequency: Tone frequency in Hz.
        duration: Tone length in seconds.
        amplitude: Peak level as a fraction of full scale; the waveform is
            clipped to [-1, 1] so larger values saturate instead of wrapping
            around in the 16-bit conversion.
        sample_rate: Output sample rate in Hz.
    """
    t = np.linspace(0, duration, int(sample_rate * duration), False)
    note = np.clip(amplitude * np.sin(frequency * t * 2 * np.pi), -1.0, 1.0)
    audio = (note * (2**15 - 1)).astype(np.int16)

    position = 0  # index of the next sample that has not been played yet

    def callback(outdata, frames, time_info, status):
        # Copy the *next* chunk on every call; always copying from the start
        # would replay the first buffer over and over instead of the tone.
        nonlocal position
        chunk = audio[position:position + frames]
        outdata[:len(chunk), 0] = chunk
        # Pad with silence once the tone has been fully written.
        outdata[len(chunk):] = 0
        position += len(chunk)

    # The samples are int16-scaled, so the stream must use the same format;
    # otherwise they would be interpreted in the stream's default format.
    with sd.OutputStream(
        samplerate=sample_rate,
        channels=1,
        dtype="int16",
        callback=callback,
    ):
        sd.sleep(int(duration * 1000))

_alert_thread = None  # most recently started tone thread, if any


def alert_sound():
    """Play the 440 Hz, 1-second alert tone on a background thread.

    Returns immediately. If the tone started by a previous call is still
    playing, this call does nothing: main() calls this once per video frame
    while the alert is active, and starting a new thread and audio stream
    every frame would stack dozens of overlapping tones.
    """
    global _alert_thread
    if _alert_thread is not None and _alert_thread.is_alive():
        return
    _alert_thread = threading.Thread(target=play_tone, args=(440, 1))
    _alert_thread.start()

def _eye_points(landmarks, start, stop):
    """Return ``[(x, y), ...]`` for landmark indices ``start`` .. ``stop - 1``."""
    return [(landmarks.part(n).x, landmarks.part(n).y) for n in range(start, stop)]


def _draw_alert_text(frame):
    """Draw the red "DROWSINESS ALERT!" banner centred on ``frame`` in place."""
    text = "DROWSINESS ALERT!"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 3
    thickness = 5
    color = (0, 0, 255)  # red (OpenCV frames are BGR)

    (text_width, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness)
    frame_height, frame_width = frame.shape[:2]
    # putText takes the bottom-left corner of the text as its anchor.
    x = (frame_width - text_width) // 2
    y = (frame_height + text_height) // 2
    cv2.putText(frame, text, (x, y), font, font_scale, color, thickness)


def main():
    """Run the webcam drowsiness monitor until 'q' is pressed or capture fails.

    Each frame is searched for faces; for every face the eyes are outlined
    and the mean eye aspect ratio (EAR) of both eyes is computed. A frame
    counts as "eyes closed" when any detected face has a mean EAR below
    EAR_THRESHOLD. The on-screen banner and the alert tone are triggered only
    after EAR_CONSEC_FRAMES consecutive closed-eye frames, so ordinary blinks
    do not raise an alert.

    The camera and the display window are released even if the loop exits
    through an exception (for example Ctrl-C).
    """
    cap = cv2.VideoCapture(0)
    closed_frames = 0  # length of the current run of closed-eye frames

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame")
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detector(gray)

            face_seen = False
            eyes_closed = False
            for face in faces:
                face_seen = True
                landmarks = predictor(gray, face)
                draw_polygons(frame, landmarks)

                left_ear = eye_aspect_ratio(_eye_points(landmarks, 36, 42))
                right_ear = eye_aspect_ratio(_eye_points(landmarks, 42, 48))
                ear = (left_ear + right_ear) / 2.0

                if ear < EAR_THRESHOLD:
                    eyes_closed = True

            if eyes_closed:
                closed_frames += 1
                if closed_frames >= EAR_CONSEC_FRAMES:
                    _draw_alert_text(frame)
                    alert_sound()  # plays on a separate thread
            elif face_seen:
                # Open eyes were observed: the closed-eye run is over.
                closed_frames = 0
            # With no face in the frame the counter is left untouched so a
            # single missed detection does not restart the count.

            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


