In [15]:
# Live drowsiness and yawn detection using video capture

import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist

def add_transparent_overlay(image, text_lines, start_x, start_y, padding=10):
    """Render a stack of text lines on ``image`` over a half-transparent black box.

    The image is modified in place; nothing is returned.

    Args:
        image: Image array to draw on (it must support ``.copy()`` and be
            accepted by the OpenCV drawing calls).
        text_lines: Sequence of ``(text, font_scale, color, thickness)``
            tuples, drawn top to bottom in the given order.
        start_x: X coordinate of the left edge of the text.
        start_y: Y coordinate of the top of the first text row.
        padding: Margin, in pixels, between the text block and the box edge.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    row_gap = 10  # extra pixels added to every measured text height
    blend = 0.5

    # Measure every line once: each entry is (width, height) in pixels.
    measured = [
        cv2.getTextSize(label, font, scale, weight)[0]
        for label, scale, _, weight in text_lines
    ]
    row_heights = [glyph_h + row_gap for _, glyph_h in measured]
    box_width = max([0] + [glyph_w for glyph_w, _ in measured])
    box_height = sum(row_heights)

    top_left = (start_x - padding, start_y - padding)
    bottom_right = (start_x + box_width + padding, start_y + box_height + padding)

    # Paint the filled box on a copy, then blend the copy back into the image.
    shaded = image.copy()
    cv2.rectangle(shaded, top_left, bottom_right, (0, 0, 0), -1)
    cv2.addWeighted(shaded, blend, image, 1 - blend, 0, image)

    row_bottom = start_y
    for (label, scale, colour, weight), row_h in zip(text_lines, row_heights):
        row_bottom += row_h
        # The text origin is placed 5 px above the bottom of its row.
        cv2.putText(image, label, (start_x, row_bottom - 5), font, scale, colour, weight)

def calculate_EAR(eye):
    """Return the Eye Aspect Ratio (EAR) for six eye landmark points.

    The ratio is the sum of the distances between points (1, 5) and (2, 4)
    divided by twice the distance between points 0 and 3.

    Args:
        eye: Indexable sequence of at least six 2-D points.

    Returns:
        The aspect ratio as a float.
    """
    first_gap, second_gap = (
        dist.euclidean(eye[a], eye[b]) for a, b in ((1, 5), (2, 4))
    )
    span = dist.euclidean(eye[0], eye[3])
    return (first_gap + second_gap) / (2.0 * span)

def calculate_MAR(mouth):
    """Return the Mouth Aspect Ratio (MAR) used for yawn detection.

    Three distances are measured between the point pairs (3, 9), (2, 10)
    and (1, 11); their mean is divided by the distance between points 0
    and 6.

    Args:
        mouth: Indexable sequence of at least twelve 2-D mouth points.

    Returns:
        The aspect ratio as a float.
    """
    pair_indices = ((3, 9), (2, 10), (1, 11))
    openings = [dist.euclidean(mouth[a], mouth[b]) for a, b in pair_indices]
    mouth_span = dist.euclidean(mouth[0], mouth[6])
    return sum(openings) / (3.0 * mouth_span)

def get_mouth_height_width_ratio(mouth):
    """Return the mouth's height divided by its width.

    Height is the gap between the smallest y among points 2..4 and the
    largest y among points 8..10. Width is the gap between the smallest x
    among points 0..1 and the largest x among points 6..7.

    Args:
        mouth: Sliceable sequence of 2-D mouth points (at least 11 entries
            are read).

    Returns:
        ``height / width``, or ``0`` when the width is not positive.
    """
    upper_y = min(pt[1] for pt in mouth[2:5])
    lower_y = max(pt[1] for pt in mouth[8:11])
    left_x = min(pt[0] for pt in mouth[0:2])
    right_x = max(pt[0] for pt in mouth[6:8])

    span_y = dist.euclidean([0, upper_y], [0, lower_y])
    span_x = dist.euclidean([left_x, 0], [right_x, 0])

    # Guard against a degenerate (zero-width) mouth before dividing.
    if not span_x > 0:
        return 0
    return span_y / span_x

def preprocess_image(frame):
    """Convert a BGR frame into an enhanced grayscale image for face detection.

    The stages run in a fixed order: grayscale conversion, CLAHE contrast
    equalisation, non-local-means denoising, a 3x3 sharpening convolution,
    a 3x3 Gaussian blur, and a final min-max normalisation to 0..255.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        The processed single-channel image.
    """
    # Sharpening kernel: centre weight 5, the four direct neighbours -1.
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    equalized = equalizer.apply(grayscale)
    denoised = cv2.fastNlMeansDenoising(equalized, None, 10, 7, 21)
    sharpened = cv2.filter2D(denoised, -1, sharpen_kernel)
    smoothed = cv2.GaussianBlur(sharpened, (3, 3), 0)
    return cv2.normalize(smoothed, None, 0, 255, cv2.NORM_MINMAX)

def detect_face_at_scales(detector, frame):
    """Run the face detector on resized copies of ``frame`` until one finds a face.

    The scales 1.0, 2.0 and 0.5 are tried in that order. The first scale
    that yields any detection wins, and its rectangles are mapped back to
    the coordinate system of the original frame.

    Args:
        detector: Callable invoked as ``detector(image, 0)`` that returns a
            sized, iterable collection of dlib rectangles.
        frame: Image array; only its first two ``shape`` entries are read.

    Returns:
        A ``dlib.rectangles`` collection in original-frame coordinates;
        empty when no scale produced a detection.
    """
    base_height, base_width = frame.shape[:2]

    for factor in (1.0, 2.0, 0.5):
        target_size = (int(base_width * factor), int(base_height * factor))
        detections = detector(cv2.resize(frame, target_size), 0)
        if len(detections) == 0:
            continue

        # Undo the resize so the boxes line up with the original frame.
        restored = dlib.rectangles()
        for box in detections:
            restored.append(
                dlib.rectangle(
                    int(box.left() / factor),
                    int(box.top() / factor),
                    int(box.right() / factor),
                    int(box.bottom() / factor),
                )
            )
        return restored

    return dlib.rectangles()

def main():
    """Run live drowsiness and yawn detection on capture device 0.

    Loads dlib's frontal face detector and the landmark predictor from
    ``shape_predictor_68_face_landmarks.dat``, then reads frames until a
    read fails or the user presses ``q``. For every detected face the eye
    and mouth landmarks are measured, the annotated frame is displayed,
    and agreement counters are updated. When the loop ends, summary
    figures are printed.

    The capture device and the display windows are released in a
    ``finally`` block, so they are freed even if processing raises (for
    example when the cell is interrupted).

    NOTE(review): the printed precision/recall/F1 values do not compare
    against ground-truth labels. They compare the debounced status with
    the raw per-frame signal, and the status can only be true when the
    raw signal is true, so the false-positive counters never increase.
    """
    try:
        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    except RuntimeError as e:
        print(f"Error loading facial landmark detector: {e}")
        return

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open video capture device")
        return

    print("Starting video capture. Press 'q' to quit.")

    # Debounce counters; they are shared by every face found in a frame.
    drowsy_frames = 0
    yawn_frames = 0
    EAR_THRESHOLD = 0.25
    DROWSY_THRESHOLD = 10
    YAWN_THRESHOLD = 5
    MAR_THRESHOLD = 0.6
    HEIGHT_WIDTH_RATIO_THRESHOLD = 0.5

    total_frames = 0
    true_positive_drowsy = false_positive_drowsy = true_negative_drowsy = false_negative_drowsy = 0
    true_positive_yawn = false_positive_yawn = true_negative_yawn = false_negative_yawn = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame")
                break

            total_frames += 1
            processed_frame = preprocess_image(frame)
            faces = detect_face_at_scales(detector, processed_frame)

            for rect in faces:
                shape = predictor(processed_frame, rect)
                # Landmark index ranges: 36-41 and 42-47 for the eyes, 48-67 for the mouth.
                left_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(36, 42)])
                right_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(42, 48)])
                mouth = np.array([(shape.part(i).x, shape.part(i).y) for i in range(48, 68)])

                left_EAR = calculate_EAR(left_eye)
                right_EAR = calculate_EAR(right_eye)
                avg_EAR = (left_EAR + right_EAR) / 2.0
                mar = calculate_MAR(mouth)
                height_width_ratio = get_mouth_height_width_ratio(mouth)

                # The status is evaluated against the counter value from
                # BEFORE this frame's update; the counter decays by one on
                # every frame where the raw signal is off.
                is_drowsy = avg_EAR < EAR_THRESHOLD
                drowsy_status = drowsy_frames >= DROWSY_THRESHOLD if is_drowsy else False
                drowsy_frames = drowsy_frames + 1 if is_drowsy else max(0, drowsy_frames - 1)

                is_yawning = mar > MAR_THRESHOLD and height_width_ratio > HEIGHT_WIDTH_RATIO_THRESHOLD
                yawning_status = yawn_frames >= YAWN_THRESHOLD if is_yawning else False
                yawn_frames = yawn_frames + 1 if is_yawning else max(0, yawn_frames - 1)

                if drowsy_status:
                    true_positive_drowsy += is_drowsy
                    false_positive_drowsy += not is_drowsy
                else:
                    false_negative_drowsy += is_drowsy
                    true_negative_drowsy += not is_drowsy

                if yawning_status:
                    true_positive_yawn += is_yawning
                    false_positive_yawn += not is_yawning
                else:
                    false_negative_yawn += is_yawning
                    true_negative_yawn += not is_yawning

                cv2.drawContours(frame, [cv2.convexHull(left_eye)], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [cv2.convexHull(right_eye)], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, (0, 0, 255) if yawning_status else (255, 255, 255), 1)

                text_lines = [
                    (f"Drowsy: {'Yes' if drowsy_status else 'No'}", 0.7, (0, 255, 0) if drowsy_status else (255, 255, 255), 2),
                    (f"Yawning: {'Yes' if yawning_status else 'No'}", 0.7, (0, 0, 255) if yawning_status else (255, 255, 255), 2),
                    (f"Left EAR: {left_EAR:.2f}", 0.7, (255, 255, 255), 2),
                    (f"Right EAR: {right_EAR:.2f}", 0.7, (255, 255, 255), 2),
                    (f"MAR: {mar:.2f}", 0.7, (255, 255, 255), 2)
                ]

                add_transparent_overlay(frame, text_lines, 10, 10)

            cv2.imshow('Drowsiness Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the windows, even on an error.
        cap.release()
        cv2.destroyAllWindows()

    precision_drowsy = true_positive_drowsy / (true_positive_drowsy + false_positive_drowsy) if (true_positive_drowsy + false_positive_drowsy) > 0 else 0
    recall_drowsy = true_positive_drowsy / (true_positive_drowsy + false_negative_drowsy) if (true_positive_drowsy + false_negative_drowsy) > 0 else 0
    f1_score_drowsy = (2 * precision_drowsy * recall_drowsy) / (precision_drowsy + recall_drowsy) if (precision_drowsy + recall_drowsy) > 0 else 0

    precision_yawn = true_positive_yawn / (true_positive_yawn + false_positive_yawn) if (true_positive_yawn + false_positive_yawn) > 0 else 0
    recall_yawn = true_positive_yawn / (true_positive_yawn + false_negative_yawn) if (true_positive_yawn + false_negative_yawn) > 0 else 0
    f1_score_yawn = (2 * precision_yawn * recall_yawn) / (precision_yawn + recall_yawn) if (precision_yawn + recall_yawn) > 0 else 0

    print(f"Drowsy Detection - Precision: {precision_drowsy:.2f}, Recall: {recall_drowsy:.2f}, F1 Score: {f1_score_drowsy:.2f}")
    print(f"Yawning Detection - Precision: {precision_yawn:.2f}, Recall: {recall_yawn:.2f}, F1 Score: {f1_score_yawn:.2f}")

# Start the live-detection loop only when this code is executed as a script.
if __name__ == "__main__":
    main()


Starting video capture. Press 'q' to quit.
Drowsy Detection - Precision: 1.00, Recall: 0.58, F1 Score: 0.73
Yawning Detection - Precision: 1.00, Recall: 0.74, F1 Score: 0.85


In [23]:
# Import the dataset from local storage, detect drowsiness in each stored image,
# and create a new, separate folder for storing the processed images.
# Importing the necessary libraries

import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist
import os

def calculate_EAR(eye):
    """Compute the Eye Aspect Ratio (EAR) from six eye landmark points.

    Args:
        eye: Indexable sequence of at least six 2-D points. Points 1/5 and
            2/4 form the two measured pairs; points 0 and 3 give the span
            used as the denominator.

    Returns:
        ``(d(1,5) + d(2,4)) / (2 * d(0,3))`` where ``d`` is the Euclidean
        distance.
    """
    pair_total = sum(dist.euclidean(eye[a], eye[b]) for a, b in ((1, 5), (2, 4)))
    base_span = dist.euclidean(eye[0], eye[3])
    return pair_total / (2.0 * base_span)

def calculate_percentage_eye_closure(eye):
    """Return the eye's vertical-to-horizontal ratio expressed as a percentage.

    The value is ``(d(1,5) + d(2,4)) / (2 * d(0,3)) * 100`` where ``d`` is
    the Euclidean distance between the indexed points. Despite the
    function's name, this is the same formula as the eye aspect ratio, so
    the number grows as the two measured pairs move further apart.

    Args:
        eye: Indexable sequence of at least six 2-D points.

    Returns:
        The ratio multiplied by 100, as a float.
    """
    pair_total = sum(dist.euclidean(eye[a], eye[b]) for a, b in ((1, 5), (2, 4)))
    base_span = dist.euclidean(eye[0], eye[3])
    ratio = pair_total / (2.0 * base_span)
    return ratio * 100

def calculate_yawn_distance(mouth):
    """Return the distance between the upper-lip and lower-lip centre points.

    Two input layouts are accepted:

    * A full landmark array (58 or more points): landmarks 51 and 57 are
      used directly.
    * A mouth-only slice (10 to 57 points), such as the 20 points taken
      from landmarks 48..67: the same two landmarks sit at local indices
      3 and 9 (51 - 48 and 57 - 48).

    The previous implementation handled only the first layout. The caller
    passes the 20-point slice, so it always fell through to ``return 0``
    and a yawn could never be reported.

    Args:
        mouth: Sized, indexable sequence of 2-D points.

    Returns:
        The Euclidean distance between the two lip points, or ``0`` when
        the input has fewer than 10 points.
    """
    point_count = len(mouth)
    if point_count >= 58:
        # Full landmark array: absolute landmark indices.
        upper_index, lower_index = 51, 57
    elif point_count >= 10:
        # Mouth-only slice starting at landmark 48: indices are relative.
        upper_index, lower_index = 3, 9
    else:
        return 0
    return dist.euclidean(mouth[upper_index], mouth[lower_index])

def preprocess_image(frame):
    """Turn a BGR image into an enhanced grayscale image for face detection.

    A ``None`` input (for example an image that failed to load) is passed
    straight through as ``None``. Otherwise the stages run in a fixed
    order: grayscale conversion, CLAHE contrast equalisation,
    non-local-means denoising, a 3x3 sharpening convolution, a 3x3
    Gaussian blur, and min-max normalisation to 0..255.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor``, or ``None``.

    Returns:
        The processed single-channel image, or ``None`` if ``frame`` is
        ``None``.
    """
    if frame is None:
        return None

    # Sharpening kernel: centre weight 5, the four direct neighbours -1.
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    equalized = equalizer.apply(grayscale)
    denoised = cv2.fastNlMeansDenoising(equalized, None, 10, 7, 21)
    sharpened = cv2.filter2D(denoised, -1, sharpen_kernel)
    smoothed = cv2.GaussianBlur(sharpened, (3, 3), 0)
    return cv2.normalize(smoothed, None, 0, 255, cv2.NORM_MINMAX)

def detect_face_at_scales(detector, frame):
    """Try face detection at scales 1.0, 2.0 and 0.5, returning the first hit.

    At scale 1.0 the frame is passed to the detector unchanged and its
    result is returned as-is. At the other scales the frame is resized
    first and the detected rectangles are divided by the scale so they
    refer to the original frame.

    Args:
        detector: Callable invoked as ``detector(image, 0)`` that returns a
            sized, iterable collection of dlib rectangles.
        frame: Image array; ``shape[0]`` and ``shape[1]`` are read as
            height and width.

    Returns:
        A ``dlib.rectangles`` collection; empty when no scale produced a
        detection.
    """
    for factor in (1.0, 2.0, 0.5):
        is_native = factor == 1.0
        if is_native:
            candidate = frame
        else:
            target_size = (int(frame.shape[1] * factor), int(frame.shape[0] * factor))
            candidate = cv2.resize(frame, target_size)

        hits = detector(candidate, 0)
        if len(hits) == 0:
            continue
        if is_native:
            return hits

        # Map the boxes found on the resized image back to the original frame.
        return dlib.rectangles(
            [
                dlib.rectangle(
                    int(box.left() / factor),
                    int(box.top() / factor),
                    int(box.right() / factor),
                    int(box.bottom() / factor),
                )
                for box in hits
            ]
        )
    return dlib.rectangles()

def add_transparent_overlay(image, text_lines, start_x, start_y, padding=10):
    """Draw text lines on ``image`` over a semi-transparent black box.

    The image is modified in place; nothing is returned.

    Changes from the previous implementation:

    * Each line's text origin is now placed near the bottom of its row
      (``row_top + row_height - 5``). Before, the origin was the row top;
      since ``cv2.putText`` treats the origin as the bottom-left corner
      of the text, every line was drawn one row too high and the first
      line fell outside the box.
    * An empty ``text_lines`` is a no-op, where it used to raise
      ``ValueError`` from ``max()``.
    * Each line is measured once instead of three times.

    Args:
        image: Image array to draw on.
        text_lines: Iterable of ``(text, font_scale, color, thickness)``
            tuples, drawn top to bottom.
        start_x: X coordinate of the left edge of the text.
        start_y: Y coordinate of the top of the first text row.
        padding: Margin, in pixels, between the text block and the box edge.
    """
    # Materialise once so generators work and emptiness can be checked.
    text_lines = list(text_lines)
    if not text_lines:
        return

    font = cv2.FONT_HERSHEY_SIMPLEX
    line_spacing = 10  # extra pixels added to every measured text height

    sizes = [
        cv2.getTextSize(text, font, font_scale, thickness)[0]
        for text, font_scale, _, thickness in text_lines
    ]
    max_width = max(width for width, _ in sizes)
    line_heights = [height + line_spacing for _, height in sizes]
    total_height = sum(line_heights)

    # Paint the filled box on a copy, then blend the copy back 50/50.
    overlay = image.copy()
    cv2.rectangle(
        overlay,
        (start_x - padding, start_y - padding),
        (start_x + max_width + padding, start_y + total_height + padding),
        (0, 0, 0),
        -1,
    )
    cv2.addWeighted(overlay, 0.5, image, 0.5, 0, image)

    y = start_y
    for (text, font_scale, color, thickness), line_height in zip(text_lines, line_heights):
        y += line_height
        # putText's origin is the bottom-left corner of the rendered text.
        cv2.putText(image, text, (start_x, y - 5), font, font_scale, color, thickness)

def main():
    """Annotate every image in the dataset folder and print summary figures.

    Loads dlib's frontal face detector and the landmark predictor from
    ``shape_predictor_68_face_landmarks.dat``, then walks the ``.jpg`` /
    ``.jpeg`` / ``.png`` files in the hard-coded dataset directory. For
    each face found, the eye and mouth measurements are drawn onto the
    image, which is then written to the output directory under the same
    file name.

    Changes from the previous implementation:

    * The progress index now counts image files only. It used to count
      every directory entry, so it could exceed the printed total.
    * Files that ``cv2.imread`` cannot decode are reported and skipped
      instead of crashing in the detector.

    NOTE(review): no ground-truth labels are read. The "true positive"
    counters count detections, the "false negative" counters count
    non-detections, and the false-positive counters are never
    incremented, so the printed precision/recall/F1 values are not
    accuracy measurements.
    """
    try:
        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    except RuntimeError as e:
        print(f"Error: {e}")
        return

    dataset_path = r"C:\Users\nandh\Downloads\Driver Drowsiness Dataset (DDD)\sample"
    output_path = r"C:\Users\nandh\Downloads\Driver Drowsiness Dataset (DDD)\output"
    os.makedirs(output_path, exist_ok=True)

    true_positive_drowsy = false_positive_drowsy = false_negative_drowsy = 0
    true_positive_yawn = false_positive_yawn = false_negative_yawn = 0

    # List the directory once so the progress index and the total agree.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = [f for f in os.listdir(dataset_path) if f.endswith(image_extensions)]
    total_images = len(image_files)

    for i, filename in enumerate(image_files, 1):
        print(f"Processing image {i}/{total_images}: {filename}")
        frame = cv2.imread(os.path.join(dataset_path, filename))
        if frame is None:
            # imread signals an unreadable or corrupt file by returning None.
            print(f"Could not read {filename}, skipping")
            continue

        processed_frame = preprocess_image(frame)
        faces = detect_face_at_scales(detector, processed_frame)

        if len(faces) == 0:
            print(f"No face detected in {filename}")
            continue

        for rect in faces:
            shape = predictor(processed_frame, rect)
            # Landmark index ranges: 36-41 and 42-47 for the eyes, 48-67 for the mouth.
            left_eye = np.array([(shape.part(idx).x, shape.part(idx).y) for idx in range(36, 42)])
            right_eye = np.array([(shape.part(idx).x, shape.part(idx).y) for idx in range(42, 48)])
            mouth = np.array([(shape.part(idx).x, shape.part(idx).y) for idx in range(48, 68)])

            left_EAR = calculate_EAR(left_eye)
            right_EAR = calculate_EAR(right_eye)
            avg_EAR = (left_EAR + right_EAR) / 2.0

            left_eye_closure = calculate_percentage_eye_closure(left_eye)
            right_eye_closure = calculate_percentage_eye_closure(right_eye)
            yawn_distance = calculate_yawn_distance(mouth)

            # Detection Status
            status = "Drowsy!" if avg_EAR < 0.25 else "Not drowsy"
            yawning_status = "Yawning!" if yawn_distance > 20 else "Not Yawning"

            if status == "Drowsy!":
                true_positive_drowsy += 1
            else:
                false_negative_drowsy += 1

            if yawning_status == "Yawning!":
                true_positive_yawn += 1
            else:
                false_negative_yawn += 1

            text_lines = [
                (f"Status: {status}", 0.7, (0, 0, 255) if status == "Drowsy!" else (0, 255, 0), 2),
                (f"EAR: {avg_EAR:.2f}", 0.7, (255, 255, 255), 2),
                (f"Left Eye: {left_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                (f"Right Eye: {right_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                (f"Yawning Status: {yawning_status}", 0.7, (0, 255, 255), 1)
            ]
            add_transparent_overlay(frame, text_lines, 10, 10)

        cv2.imwrite(os.path.join(output_path, filename), frame)
        print(f"Processed {filename}.")

    # Metrics Calculation
    precision_drowsy = true_positive_drowsy / (true_positive_drowsy + false_positive_drowsy) if (true_positive_drowsy + false_positive_drowsy) > 0 else 0
    recall_drowsy = true_positive_drowsy / (true_positive_drowsy + false_negative_drowsy) if (true_positive_drowsy + false_negative_drowsy) > 0 else 0
    f1_score_drowsy = (2 * precision_drowsy * recall_drowsy) / (precision_drowsy + recall_drowsy) if (precision_drowsy + recall_drowsy) > 0 else 0

    precision_yawn = true_positive_yawn / (true_positive_yawn + false_positive_yawn) if (true_positive_yawn + false_positive_yawn) > 0 else 0
    recall_yawn = true_positive_yawn / (true_positive_yawn + false_negative_yawn) if (true_positive_yawn + false_negative_yawn) > 0 else 0
    f1_score_yawn = (2 * precision_yawn * recall_yawn) / (precision_yawn + recall_yawn) if (precision_yawn + recall_yawn) > 0 else 0

    # Metrics Output
    print(f"Drowsy Detection - Precision: {precision_drowsy:.2f}, Recall: {recall_drowsy:.2f}, F1 Score: {f1_score_drowsy:.2f}")
    print(f"Yawning Detection - Precision: {precision_yawn:.2f}, Recall: {recall_yawn:.2f}, F1 Score: {f1_score_yawn:.2f}")

# Process the dataset only when this code is executed as a script.
if __name__ == "__main__":
    main()


Processing image 1/185: A1183.png
Processed A1183.png.
Processing image 2/185: A1184.png
Processed A1184.png.
Processing image 3/185: A1185.png
Processed A1185.png.
Processing image 4/185: A1186.png
Processed A1186.png.
Processing image 5/185: A1187.png
Processed A1187.png.
Processing image 6/185: A1195.png
Processed A1195.png.
Processing image 7/185: A1196.png
Processed A1196.png.
Processing image 8/185: A1197.png
Processed A1197.png.
Processing image 9/185: A1198.png
Processed A1198.png.
Processing image 10/185: A1199.png
Processed A1199.png.
Processing image 11/185: B0271.png
Processed B0271.png.
Processing image 12/185: B0272.png
Processed B0272.png.
Processing image 13/185: B0273.png
Processed B0273.png.
Processing image 14/185: B0274.png
Processed B0274.png.
Processing image 15/185: B0275.png
Processed B0275.png.
Processing image 16/185: B0276.png
Processed B0276.png.
Processing image 17/185: B0277.png
Processed B0277.png.
Processing image 18/185: B0283.png
Processed B0283.png.
P