In [None]:
"DIGITAL IMAGE PROCESSING PROJECT"

In [7]:
#LIVE DETECTION 

import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist

def calculate_EAR(eye):
    """Return the Eye Aspect Ratio (EAR) of a six-point eye contour.

    The ratio is the sum of the two vertical distances (points 1-5 and 2-4)
    divided by twice the horizontal distance (points 0-3), so it shrinks as
    the eyelids close.

    Args:
        eye: Indexable collection of at least six (x, y) points.

    Returns:
        The aspect ratio, or 0.0 when the horizontal distance is zero
        (degenerate landmarks), which avoids a division by zero.
    """
    vertical_outer = dist.euclidean(eye[1], eye[5])
    vertical_inner = dist.euclidean(eye[2], eye[4])
    horizontal = dist.euclidean(eye[0], eye[3])

    # Collapsed landmarks give a zero-width eye; report it as fully closed,
    # mirroring how get_mouth_height_width_ratio treats a zero-width mouth.
    if horizontal == 0:
        return 0.0

    return (vertical_outer + vertical_inner) / (2.0 * horizontal)

def calculate_percentage_eye_closure(eye):
    """Return the eye aspect ratio of ``eye`` expressed as a percentage.

    NOTE: despite the function name, the value grows as the eye opens: it is
    the sum of the vertical distances (points 1-5 and 2-4) divided by twice
    the horizontal distance (points 0-3), multiplied by 100.

    Args:
        eye: Indexable collection of at least six (x, y) points.

    Returns:
        The percentage value, or 0.0 when the horizontal distance is zero
        (degenerate landmarks), which avoids a division by zero.
    """
    vertical_distance = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    horizontal_distance = dist.euclidean(eye[0], eye[3])

    # A zero-width eye cannot yield a ratio; treat it as 0%.
    if horizontal_distance == 0:
        return 0.0

    return (vertical_distance / (2.0 * horizontal_distance)) * 100

def calculate_MAR(mouth):
    """Return the Mouth Aspect Ratio (MAR) used for yawn detection.

    Three vertical distances (points 3-9, 2-10 and 1-11) are averaged and
    divided by the horizontal distance between points 0 and 6, so the ratio
    grows as the mouth opens.

    Args:
        mouth: Indexable collection of at least twelve (x, y) points.
            Presumably the dlib mouth landmarks 48-67 re-indexed from 0,
            as built by the caller.

    Returns:
        The aspect ratio, or 0.0 when the horizontal distance is zero
        (degenerate landmarks), which avoids a division by zero.
    """
    # Vertical distances between matching upper and lower lip points
    vertical_center = dist.euclidean(mouth[3], mouth[9])
    vertical_mid = dist.euclidean(mouth[2], mouth[10])
    vertical_edge = dist.euclidean(mouth[1], mouth[11])

    # Horizontal distance between the two mouth corners
    horizontal = dist.euclidean(mouth[0], mouth[6])

    # A zero-width mouth cannot yield a ratio; treat it as closed.
    if horizontal == 0:
        return 0.0

    return (vertical_center + vertical_mid + vertical_edge) / (3.0 * horizontal)

def get_mouth_height_width_ratio(mouth):
    """
    Return the mouth's bounding height divided by its bounding width.

    The top edge is the smallest y among points 2-4, the bottom edge the
    largest y among points 8-10, the left edge the smallest x among points
    0-1 and the right edge the largest x among points 6-7.  Returns 0 when
    the width is not positive.
    """
    top_y = min(pt[1] for pt in mouth[2:5])
    bottom_y = max(pt[1] for pt in mouth[8:11])
    left_x = min(pt[0] for pt in mouth[0:2])
    right_x = max(pt[0] for pt in mouth[6:8])

    # Each extent is measured along one axis only; the other coordinate is 0.
    span_y = dist.euclidean([0, top_y], [0, bottom_y])
    span_x = dist.euclidean([left_x, 0], [right_x, 0])

    return span_y / span_x if span_x > 0 else 0

# preprocessing functions 
def preprocess_image(frame):
    """Convert a BGR frame into an enhanced grayscale image for face detection.

    Pipeline, in order: grayscale conversion, CLAHE contrast equalisation,
    non-local-means denoising, 3x3 sharpening, light Gaussian blur and
    min-max normalisation to the 0-255 range.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor``, or None.

    Returns:
        The processed single-channel image, or None when ``frame`` is None.
    """
    if frame is None:
        return None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Local contrast enhancement
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    gray = clahe.apply(gray)

    # Noise removal before sharpening so noise is not amplified
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Sharpen, then soften slightly to tame sharpening artefacts
    sharpen_kernel = np.array([[0, -1, 0],
                               [-1, 5, -1],
                               [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, sharpen_kernel)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # Stretch intensities back to the full 8-bit range
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
    return gray

def detect_face_at_scales(detector, frame):
    """Run the face detector at scales 1.0, 2.0 and 0.5 and return the first hits.

    The original-size frame is tried first and its detections are returned
    unchanged.  For the resized attempts, detected rectangles are divided by
    the scale factor so they refer to the original frame.  An empty
    ``dlib.rectangles`` is returned when no scale yields a detection.
    """
    frame_h, frame_w = frame.shape[:2]

    for factor in (1.0, 2.0, 0.5):
        if factor == 1.0:
            candidate = frame
        else:
            target_size = (int(frame_w * factor), int(frame_h * factor))
            candidate = cv2.resize(frame, target_size)

        found = detector(candidate, 0)
        if len(found) == 0:
            continue

        # Detections on the unscaled frame need no coordinate conversion.
        if factor == 1.0:
            return found

        # Map rectangles found on the resized frame back to original pixels.
        restored = dlib.rectangles()
        for box in found:
            restored.append(
                dlib.rectangle(
                    int(box.left() / factor),
                    int(box.top() / factor),
                    int(box.right() / factor),
                    int(box.bottom() / factor),
                )
            )
        return restored

    return dlib.rectangles()

def add_transparent_overlay(image, text_lines, start_x, start_y, padding=10):
    """Draw text lines on ``image`` over a half-transparent black panel.

    ``text_lines`` is a sequence of ``(text, font_scale, color, thickness)``
    tuples.  The panel is sized to the widest line and the summed line
    heights, extended by ``padding`` on every side.  ``image`` is modified
    in place; nothing is returned.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    backdrop = image.copy()

    # Measure every line to size the panel; each row gets 10 px of spacing.
    row_heights = []
    widest = 0
    for label, scale, _, weight in text_lines:
        (label_w, label_h), _ = cv2.getTextSize(label, font, scale, weight)
        widest = max(widest, label_w)
        row_heights.append(label_h + 10)
    block_height = sum(row_heights)

    top_left = (start_x - padding, start_y - padding)
    bottom_right = (start_x + widest + padding, start_y + block_height + padding)

    # Paint the panel on the copy, then blend the copy back into the image.
    opacity = 0.5
    cv2.rectangle(backdrop, top_left, bottom_right, (0, 0, 0), -1)
    cv2.addWeighted(backdrop, opacity, image, 1 - opacity, 0, image)

    baseline_y = start_y
    for (label, scale, colour, weight), row_h in zip(text_lines, row_heights):
        baseline_y += row_h
        cv2.putText(image, label, (start_x, baseline_y - 5), font, scale, colour, weight)

def main():
    """Run live drowsiness and yawn detection on the default webcam.

    Loads the dlib face detector and 68-point landmark predictor, then reads
    frames from capture device 0 until a read fails or 'q' is pressed.  For
    every detected face the eye and mouth ratios are computed, the running
    drowsy/yawn frame counters are updated, and the annotated frame is shown
    in a window.  The counters are shared by all faces in a frame.

    The camera and the display window are always released on exit, including
    when an exception interrupts the loop.
    """
    try:
        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    except RuntimeError as e:
        print(f"Error loading facial landmark detector: {e}")
        print("Please ensure 'shape_predictor_68_face_landmarks.dat' is in the correct path")
        return

    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open video capture device")
        return

    print("Starting video capture. Press 'q' to quit.")

    # Detection state and thresholds
    drowsy_frames = 0
    yawn_frames = 0
    DROWSY_THRESHOLD = 10  # counter value at which the drowsy alert is shown
    YAWN_THRESHOLD = 5  # counter value at which yawning is reported
    EAR_THRESHOLD = 0.25  # average EAR below this counts as a closed-eye frame
    MAR_THRESHOLD = 0.6
    HEIGHT_WIDTH_RATIO_THRESHOLD = 0.5

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame")
                break

            processed_frame = preprocess_image(frame)
            if processed_frame is None:
                continue

            faces = detect_face_at_scales(detector, processed_frame)

            if len(faces) == 0:
                cv2.putText(frame, "No face detected", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            else:
                for rect in faces:
                    shape = predictor(processed_frame, rect)

                    left_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(36, 42)])
                    right_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(42, 48)])
                    mouth = np.array([(shape.part(i).x, shape.part(i).y) for i in range(48, 68)])

                    left_EAR = calculate_EAR(left_eye)
                    right_EAR = calculate_EAR(right_eye)
                    avg_EAR = (left_EAR + right_EAR) / 2.0

                    left_eye_closure = calculate_percentage_eye_closure(left_eye)
                    right_eye_closure = calculate_percentage_eye_closure(right_eye)

                    mar = calculate_MAR(mouth)
                    height_width_ratio = get_mouth_height_width_ratio(mouth)

                    # Counters rise on a positive frame and decay by one
                    # otherwise, so brief blinks or mouth movements do not
                    # trigger an alert.
                    if mar > MAR_THRESHOLD and height_width_ratio > HEIGHT_WIDTH_RATIO_THRESHOLD:
                        yawn_frames += 1
                    else:
                        yawn_frames = max(0, yawn_frames - 1)

                    if avg_EAR < EAR_THRESHOLD:
                        drowsy_frames += 1
                    else:
                        drowsy_frames = max(0, drowsy_frames - 1)

                    status = "DROWSY ALERT!" if drowsy_frames >= DROWSY_THRESHOLD else "Non drowsy"
                    color = (0, 0, 255) if status == "DROWSY ALERT!" else (0, 255, 0)
                    yawning_status = "YAWNING!" if yawn_frames >= YAWN_THRESHOLD else "Not Yawning"
                    yawn_color = (0, 0, 255) if yawning_status == "YAWNING!" else (0, 255, 255)

                    # Draw face rectangle
                    x, y, w, h = rect.left(), rect.top(), rect.width(), rect.height()
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

                    # Draw feature contours
                    cv2.drawContours(frame, [cv2.convexHull(left_eye)], -1, (0, 255, 0), 1)
                    cv2.drawContours(frame, [cv2.convexHull(right_eye)], -1, (0, 255, 0), 1)
                    cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, yawn_color, 1)

                    # Text lines as (text, font_scale, color, thickness)
                    text_lines = [
                        (f"Status: {status}", 0.7, color, 2),
                        (f"EAR: {avg_EAR:.2f}", 0.7, (255, 255, 255), 2),
                        (f"MAR: {mar:.2f}", 0.7, (255, 255, 255), 2),
                        (f"Yawn Status: {yawning_status}", 0.7, yawn_color, 2),
                        (f"Left Eye: {left_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                        (f"Right Eye: {right_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                    ]

                    add_transparent_overlay(frame, text_lines, 10, 10)

            cv2.imshow('Drowsiness Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

# Start the live webcam detection loop when this cell/script is run directly.
if __name__ == "__main__":
    main()

Starting video capture. Press 'q' to quit.


In [1]:
# Batch mode: load the dataset images already stored on the local disk, detect drowsiness in each one,
# and save the annotated results to a separate output folder.

import cv2
import dlib
import numpy as np
from scipy.spatial import distance as dist
import os

def calculate_EAR(eye):
    """Return the Eye Aspect Ratio (EAR) of a six-point eye contour.

    The ratio is the sum of the two vertical distances (points 1-5 and 2-4)
    divided by twice the horizontal distance (points 0-3), so it shrinks as
    the eyelids close.

    Args:
        eye: Indexable collection of at least six (x, y) points.

    Returns:
        The aspect ratio, or 0.0 when the horizontal distance is zero
        (degenerate landmarks), which avoids a division by zero.
    """
    vertical_outer = dist.euclidean(eye[1], eye[5])
    vertical_inner = dist.euclidean(eye[2], eye[4])
    horizontal = dist.euclidean(eye[0], eye[3])

    # Collapsed landmarks give a zero-width eye; report it as fully closed
    # instead of dividing by zero.
    if horizontal == 0:
        return 0.0

    return (vertical_outer + vertical_inner) / (2.0 * horizontal)

def calculate_percentage_eye_closure(eye):
    """Return the eye aspect ratio of ``eye`` expressed as a percentage.

    NOTE: despite the function name, the value grows as the eye opens: it is
    the sum of the vertical distances (points 1-5 and 2-4) divided by twice
    the horizontal distance (points 0-3), multiplied by 100.

    Args:
        eye: Indexable collection of at least six (x, y) points.

    Returns:
        The percentage value, or 0.0 when the horizontal distance is zero
        (degenerate landmarks), which avoids a division by zero.
    """
    vertical_distance = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    horizontal_distance = dist.euclidean(eye[0], eye[3])

    # A zero-width eye cannot yield a ratio; treat it as 0%.
    if horizontal_distance == 0:
        return 0.0

    return (vertical_distance / (2.0 * horizontal_distance)) * 100

def calculate_yawn_distance(mouth):
    """Return the vertical lip opening as a pixel distance.

    Two input layouts are accepted:

    * a full landmark array (58 or more points): points 51 and 57 are used;
    * a mouth-only slice starting at landmark 48 (10 to 57 points): the same
      two landmarks sit at indices 3 and 9.

    The original length check only accepted the first layout, so a 20-point
    mouth slice always produced 0 and yawning could never be reported.

    Args:
        mouth: Indexable collection of (x, y) points in one of the layouts
            above.

    Returns:
        Euclidean distance between the upper and lower lip points, or 0 when
        ``mouth`` has fewer than 10 points.
    """
    point_count = len(mouth)
    if point_count >= 58:
        upper_lip, lower_lip = mouth[51], mouth[57]
    elif point_count >= 10:
        # Mouth slice re-indexed from landmark 48: 51 -> 3, 57 -> 9.
        upper_lip, lower_lip = mouth[3], mouth[9]
    else:
        return 0
    return dist.euclidean(upper_lip, lower_lip)

def preprocess_image(frame):
    """Convert a BGR frame into an enhanced grayscale image for face detection.

    Pipeline, in order: grayscale conversion, CLAHE contrast equalisation,
    non-local-means denoising, 3x3 sharpening, light Gaussian blur and
    min-max normalisation to the 0-255 range.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor``, or None.

    Returns:
        The processed single-channel image, or None when ``frame`` is None.
    """
    if frame is None:
        return None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Local contrast enhancement
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    gray = clahe.apply(gray)

    # Noise removal before sharpening so noise is not amplified
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Sharpen, then soften slightly to tame sharpening artefacts
    sharpen_kernel = np.array([[0, -1, 0],
                               [-1, 5, -1],
                               [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, sharpen_kernel)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # Stretch intensities back to the full 8-bit range
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)

    return gray

def detect_face_at_scales(detector, frame):
    """
    Run the face detector at scales 1.0, 2.0 and 0.5, resizing the image
    rather than using dlib's upsample parameter, and return the first hits.

    Detections on the original-size frame are returned unchanged.  For the
    resized attempts, rectangles are divided by the scale factor so they
    refer to the original image.  An empty ``dlib.rectangles`` is returned
    when no scale yields a detection.
    """
    frame_h, frame_w = frame.shape[:2]

    for factor in (1.0, 2.0, 0.5):
        if factor == 1.0:
            candidate = frame
        else:
            target_size = (int(frame_w * factor), int(frame_h * factor))
            candidate = cv2.resize(frame, target_size)

        found = detector(candidate, 0)
        if len(found) == 0:
            continue

        # Detections on the unscaled frame need no coordinate conversion.
        if factor == 1.0:
            return found

        # Map rectangles found on the resized frame back to original pixels.
        restored = dlib.rectangles()
        for box in found:
            restored.append(
                dlib.rectangle(
                    int(box.left() / factor),
                    int(box.top() / factor),
                    int(box.right() / factor),
                    int(box.bottom() / factor),
                )
            )
        return restored

    return dlib.rectangles()

def add_transparent_overlay(image, text_lines, start_x, start_y, padding=10):
    """Draw text lines on ``image`` over a half-transparent black panel.

    Args:
        image: Image to annotate; modified in place.
        text_lines: Sequence of ``(text, font_scale, color, thickness)``
            tuples, drawn top to bottom.
        start_x: Left edge of the text.
        start_y: Top edge of the first text row.
        padding: Extra margin of the panel around the text block.

    Returns:
        None.
    """
    # Work on a copy so the filled panel can be blended back at 50% opacity.
    overlay = image.copy()

    max_width = 0
    total_height = 0
    line_heights = []

    # Measure every line to size the panel; each row gets 10 px of spacing.
    for text, font_scale, _, thickness in text_lines:
        (text_width, text_height), _ = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness
        )
        max_width = max(max_width, text_width)
        line_heights.append(text_height + 10)
        total_height += text_height + 10

    rect_x1 = start_x - padding
    rect_y1 = start_y - padding
    rect_x2 = start_x + max_width + padding
    rect_y2 = start_y + total_height + padding

    alpha = 0.5  # Weight of the panel layer in the blend
    cv2.rectangle(overlay, (rect_x1, rect_y1), (rect_x2, rect_y2), (0, 0, 0), -1)
    cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)

    current_y = start_y
    for (text, font_scale, color, thickness), line_height in zip(text_lines, line_heights):
        cv2.putText(
            image, text, (start_x, current_y + line_height - 5),
            cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness
        )
        current_y += line_height

def main(dataset_path=None, output_path=None):
    """Annotate every image of a dataset folder with drowsiness results.

    Each .jpg/.jpeg/.png file (extension matched case-insensitively) is
    preprocessed, searched for faces and annotated with the eye and yawn
    measurements.  The annotated image is written to the output folder under
    the same file name.  Images without a detected face are written as a
    side-by-side ``debug_<name>`` image (original next to preprocessed).

    Args:
        dataset_path: Folder containing the input images.  Defaults to the
            project's original sample folder.
        output_path: Folder that receives the results; created if missing.
            Defaults to the project's original output folder.
    """
    try:
        detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    except RuntimeError as e:
        print(f"Error loading facial landmark detector: {e}")
        print("Please ensure 'shape_predictor_68_face_landmarks.dat' is in the correct path")
        return

    # Paths to dataset and output folders (defaults keep the old behaviour)
    if dataset_path is None:
        dataset_path = r"C:\Users\nandh\Downloads\Driver Drowsiness Dataset (DDD)\sample"
    if output_path is None:
        output_path = r"C:\Users\nandh\Downloads\Driver Drowsiness Dataset (DDD)\output"

    if not os.path.isdir(dataset_path):
        print(f"Error: Dataset folder not found: {dataset_path}")
        return

    # Create the output directory
    os.makedirs(output_path, exist_ok=True)

    EAR_THRESHOLD = 0.25  # average EAR below this is reported as drowsy
    YAWN_DISTANCE_THRESHOLD = 20  # lip distance in pixels above this is a yawn

    # List the folder once so the progress total and the loop always agree.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = [f for f in os.listdir(dataset_path) if f.lower().endswith(image_extensions)]
    total_images = len(image_files)

    for image_number, filename in enumerate(image_files, start=1):
        print(f"Processing image {image_number}/{total_images}: {filename}")

        image_path = os.path.join(dataset_path, filename)
        frame = cv2.imread(image_path)

        if frame is None:
            print(f"Error: Could not load the image {filename}")
            continue

        original_frame = frame.copy()

        processed_frame = preprocess_image(frame)
        if processed_frame is None:
            print(f"Error: Failed to preprocess {filename}")
            continue

        faces = detect_face_at_scales(detector, processed_frame)

        if len(faces) == 0:
            print(f"No face detected in {filename}")
            # Save original and preprocessed images side by side for inspection.
            debug_image = np.hstack((original_frame, cv2.cvtColor(processed_frame, cv2.COLOR_GRAY2BGR)))
            cv2.imwrite(os.path.join(output_path, f"debug_{filename}"), debug_image)
            continue

        for rect in faces:
            shape = predictor(processed_frame, rect)

            left_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(36, 42)])
            right_eye = np.array([(shape.part(i).x, shape.part(i).y) for i in range(42, 48)])
            mouth = np.array([(shape.part(i).x, shape.part(i).y) for i in range(48, 68)])

            left_EAR = calculate_EAR(left_eye)
            right_EAR = calculate_EAR(right_eye)
            avg_EAR = (left_EAR + right_EAR) / 2.0

            left_eye_closure = calculate_percentage_eye_closure(left_eye)
            right_eye_closure = calculate_percentage_eye_closure(right_eye)
            yawn_distance = calculate_yawn_distance(mouth)

            # Determine status
            status = "Drowsy!" if avg_EAR < EAR_THRESHOLD else "Not drowsy"
            color = (0, 0, 255) if status == "Drowsy!" else (0, 255, 0)
            yawning_status = "Yawning!" if yawn_distance > YAWN_DISTANCE_THRESHOLD else "Not Yawning"

            # Face rectangle
            x, y, w, h = rect.left(), rect.top(), rect.width(), rect.height()
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Eye and mouth contours
            cv2.drawContours(frame, [cv2.convexHull(left_eye)], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [cv2.convexHull(right_eye)], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, (0, 255, 255), 1)

            # Text lines as (text, font_scale, color, thickness)
            text_lines = [
                (f"Status: {status}", 0.7, color, 2),
                (f"EAR: {avg_EAR:.2f}", 0.7, (255, 255, 255), 2),
                (f"Left Eye: {left_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                (f"Right Eye: {right_eye_closure:.1f}%", 0.7, (255, 255, 255), 1),
                (f"{yawning_status}", 0.7, (0, 255, 255), 1),
            ]

            add_transparent_overlay(frame, text_lines, 10, 10)

        output_image_path = os.path.join(output_path, filename)
        if not cv2.imwrite(output_image_path, frame):
            print(f"Error: Could not write {output_image_path}")

        # The summary reports the last face processed in this image.
        print(f"Results for {filename}:")
        print(f"- Status: {status}")
        print(f"- Average EAR: {avg_EAR:.2f}")
        print(f"- Eye Closure: L={left_eye_closure:.1f}%, R={right_eye_closure:.1f}%")
        print(f"- Yawning Status: {yawning_status}")
        print("-" * 50)

    print("\nProcessing complete!")
    print(f"Processed {total_images} images")

# Process the dataset folder when this cell/script is run directly.
if __name__ == "__main__":
    main()

Processing image 1/185: A1183.png
Results for A1183.png:
- Status: Drowsy!
- Average EAR: 0.11
- Eye Closure: L=10.8%, R=11.9%
- Yawning Status: Not Yawning
--------------------------------------------------
Processing image 2/185: A1184.png
Results for A1184.png:
- Status: Drowsy!
- Average EAR: 0.16
- Eye Closure: L=15.8%, R=15.9%
- Yawning Status: Not Yawning
--------------------------------------------------
Processing image 3/185: A1185.png
Results for A1185.png:
- Status: Drowsy!
- Average EAR: 0.19
- Eye Closure: L=20.6%, R=17.3%
- Yawning Status: Not Yawning
--------------------------------------------------
Processing image 4/185: A1186.png
Results for A1186.png:
- Status: Drowsy!
- Average EAR: 0.13
- Eye Closure: L=13.3%, R=12.1%
- Yawning Status: Not Yawning
--------------------------------------------------
Processing image 5/185: A1187.png
Results for A1187.png:
- Status: Drowsy!
- Average EAR: 0.18
- Eye Closure: L=16.8%, R=19.3%
- Yawning Status: Not Yawning
-----------