# Cleaning the YawDD and NITYMED Datasets

In [5]:
import os
import shutil

# Project root that holds the raw and cleaned dataset folders. The original
# machine-specific location stays the default; set the FATIGUE_PROJECT_ROOT
# environment variable to run this notebook from another checkout or machine
# without editing the cell.
PROJECT_ROOT = os.environ.get(
    'FATIGUE_PROJECT_ROOT',
    r'C:\Users\arman\Desktop\FYP_ARMAN\Driver-Fatigue-Detection-Using-Vision-Based-Machine-Learning',
)

# Raw dataset folders (inputs) and the folder the sorted videos are moved into (output).
yawdd_raw_path = os.path.join(PROJECT_ROOT, 'YAWDD_DATASET')
nitymed_raw_path = os.path.join(PROJECT_ROOT, 'NITYMED_DATASET')
combined_processed_path = os.path.join(PROJECT_ROOT, 'CLEANED_DRIVER_FATIGUE')

# Supported video extensions (lowercase; compared against lowercased file names)
video_extensions = ('.mp4', '.avi', '.mkv', '.mov')

def _unique_destination(target_dir, filename):
    """Return a path in ``target_dir`` for ``filename`` that does not exist yet.

    If ``filename`` is already taken, ``_1``, ``_2``, ... is appended to the
    stem (before the extension) until a free name is found.
    """
    candidate = os.path.join(target_dir, filename)
    if not os.path.exists(candidate):
        return candidate
    stem, ext = os.path.splitext(filename)
    suffix = 1
    while True:
        candidate = os.path.join(target_dir, f"{stem}_{suffix}{ext}")
        if not os.path.exists(candidate):
            return candidate
        suffix += 1


def _is_within(path, parent):
    """Return True when ``path`` is ``parent`` itself or lies somewhere below it."""
    path = os.path.normcase(os.path.abspath(path))
    parent = os.path.normcase(os.path.abspath(parent))
    return path == parent or path.startswith(parent.rstrip(os.sep) + os.sep)


def organize_dataset(source_path, dataset_name, dest_root=None, extensions=None):
    """Move every video under ``source_path`` into ``<dest_root>/<label>/<angle>/``.

    The label and camera angle are derived from keywords found in the file
    name and in the folder path *from the dataset folder downwards* (folders
    above ``source_path`` are ignored so an unrelated ancestor name cannot
    influence the result):

    * label: ``Microsleep`` > ``Yawning`` > ``Talking`` (file name contains
      "talking" or "singing") > ``Normal``.
    * angle: always ``aircond`` for the "NITYMED" dataset; otherwise
      ``mirror`` when that keyword is present, else ``unknown``.
    * For the "YawDD" dataset, anything with "dash" in its path or name is
      skipped and left in place.

    NOTE: files are *moved*, not copied, so the raw dataset is modified and a
    second run finds nothing left to move.

    Args:
        source_path: Root folder of the raw dataset.
        dataset_name: "YawDD" or "NITYMED"; selects the dataset-specific rules.
        dest_root: Output root. Defaults to the notebook-level
            ``combined_processed_path``.
        extensions: Iterable of file extensions (with leading dot) treated as
            videos. Defaults to the notebook-level ``video_extensions``.

    Returns:
        None. Progress and a summary are printed.
    """
    print(f"--- Processing {dataset_name} ---")
    count = 0
    skipped = 0
    renamed = 0
    failed = 0

    # Check if source exists before starting
    if not os.path.exists(source_path):
        print(f"Error: Source path does not exist: {source_path}")
        return

    if dest_root is None:
        dest_root = combined_processed_path
    if extensions is None:
        extensions = video_extensions
    extensions = tuple(ext.lower() for ext in extensions)

    # Keyword matching starts at the dataset folder's own name.
    source_name = os.path.basename(os.path.normpath(source_path))

    for root, dirs, files in os.walk(source_path):
        # Safety: never descend into the destination folder if it sits inside
        # the source. Pruning `dirs` in place stops os.walk from visiting it.
        if _is_within(root, dest_root):
            dirs[:] = []
            continue
        dirs[:] = [d for d in dirs if not _is_within(os.path.join(root, d), dest_root)]

        scope_lower = os.path.join(source_name, os.path.relpath(root, source_path)).lower()

        for file in files:
            file_lower = file.lower()
            if not file_lower.endswith(extensions):
                continue

            # --- 1. EXCLUDE YAWDD DASH ---
            if dataset_name == "YawDD" and ('dash' in scope_lower or 'dash' in file_lower):
                skipped += 1
                continue

            # --- 2. LABELING LOGIC ---
            if 'microsleep' in scope_lower or 'microsleep' in file_lower:
                label = 'Microsleep'
            elif 'yawn' in scope_lower or 'yawn' in file_lower:
                label = 'Yawning'
            elif 'talking' in file_lower or 'singing' in file_lower:
                label = 'Talking'
            else:
                label = 'Normal'

            # --- 3. ANGLE LOGIC ---
            if dataset_name == "NITYMED":
                angle = 'aircond'
            elif 'mirror' in scope_lower or 'mirror' in file_lower:
                angle = 'mirror'
            else:
                # Only non-mirror, non-dash YawDD files reach here.
                angle = 'unknown'

            # --- 4. EXECUTE MOVE ---
            target_dir = os.path.join(dest_root, label, angle)
            os.makedirs(target_dir, exist_ok=True)

            source_file = os.path.join(root, file)
            # Same-named videos from different sub-folders must not overwrite
            # each other (POSIX) or fail to move (Windows).
            destination_file = _unique_destination(target_dir, file)
            if os.path.basename(destination_file) != file:
                renamed += 1

            try:
                shutil.move(source_file, destination_file)
                count += 1
            except OSError as e:
                # Best effort: report the failure and keep going.
                failed += 1
                print(f"Error moving {file}: {e}")

    print(f"Finished {dataset_name}. Moved: {count} | Skipped (Dash): {skipped}")
    if renamed or failed:
        print(f"  Renamed to avoid name clashes: {renamed} | Failed to move: {failed}")

# Sort both raw datasets into the shared cleaned-output tree, YawDD first.
for raw_path, dataset_label in (
    (yawdd_raw_path, "YawDD"),
    (nitymed_raw_path, "NITYMED"),
):
    organize_dataset(raw_path, dataset_label)

print(f"\nSuccess! All files reorganized in: {combined_processed_path}")

--- Processing YawDD ---
Finished YawDD. Moved: 319 | Skipped (Dash): 29
--- Processing NITYMED ---
Finished NITYMED. Moved: 126 | Skipped (Dash): 0

Success! All files reorganized in: C:\Users\arman\Desktop\FYP_ARMAN\Driver-Fatigue-Detection-Using-Vision-Based-Machine-Learning\CLEANED_DRIVER_FATIGUE


# Frame Extraction & Feature Analysis

In [2]:
import mediapipe as mp
# Show which mediapipe build this kernel imported and where it was loaded from.
print(
    f"Version: {mp.__version__}",
    f"File Path: {mp.__file__}",
    sep="\n",
)

Version: 0.10.32
File Path: c:\Users\arman\miniconda3\envs\FYP\Lib\site-packages\mediapipe\__init__.py


In [4]:
import cv2
import dlib
import os
import numpy as np

# 1. Initialize Dlib
# Frontal face detector plus the 68-point landmark predictor. The model file
# name is relative, so it is looked up in the kernel's working directory.
LANDMARK_MODEL_PATH = "shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

# 2. Define dataset paths (relative to the working directory)
# os.path.join keeps these valid on every OS; on Windows the resulting values
# are the same as the previous hard-coded backslash strings.
# NOTE(review): the sorting cell earlier in this notebook writes to
# CLEANED_DRIVER_FATIGUE, not data/sorted_yawdd -- confirm this is the intended input.
base_path = os.path.join('data', 'sorted_yawdd')
output_path = os.path.join('data', 'processed_images')

def get_mar(shape):
    """Compute the Mouth Aspect Ratio (MAR) from facial landmark coordinates.

    MAR = (|p61 - p67| + |p63 - p65|) / (2 * |p60 - p64|)

    i.e. the mean of two vertical inner-lip openings divided by the horizontal
    inner-mouth width.

    Args:
        shape: Array-like of (x, y) landmark coordinates that can be indexed
            at positions 60-67 (a NumPy array or a plain nested list).

    Returns:
        The MAR as a float. Returns 0.0 when the horizontal distance is zero
        (degenerate landmarks), instead of dividing by zero and producing
        inf/nan with a RuntimeWarning.
    """
    points = np.asarray(shape, dtype=float)

    # Vertical distances between inner lips
    v1 = np.linalg.norm(points[61] - points[67])
    v2 = np.linalg.norm(points[63] - points[65])
    # Horizontal distance
    h = np.linalg.norm(points[60] - points[64])

    if h == 0:
        return 0.0
    return (v1 + v2) / (2.0 * h)

# 3. Automation Loop
# Tunable extraction settings.
FRAME_STEP = 5             # analyse every 5th frame to manage computational resources
MAR_YAWN_THRESHOLD = 0.3   # a frame in a yawning folder is kept only above this MAR
TARGET_SIZE = (224, 224)   # resize target for VGG16/ResNet-50/MobileNetV4


def _keep_frame(frame, is_normal_dir):
    """Decide whether a sampled frame should be saved.

    A frame is only ever kept when the dlib detector finds at least one face.
    In a "normal" folder that is sufficient; otherwise (a "yawn" folder) at
    least one detected face must have a MAR above ``MAR_YAWN_THRESHOLD``.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    if is_normal_dir:
        return len(faces) > 0
    for face in faces:
        landmarks = predictor(gray, face)
        shape = np.array([[landmarks.part(i).x, landmarks.part(i).y] for i in range(68)])
        if get_mar(shape) > MAR_YAWN_THRESHOLD:
            return True
    return False


def extract_frames(src_root, dst_root):
    """Sample frames from every .avi under ``src_root`` and save the selected ones.

    Folder names decide the class: a path containing "normal" keeps every
    sampled frame with a detected face; a path containing "yawn" keeps only
    frames whose MAR exceeds the threshold. Both checks are case-insensitive.
    Folders matching neither keyword are skipped, since nothing from them
    would be saved. Saved images mirror the source folder layout under
    ``dst_root`` and are named ``<video file name>_f<frame index>.jpg``.

    Args:
        src_root: Folder containing the sorted videos.
        dst_root: Folder that receives the extracted JPEG frames.

    Returns:
        Tuple ``(videos_processed, frames_saved)``.
    """
    videos_processed = 0
    frames_saved = 0

    for root, dirs, files in os.walk(src_root):
        root_lower = root.lower()
        is_normal_dir = 'normal' in root_lower
        is_yawn_dir = 'yawn' in root_lower
        if not (is_normal_dir or is_yawn_dir):
            continue

        save_dir = os.path.join(dst_root, os.path.relpath(root, src_root))

        for file in files:
            if not file.lower().endswith('.avi'):
                continue

            video_path = os.path.join(root, file)
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Warning: could not open video, skipping: {video_path}")
                cap.release()
                continue

            videos_processed += 1
            frame_idx = 0
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    if frame_idx % FRAME_STEP == 0 and _keep_frame(frame, is_normal_dir):
                        final_img = cv2.resize(frame, TARGET_SIZE)
                        os.makedirs(save_dir, exist_ok=True)
                        out_file = os.path.join(save_dir, f"{file}_f{frame_idx}.jpg")
                        # Each frame is written once, however many faces it contains.
                        if cv2.imwrite(out_file, final_img):
                            frames_saved += 1
                        else:
                            print(f"Warning: failed to write {out_file}")

                    frame_idx += 1
            finally:
                # Always free the decoder, even if detection or writing raised.
                cap.release()

    return videos_processed, frames_saved


if not os.path.isdir(base_path):
    print(f"Error: input folder does not exist: {base_path}")
else:
    videos_processed, frames_saved = extract_frames(base_path, output_path)
    print(f"Videos processed: {videos_processed} | Frames saved: {frames_saved}")
    if videos_processed == 0:
        print(f"Warning: no .avi videos found in 'yawn'/'normal' folders under {base_path}")
    else:
        print("Dlib extraction complete! Your YawDD dataset is now processed for training.")

ModuleNotFoundError: No module named 'dlib'