# CLEANING YAWDD AND NITYMED DATASETS

In [None]:
import os
import shutil
import zipfile

# Dataset locations.
# The raw dataset folders default to the original machine-specific locations.
# Set the YAWDD_RAW_PATH / NITYMED_RAW_PATH environment variables to run the
# notebook on another machine without editing this cell.
yawdd_raw_path = os.environ.get(
    'YAWDD_RAW_PATH',
    r'C:\Users\arman\Desktop\FYP_ARMAN\Driver-Fatigue-Detection-Using-Vision-Based-Machine-Learning\YAWDD_DATASET',
)
nitymed_raw_path = os.environ.get(
    'NITYMED_RAW_PATH',
    r'C:\Users\arman\Desktop\FYP_ARMAN\Driver-Fatigue-Detection-Using-Vision-Based-Machine-Learning\NITYMED_DATASET',
)
# Output root, relative to the notebook's working directory. The sorting loops
# in this cell write into <combined_processed_path>/<label>/<angle>/.
combined_processed_path = 'CLEANED_DRIVER_FATIGUE'

# --- 1. PROCESS YAWDD (Zip Files) ---
# Every .zip archive found under yawdd_raw_path is extracted into
# <combined_processed_path>/<label>/<angle>/, where label and angle are
# inferred from keywords in the archive's name / sub-folder.

# Fail fast: os.walk() silently yields nothing for a missing directory, which
# would let this cell finish "successfully" without extracting anything.
if not os.path.isdir(yawdd_raw_path):
    raise FileNotFoundError(f"YawDD raw dataset folder not found: {yawdd_raw_path}")

for root, dirs, files in os.walk(yawdd_raw_path):
    # Match keywords only against the path *below* the dataset root, so folder
    # names above it (project, user, ...) can never influence the angle.
    rel_dir_lower = os.path.relpath(root, yawdd_raw_path).lower()

    for file in files:
        file_lower = file.lower()
        # Case-insensitive so archives named *.ZIP are not skipped.
        if not file_lower.endswith(".zip"):
            continue

        # Identify Angle: anything not marked "dash" is treated as mirror.
        angle = 'dash' if 'dash' in rel_dir_lower or 'dash' in file_lower else 'mirror'

        # Map the archive name to a class. 'yawning' is tested first, so a
        # name containing both 'talking' and 'yawning' is labelled Yawning.
        if 'yawning' in file_lower:
            label = 'Yawning'
        elif 'talking' in file_lower or 'singing' in file_lower:
            label = 'Talking'
        else:
            label = 'Normal'  # everything without a keyword falls back to Normal

        target_dir = os.path.join(combined_processed_path, label, angle)
        os.makedirs(target_dir, exist_ok=True)

        # NOTE(review): all archives with the same label/angle share one
        # target folder, so identically named members from different archives
        # would overwrite each other - confirm member names are unique.
        with zipfile.ZipFile(os.path.join(root, file), 'r') as zip_ref:
            zip_ref.extractall(target_dir)

# --- 2. PROCESS NITYMED (MP4 Files) ---
# Every .mp4 found under nitymed_raw_path is copied into
# <combined_processed_path>/<label>/side_profile/, where the label is inferred
# from keywords in the file name or its sub-folder.

# Fail fast: os.walk() silently yields nothing for a missing directory, which
# would let this cell report success without copying anything.
if not os.path.isdir(nitymed_raw_path):
    raise FileNotFoundError(f"NITYMED raw dataset folder not found: {nitymed_raw_path}")

for root, dirs, files in os.walk(nitymed_raw_path):
    # Match keywords only against the path *below* the dataset root, so folder
    # names above it (project, user, ...) can never influence the label.
    rel_dir_lower = os.path.relpath(root, nitymed_raw_path).lower()

    for file in files:
        file_lower = file.lower()
        # Case-insensitive so clips named *.MP4 are not skipped.
        if not file_lower.endswith(".mp4"):
            continue

        # Map NITYMED structure to the target classes; 'microsleep' takes
        # precedence over 'yawning' when both keywords are present.
        if 'microsleep' in rel_dir_lower or 'microsleep' in file_lower:
            label = 'Microsleep'
        elif 'yawning' in rel_dir_lower or 'yawning' in file_lower:
            label = 'Yawning'
        else:
            label = 'Normal'

        # Every NITYMED clip is filed under the same angle folder.
        angle = 'side_profile'

        target_dir = os.path.join(combined_processed_path, label, angle)
        os.makedirs(target_dir, exist_ok=True)

        # Copy MP4 (with metadata) to the combined directory.
        # NOTE(review): clips with the same file name from different
        # sub-folders would overwrite each other - confirm names are unique.
        source_file = os.path.join(root, file)
        shutil.copy2(source_file, os.path.join(target_dir, file))

print(f"Dataset integration complete. Data stored in: {combined_processed_path}")

Dataset integration complete. Data stored in: data/DRIVER_FATIGUE


In [7]:
import cv2
import os

# Input root: the folder tree of sorted videos that is walked further down in
# this cell to find .avi files.
# NOTE(review): the sorting cell above writes to combined_processed_path
# ('CLEANED_DRIVER_FATIGUE'), not to this folder - confirm which path is intended.
processed_path = 'data/sorted_yawdd'
# Output root: extracted JPEG frames are written here, mirroring the
# sub-folder layout found under processed_path.
frames_output_path = 'data/yawdd_frames'

def extract_frames(video_path, output_folder, frame_rate=5):
    """Save every ``frame_rate``-th frame of a video as a 224x224 JPEG.

    Frames are written to ``output_folder`` as
    ``<video name>_frame_<index>.jpg``, where ``index`` is the 0-based
    position of the frame in the source video.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the JPEGs; created if missing.
        frame_rate: Sampling stride in frames (not frames per second): frames
            0, frame_rate, 2 * frame_rate, ... are saved. Must be positive.

    Raises:
        ValueError: If ``frame_rate`` is zero or negative.
    """
    # Validate before touching the filesystem or the video; a stride of 0
    # would otherwise only surface later as a ZeroDivisionError.
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate!r}")

    # splitext() strips only the final extension, so dots inside the name are
    # kept ("clip.v2.avi" -> "clip.v2") and frame filenames cannot collide
    # between videos that share a prefix up to the first dot.
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Keep the batch going, but make the skipped file visible.
            print(f"Warning: could not open video, skipping: {video_path}")
            return

        count = 0
        while True:
            success, image = cap.read()
            if not success:
                break  # end of stream (or read error)
            if count % frame_rate == 0:
                # Resize for consistency as per report phase 2
                image = cv2.resize(image, (224, 224))
                frame_filename = f"{video_name}_frame_{count}.jpg"
                cv2.imwrite(os.path.join(output_folder, frame_filename), image)
            count += 1
    finally:
        # Always free the capture handle, even if resize/imwrite raises.
        cap.release()

# Walk through sorted directories and extract frames from every .avi video.

# Fail fast: os.walk() silently yields nothing for a missing directory, which
# would let this cell print "complete" without extracting a single frame.
if not os.path.isdir(processed_path):
    raise FileNotFoundError(f"Sorted video folder not found: {processed_path}")

for root, dirs, files in os.walk(processed_path):
    # Maintain the sub-folder structure of the input tree in the frames folder.
    rel_path = os.path.relpath(root, processed_path)
    target_folder = os.path.join(frames_output_path, rel_path)

    for file in files:
        # Case-insensitive so videos named *.AVI are not skipped.
        if not file.lower().endswith(".avi"):
            continue

        video_full_path = os.path.join(root, file)
        extract_frames(video_full_path, target_folder)

print("Frame extraction for YawDD complete.")

Frame extraction for YawDD complete.
