In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/coconames/coco.names
/kaggle/input/shape-predictor-68-face-landmarksdat/shape_predictor_68_face_landmarks.dat
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/47/5.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/47/10.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/47/0.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/45/5.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/45/10.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/45/0.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/42/5.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/42/10.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/42/0.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/43/5.mp4
/kaggle/input/uta-reallife-drowsiness-dataset/Fold4_part2/Fold4_part2/43/10.mov
/ka

In [None]:
!pip install dlib



# Characteristics Extraction

In [None]:
import cv2
import dlib
import numpy as np
import pandas as pd

# Build dlib's frontal face detector. It is called on a grayscale frame inside
# extract_features() and yields one entry per detected face.
detector = dlib.get_frontal_face_detector()

# Load the facial landmarks predictor from the Kaggle input dataset.
# Replace the path with the correct location of your
# shape_predictor_68_face_landmarks.dat file when running elsewhere.
predictor = dlib.shape_predictor("/kaggle/input/shape-predictor-68-face-landmarksdat/shape_predictor_68_face_landmarks.dat")

# Function to calculate EAR
def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmarks.

    With p0..p5 denoting the entries of ``eye`` in order, the value is
    ``(|p1 - p5| + |p2 - p4|) / (2 * |p0 - p3|)``.

    Args:
        eye: Indexable sequence of at least six points, each exposing numeric
            ``.x`` and ``.y`` attributes.

    Returns:
        float: The ratio, or 0.0 when p0 and p3 coincide. Returning 0.0 keeps
        inf/nan out of the feature table and matches the zero-EAR guard in
        mouth_to_eye_ratio().

    Raises:
        IndexError: If fewer than six points are supplied.
    """
    pts = np.array([(point.x, point.y) for point in eye], dtype=float)

    dist_1_5 = np.linalg.norm(pts[1] - pts[5])
    dist_2_4 = np.linalg.norm(pts[2] - pts[4])
    dist_0_3 = np.linalg.norm(pts[0] - pts[3])

    # Degenerate landmarks: the denominator would be zero.
    if dist_0_3 == 0:
        return 0.0

    return float((dist_1_5 + dist_2_4) / (2.0 * dist_0_3))

# Function to calculate PUC
def pupil_to_eye_center_distance(eye):
    """Return the Euclidean distance between the first and fourth eye points.

    NOTE(review): despite the name, no pupil or eye-centre position is
    estimated here. The result is simply ``|eye[0] - eye[3]|`` in the units of
    the point coordinates -- confirm this is the intended "PUC" feature.

    Args:
        eye: Sequence of at least four points exposing ``.x`` and ``.y``.

    Returns:
        The distance as computed by ``np.linalg.norm``.
    """
    coords = [(point.x, point.y) for point in eye]
    (x_first, y_first), (x_fourth, y_fourth) = coords[0], coords[3]
    offset = np.array([x_first - x_fourth, y_first - y_fourth])
    return np.linalg.norm(offset)

# Function to calculate MAR
def mouth_aspect_ratio(mouth):
    """Compute the mouth aspect ratio (MAR) from twenty mouth landmarks.

    With m0..m19 denoting the entries of ``mouth`` in order, the value is
    ``(|m13 - m19| + |m14 - m18| + |m15 - m17|) / (3 * |m12 - m16|)``.

    Args:
        mouth: Indexable sequence of at least twenty points, each exposing
            numeric ``.x`` and ``.y`` attributes.

    Returns:
        float: The ratio, or 0.0 when m12 and m16 coincide, so that a
        degenerate detection cannot inject inf/nan into the feature table.

    Raises:
        IndexError: If fewer than twenty points are supplied.
    """
    pts = np.array([(point.x, point.y) for point in mouth], dtype=float)

    dist_13_19 = np.linalg.norm(pts[13] - pts[19])
    dist_14_18 = np.linalg.norm(pts[14] - pts[18])
    dist_15_17 = np.linalg.norm(pts[15] - pts[17])
    dist_12_16 = np.linalg.norm(pts[12] - pts[16])

    # Degenerate landmarks: the denominator would be zero.
    if dist_12_16 == 0:
        return 0.0

    return float((dist_13_19 + dist_14_18 + dist_15_17) / (3.0 * dist_12_16))

# Function to calculate MOE
def mouth_to_eye_ratio(eye, mouth):
    """Return MAR divided by EAR for the given eye and mouth landmarks.

    Both ratios come from eye_aspect_ratio() and mouth_aspect_ratio().

    Args:
        eye: Eye landmarks, forwarded to eye_aspect_ratio().
        mouth: Mouth landmarks, forwarded to mouth_aspect_ratio().

    Returns:
        ``mar / ear``, or 0 when ``ear`` equals zero.
    """
    eye_ratio = eye_aspect_ratio(eye)
    mouth_ratio = mouth_aspect_ratio(mouth)
    # A zero EAR would make the quotient undefined; fall back to 0 instead.
    return mouth_ratio / eye_ratio if eye_ratio != 0 else 0

# Function to extract features and label from a frame
def extract_features(frame, ear_threshold=0.2, mar_threshold=0.4,
                     puc_threshold=70, moe_threshold=0.2):
    """Extract [EAR, PUC, MAR, MOE] and a rule-based label for each face.

    The frame is converted to grayscale, faces are located with the
    module-level ``detector``, and landmarks are predicted with the
    module-level ``predictor``. Landmarks 36-41 are used as the eye and
    landmarks 48-67 as the mouth.

    A face is labelled drowsy (1) when ANY of the following holds:
    ``ear < ear_threshold``, ``mar > mar_threshold``,
    ``puc < puc_threshold`` or ``moe > moe_threshold``; otherwise 0.

    NOTE(review): the label is a deterministic function of the four returned
    features, so a classifier trained on this output can recover the rule
    itself rather than learn anything about real drowsiness.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        ear_threshold: EAR values below this flag the face as drowsy.
        mar_threshold: MAR values above this flag the face as drowsy.
        puc_threshold: PUC values below this flag the face as drowsy.
        moe_threshold: MOE values above this flag the face as drowsy.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a list of
        ``[ear, puc, mar, moe]`` lists and ``labels`` is the parallel list of
        0/1 ints. Both are empty when no face is detected.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the grayscale frame
    faces = detector(gray)

    features = []
    labels = []

    for face in faces:
        # Predict the facial landmarks once and slice out the regions used below
        landmarks = predictor(gray, face).parts()
        eye = landmarks[36:42]
        mouth = landmarks[48:68]

        # Calculate EAR, PUC, MAR, and MOE
        ear = eye_aspect_ratio(eye)
        puc = pupil_to_eye_center_distance(eye)
        mar = mouth_aspect_ratio(mouth)
        moe = mouth_to_eye_ratio(eye, mouth)

        # Rule-based label: any single threshold violation marks the face drowsy
        is_drowsy = (
            ear < ear_threshold
            or mar > mar_threshold
            or puc < puc_threshold
            or moe > moe_threshold
        )
        drowsy = 1 if is_drowsy else 0

        # Append features and label to the lists
        features.append([ear, puc, mar, moe])
        labels.append(drowsy)

    return features, labels

# Path to one video of the UTA Real-Life Drowsiness dataset
uta_dataset_video_path = "/kaggle/input/uta-reallife-drowsiness-dataset/Fold1_part1/Fold1_part1/01/0.mov"

# Open video capture
cap = cv2.VideoCapture(uta_dataset_video_path)

# Fail loudly if the file is missing or cannot be decoded; otherwise the loop
# below would be skipped and every later cell would run on an empty table.
if not cap.isOpened():
    cap.release()
    raise IOError(f"Could not open video: {uta_dataset_video_path}")

all_features = []
all_labels = []

# Frame skipping configuration
frame_skip = 5  # Process every 5th frame
counter = 0

# Read frames from the video; the capture is released even if extraction fails
try:
    while cap.isOpened():
        ret, frame = cap.read()

        # ret is False once no further frame can be read
        if not ret:
            break

        counter += 1
        if counter % frame_skip != 0:
            continue

        # Extract features and label from the current frame
        features, labels = extract_features(frame)
        all_features.extend(features)
        all_labels.extend(labels)
finally:
    # Release the video capture object
    cap.release()

# Convert the lists of features and labels to a DataFrame
column_names = ["EAR", "PUC", "MAR", "MOE"]
df_features = pd.DataFrame(all_features, columns=column_names)
df_labels = pd.DataFrame({"drowsy": all_labels})

# Concatenate features and labels DataFrames (one row per detected face)
df = pd.concat([df_features, df_labels], axis=1)

# Display the DataFrame with extracted features and labels
print(df)


           EAR        PUC       MAR       MOE  drowsy
0     0.289598  76.006579  0.081149  0.280211       1
1     0.298701  77.000000  0.089677  0.300222       1
2     0.306356  80.006250  0.053830  0.175710       0
3     0.325139  80.024996  0.029812  0.091689       0
4     0.356792  80.000000  0.009103  0.025515       0
...        ...        ...       ...       ...     ...
3604  0.298051  72.173402  0.024935  0.083660       0
3605  0.308263  73.061618  0.006100  0.019788       0
3606  0.312387  72.062473  0.010434  0.033401       0
3607  0.326263  72.027772  0.011578  0.035486       0
3608  0.313997  70.064256  0.008488  0.027033       0

[3609 rows x 5 columns]


In [None]:
# Save the DataFrame to a CSV file under /kaggle/working/.
# index=False leaves the DataFrame's row index out of the file.
csv_file_path = "/kaggle/working/Extract features.csv"
df.to_csv(csv_file_path, index=False)

# Echo the destination so it is visible in the cell output.
print("DataFrame saved to:", csv_file_path)


DataFrame saved to: /kaggle/working/Extract features.csv


# Naive Bayes Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



# Separate the 'drowsy' target from the remaining feature columns
y = df['drowsy']
X = df.drop(columns='drowsy')

# Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a Gaussian Naive Bayes model on the training portion
naive_bayes_classifier = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred_nb = naive_bayes_classifier.predict(X_test)

# Score the predictions against the true labels
accuracy_nb = accuracy_score(y_test, y_pred_nb)
conf_matrix_nb = confusion_matrix(y_test, y_pred_nb)
classification_rep_nb = classification_report(y_test, y_pred_nb)

# Report the metrics for Naive Bayes
print("\nNaive Bayes Accuracy:", accuracy_nb)
print("\nNaive Bayes Confusion Matrix:\n", conf_matrix_nb)
print("\nNaive Bayes Classification Report:\n", classification_rep_nb)



Naive Bayes Accuracy: 0.8795013850415513

Naive Bayes Confusion Matrix:
 [[538   3]
 [ 84  97]]

Naive Bayes Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.99      0.93       541
           1       0.97      0.54      0.69       181

    accuracy                           0.88       722
   macro avg       0.92      0.77      0.81       722
weighted avg       0.89      0.88      0.87       722



# Decision Tree Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


# Local import: Pipeline is only needed by this cell
from sklearn.pipeline import Pipeline

X = df.drop('drowsy', axis=1)
y = df['drowsy']

# Split BEFORE any label-aware preprocessing so the held-out rows never
# influence which features are selected.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision Tree with Limited Depth
dt_classifier = DecisionTreeClassifier(max_depth=5, min_samples_split=5, min_samples_leaf=2, random_state=42)

# Chi-squared feature selection (k=2) and the tree are bundled in one pipeline,
# so the selector is fitted on training rows only here and on the training
# folds only during cross-validation.
dt_pipeline = Pipeline([
    ("select", SelectKBest(chi2, k=2)),
    ("tree", dt_classifier),
])

# Train the selector + classifier on the training set
dt_pipeline.fit(X_train, y_train)

# Kept under its original name for any later cell that reads it; the selection
# now comes from the training split only.
X_selected = dt_pipeline.named_steps["select"].transform(X)

# Make predictions on the testing set
y_pred_dt = dt_pipeline.predict(X_test)

# Evaluate the performance of the Decision Tree
accuracy_dt = accuracy_score(y_test, y_pred_dt)
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)
classification_rep_dt = classification_report(y_test, y_pred_dt)

# Print the results for Decision Tree
print("Decision Tree Accuracy:", accuracy_dt)
print("\nDecision Tree Confusion Matrix:\n", conf_matrix_dt)
print("\nDecision Tree Classification Report:\n", classification_rep_dt)

# Cross-Validation for Decision Tree (selection is re-fitted inside each fold)
cv_scores_dt = cross_val_score(dt_pipeline, X, y, cv=5)
print("\nCross-Validation Scores (Decision Tree):", cv_scores_dt)
print("Mean CV Accuracy (Decision Tree):", cv_scores_dt.mean())


Decision Tree Accuracy: 1.0

Decision Tree Confusion Matrix:
 [[541   0]
 [  0 181]]

Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       541
           1       1.00      1.00      1.00       181

    accuracy                           1.00       722
   macro avg       1.00      1.00      1.00       722
weighted avg       1.00      1.00      1.00       722


Cross-Validation Scores (Decision Tree): [1.         0.99722992 1.         0.99861496 0.99583911]
Mean CV Accuracy (Decision Tree): 0.9983367975380455


# Random Forest Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


# Local import: Pipeline is only needed by this cell
from sklearn.pipeline import Pipeline

X = df.drop('drowsy', axis=1)
y = df['drowsy']

# Split BEFORE any label-aware preprocessing so the held-out rows never
# influence which features are selected.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest with Limited Trees
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=5, min_samples_leaf=2, random_state=42)

# Chi-squared feature selection (k=2) and the forest are bundled in one
# pipeline, so the selector is fitted on training rows only here and on the
# training folds only during cross-validation.
rf_pipeline = Pipeline([
    ("select", SelectKBest(chi2, k=2)),
    ("forest", rf_classifier),
])

# Train the selector + Random Forest classifier on the training set
rf_pipeline.fit(X_train, y_train)

# Kept under its original name for any later cell that reads it; the selection
# now comes from the training split only.
X_selected = rf_pipeline.named_steps["select"].transform(X)

# Make predictions on the testing set
y_pred_rf = rf_pipeline.predict(X_test)

# Evaluate the performance of the Random Forest
accuracy_rf = accuracy_score(y_test, y_pred_rf)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
classification_rep_rf = classification_report(y_test, y_pred_rf)

# Print the results for Random Forest
print("\nRandom Forest Accuracy:", accuracy_rf)
print("\nRandom Forest Confusion Matrix:\n", conf_matrix_rf)
print("\nRandom Forest Classification Report:\n", classification_rep_rf)

# Cross-Validation for Random Forest (selection is re-fitted inside each fold)
cv_scores_rf = cross_val_score(rf_pipeline, X, y, cv=5)
print("\nCross-Validation Scores (Random Forest):", cv_scores_rf)
print("Mean CV Accuracy (Random Forest):", cv_scores_rf.mean())



Random Forest Accuracy: 1.0

Random Forest Confusion Matrix:
 [[541   0]
 [  0 181]]

Random Forest Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       541
           1       1.00      1.00      1.00       181

    accuracy                           1.00       722
   macro avg       1.00      1.00      1.00       722
weighted avg       1.00      1.00      1.00       722


Cross-Validation Scores (Random Forest): [1.         0.99722992 1.         0.99861496 0.99583911]
Mean CV Accuracy (Random Forest): 0.9983367975380455


# MLP Classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

# Local import: make_pipeline is only needed for the cross-validation below
from sklearn.pipeline import make_pipeline

# Features are every column of df except the 'drowsy' target
X = df.drop('drowsy', axis=1)
y = df['drowsy']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# MLP with Regularization and Scaling.
# The scaler is fitted on the training rows only and the SAME transform is
# applied to the test rows, so the model is trained and evaluated in the same
# feature space and the test rows never influence the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Kept under its original name for any later cell that reads it
X_scaled = scaler.transform(X)

mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, alpha=0.01, early_stopping=True, random_state=42)

# Train the MLP classifier on the scaled training set
mlp_classifier.fit(X_train_scaled, y_train)

# Make predictions on the scaled testing set
y_pred_mlp = mlp_classifier.predict(X_test_scaled)

# Evaluate the performance of the MLP model
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
conf_matrix_mlp = confusion_matrix(y_test, y_pred_mlp)
classification_rep_mlp = classification_report(y_test, y_pred_mlp)

# Print the results for MLP
print("\nMLP Accuracy:", accuracy_mlp)
print("\nMLP Confusion Matrix:\n", conf_matrix_mlp)
print("\nMLP Classification Report:\n", classification_rep_mlp)

# Cross-Validation for MLP: scaling lives inside the pipeline so it is
# re-fitted on the training folds of every split. cross_val_score works on
# clones, so the classifier fitted above is left untouched.
mlp_cv_pipeline = make_pipeline(StandardScaler(), mlp_classifier)
cv_scores_mlp = cross_val_score(mlp_cv_pipeline, X, y, cv=5)
print("\nCross-Validation Scores (MLP):", cv_scores_mlp)
print("Mean CV Accuracy (MLP):", cv_scores_mlp.mean())


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



MLP Accuracy: 0.7493074792243767

MLP Confusion Matrix:
 [[541   0]
 [181   0]]

MLP Classification Report:
               precision    recall  f1-score   support

           0       0.75      1.00      0.86       541
           1       0.00      0.00      0.00       181

    accuracy                           0.75       722
   macro avg       0.37      0.50      0.43       722
weighted avg       0.56      0.75      0.64       722


Cross-Validation Scores (MLP): [0.99030471 0.97922438 0.99861496 0.93767313 0.94590846]
Mean CV Accuracy (MLP): 0.9703451269973605


In [None]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.


# CNN Classifier

In [None]:
import cv2
import dlib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

# Load the data as you did before
# ...

# Feature matrix = every column except the last; target = the 'drowsy' column
X = df.iloc[:, :-1].values
y = df['drowsy'].values

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: statistics come from the training rows and are reused for the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Arrange the 4 features of each sample as a 2x2 single-channel "image" for Conv2D
X_train = X_train.reshape(-1, 2, 2, 1)
X_test = X_test.reshape(-1, 2, 2, 1)

# Create the CNN model: one 2x2 convolution followed by two dense layers,
# with batch normalization after each and dropout (rate 0.5) after the first
# dense layer; a single sigmoid unit produces the binary output.
model = Sequential([
    Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(2, 2, 1)),
    BatchNormalization(),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),
])

# Compile the model with Adam at an explicit learning rate
optimizer = Adam(learning_rate=1e-5)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

# Train the model.
# NOTE(review): the test split doubles as validation data here, so the
# val_* numbers logged per epoch are test-set numbers.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Evaluate on the test set: round the sigmoid outputs to obtain hard labels
y_pred_prob = model.predict(X_test)
y_pred = np.round(y_pred_prob)
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(y_test, y_pred)

# Print the results
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report_result)


2025-11-03 19:45:11.509039: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762199111.842644      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762199111.936077      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-11-03 19:45:27.073627: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5253 - loss: 0.7737 - val_accuracy: 0.2507 - val_loss: 0.8277
Epoch 2/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5697 - loss: 0.7272 - val_accuracy: 0.2507 - val_loss: 0.8432
Epoch 3/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5989 - loss: 0.6959 - val_accuracy: 0.2507 - val_loss: 0.8147
Epoch 4/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6178 - loss: 0.6584 - val_accuracy: 0.2964 - val_loss: 0.7729
Epoch 5/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6546 - loss: 0.6138 - val_accuracy: 0.3947 - val_loss: 0.7154
Epoch 6/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6811 - loss: 0.5934 - val_accuracy: 0.5512 - val_loss: 0.6487
Epoch 7/100
[1m91/91[0m [32m━━━

In [None]:
import tensorflow as tf
import numpy as np

# Take row 15 of the table without its trailing label column: EAR, PUC, MAR, MOE
sample = df.iloc[15, :-1].values
# Apply the already-fitted scaler, then shape the row as one (2, 2, 1) model input
sample_scaled = scaler.transform(sample.reshape(1, -1))
sample_reshaped = sample_scaled.reshape(1, 2, 2, 1)

# Wrap the input as a float32 tensor so gradients can be taken with respect to it
input_tensor = tf.convert_to_tensor(sample_reshaped, dtype=tf.float32)

# Record the forward pass; the input is not a Variable, so it is watched explicitly
with tf.GradientTape() as tape:
    tape.watch(input_tensor)
    prediction = model(input_tensor)

# Gradient of the model output with respect to each input value
grads = tape.gradient(prediction, input_tensor).numpy()

# Flatten back to four values and pair each with its feature name.
# Note: this describes the model's sensitivity at this one sample only.
gradients = grads.ravel()
features = ['EAR', 'PUC', 'MAR', 'MOE']
importance = {name: grad for name, grad in zip(features, gradients)}

print("Feature importance (gradient magnitude):")
for feature_name, gradient in importance.items():
    print(f"{feature_name}: {abs(gradient):.6f}")


Feature importance (gradient magnitude):
EAR: 0.000083
PUC: 0.071588
MAR: 0.081848
MOE: 0.207033


In [None]:
# Save in native Keras format (architecture + weights + optimizer).
# The format is inferred from the ".keras" suffix; the `save_format` argument
# is deprecated in Keras 3 and is therefore not passed.
model.save("/kaggle/working/my_model.keras")

In [None]:
# Original model in training environment, saved in native Keras format (NOT .h5).
# The format is inferred from the ".keras" suffix; the `save_format` argument
# is deprecated in Keras 3 and is therefore not passed.
model.save("/kaggle/working/my_model1.keras")