In [None]:
!pip install -q opencv-python dlib imutils tensorflow
!apt-get install -y cmake
!wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
!bzip2 -d shape_predictor_68_face_landmarks.dat.bz2


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
--2025-05-25 07:29:48--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 [following]
--2025-05-25 07:29:49--  https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Connecting to dlib.net (dlib.net)|107.180.26.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2’


2025-05-25 07:29:55 (11.0 MB/s) - ‘shape_predictor_68_face_landmarks.dat.bz2’ saved [64040097/64040097]



In [None]:
from google.colab import files

# Assign the result instead of leaving the call as the cell's last expression:
# files.upload() returns the uploaded bytes, and echoing them would write the
# Kaggle API token into the saved notebook output.
_uploaded = files.upload()  # Upload kaggle.json when prompted


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Copy the uploaded token into ~/.kaggle (created if missing) for the kaggle CLI calls below.
# Note: this is a copy, so kaggle.json also remains in the working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Make the token readable/writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
!kaggle datasets download -d akashshingha850/mrl-eye-dataset
!unzip -q mrl-eye-dataset.zip -d /content/MRL_eye_dataset


Dataset URL: https://www.kaggle.com/datasets/akashshingha850/mrl-eye-dataset
License(s): MIT
Downloading mrl-eye-dataset.zip to /content
 99% 326M/329M [00:01<00:00, 253MB/s]
100% 329M/329M [00:02<00:00, 136MB/s]


In [None]:
!kaggle datasets download -d davidvazquezcic/yawn-dataset
!unzip -q yawn-dataset.zip -d /content/Yawn_Dataset


Dataset URL: https://www.kaggle.com/datasets/davidvazquezcic/yawn-dataset
License(s): CC-BY-NC-SA-4.0
Downloading yawn-dataset.zip to /content
  0% 0.00/16.9M [00:00<?, ?B/s]
100% 16.9M/16.9M [00:00<00:00, 1.08GB/s]


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; a later cell reads the dlib landmark
# predictor from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import cv2
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def augment_data(images, labels, augmentations_per_image=5):
    """Create randomly transformed copies of every image.

    Each input image is passed through an ``ImageDataGenerator`` (small
    rotations, shifts, shear, zoom and horizontal flips) and
    ``augmentations_per_image`` variants are collected, each paired with the
    label of its source image. The output keeps the input order: all variants
    of the first image, then all variants of the second, and so on.

    Args:
        images: Iterable of image arrays shaped (H, W) or (H, W, C).
        labels: Iterable of labels, aligned with ``images``.
        augmentations_per_image: Number of variants generated per input image
            (default 5, the value that was previously hard-coded).

    Returns:
        Tuple ``(augmented_images, augmented_labels)`` of NumPy arrays holding
        ``len(images) * augmentations_per_image`` entries. The originals are
        NOT included.
    """
    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    augmented_images, augmented_labels = [], []
    for img, label in zip(images, labels):
        # flow() needs a rank-4 batch; derive (1, H, W, C) from the image itself
        # instead of assuming 50x50x1 so other input sizes work too.
        batch = img.reshape(1, img.shape[0], img.shape[1], -1)
        aug_iter = datagen.flow(batch, batch_size=1)
        for _ in range(augmentations_per_image):
            aug_img = next(aug_iter)[0]  # drop the batch axis -> (H, W, C)
            augmented_images.append(aug_img)
            augmented_labels.append(label)
    return np.array(augmented_images), np.array(augmented_labels)

def load_images(folder, label, size=(50, 50)):
    """Load every readable image in ``folder`` as a normalized grayscale array.

    Args:
        folder: Directory whose direct children are the image files.
        label: Label attached to every image loaded from this folder.
        size: Target ``(width, height)`` passed to ``cv2.resize``
            (default (50, 50), the value that was previously hard-coded).

    Returns:
        List of ``(image, label)`` tuples, where ``image`` is a float32 array
        scaled to [0, 1]. Entries OpenCV cannot decode are skipped. The list is
        empty if the folder is missing or contains no readable images.
    """
    data = []
    # glob() returns entries in arbitrary, filesystem-dependent order; sorting
    # keeps the sample order (and therefore any seeded split) reproducible.
    for filename in sorted(glob(os.path.join(folder, '*'))):
        img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals "not an image / unreadable" with None, not an exception.
            continue
        img = cv2.resize(img, size)
        img = img.astype('float32') / 255.0  # convert to float32 and normalize
        data.append((img, label))
    return data

# Label convention used by both tasks: 0 = alert (eye open / no yawn),
# 1 = drowsy cue (eye closed / yawn).

# Load MRL Eye Dataset
eye_open = load_images('/content/MRL_eye_dataset/data/train/awake', 0)
eye_closed = load_images('/content/MRL_eye_dataset/data/train/sleepy', 1)

# Load Yawn Dataset
yawn = load_images('/content/Yawn_Dataset/yawn', 1)
no_yawn = load_images('/content/Yawn_Dataset/no_yawn', 0)

# load_images returns an empty list for a wrong or missing folder; fail here
# with a clear message instead of much later inside the train/test split.
for _name, _samples in (
    ("eye_open", eye_open),
    ("eye_closed", eye_closed),
    ("yawn", yawn),
    ("no_yawn", no_yawn),
):
    if not _samples:
        raise RuntimeError(f"No images were loaded for '{_name}'; check the dataset path.")

eye_data = eye_open + eye_closed
mouth_data = yawn + no_yawn

def prepare_dataset(data):
    """Convert (image, label) pairs into train/test arrays.

    Every image is reshaped to (50, 50, 1) and stacked into a float32 array;
    labels become an int32 vector. The result of ``train_test_split`` with a
    20% test share and ``random_state=42`` is returned unchanged, i.e. in the
    order ``X_train, X_test, y_train, y_test``.
    """
    pictures, targets = [], []
    for picture, target in data:
        pictures.append(np.reshape(picture, (50, 50, 1)))
        targets.append(target)

    features = np.array(pictures, dtype=np.float32)
    classes = np.array(targets, dtype=np.int32)
    return train_test_split(features, classes, test_size=0.2, random_state=42)

# Hold out a test split for each task.
X_eye_train, X_eye_test, y_eye_train, y_eye_test = prepare_dataset(eye_data)
X_mouth_train, X_mouth_test, y_mouth_train, y_mouth_test = prepare_dataset(mouth_data)

# Augment only the (grayscale) mouth training split.
X_mouth_train_aug, y_mouth_train_aug = augment_data(X_mouth_train, y_mouth_train)

# Final mouth training set: original samples first, augmented samples appended after them.
X_mouth_train = np.concatenate((X_mouth_train, X_mouth_train_aug), axis=0)
y_mouth_train = np.concatenate((y_mouth_train, y_mouth_train_aug), axis=0)

# MobileNet needs 3-channel input: replicate the single gray channel three times.
# Repeating the already-combined array yields the same rows, in the same order,
# as converting the original and augmented parts separately and concatenating.
X_mouth_train_rgb_aug = np.repeat(X_mouth_train_aug, 3, axis=-1)
X_mouth_train_rgb = np.repeat(X_mouth_train, 3, axis=-1)
X_mouth_test_rgb = np.repeat(X_mouth_test, 3, axis=-1)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense

def build_cnn(conv_layers, input_shape=(50, 50, 1)):
    """Build and compile a small binary-classification CNN.

    The network is a stack of ``Conv2D(3x3, relu) -> MaxPooling2D(2) ->
    BatchNormalization -> Dropout`` stages (dropout 0.3 after the first stage,
    0.4 after every later one), followed by ``Flatten -> Dense(128, relu) ->
    Dropout(0.5) -> Dense(1, sigmoid)``.

    Args:
        conv_layers: Sequence of filter counts, one per convolutional stage.
            Must contain at least one entry.
        input_shape: Shape of a single input image (default (50, 50, 1)).

    Returns:
        A compiled ``Sequential`` model (Adam, lr=0.001, binary cross-entropy,
        accuracy metric).

    Raises:
        ValueError: If ``conv_layers`` is empty.
    """
    from tensorflow.keras.layers import Input

    if not conv_layers:
        raise ValueError("conv_layers must contain at least one filter count.")

    model = Sequential()
    # Declare the input with an explicit Input object; passing input_shape= to
    # the first layer of a Sequential model is discouraged by current Keras and
    # emits a UserWarning.
    model.add(Input(shape=input_shape))

    for stage, filters in enumerate(conv_layers):
        model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D(2, 2))
        model.add(BatchNormalization())
        # Lighter dropout on the first stage, heavier on the deeper ones.
        model.add(Dropout(0.3 if stage == 0 else 0.4))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

def build_pretrained_mouth_model():
    """Build and compile a frozen-MobileNetV2 binary classifier for mouth crops.

    The model takes (50, 50, 3) images with pixel values in [0, 1] (the scale
    produced by this notebook's loading code). ImageNet-pretrained MobileNetV2
    weights expect inputs in [-1, 1], so the rescaling is done inside the
    model; callers keep feeding [0, 1] data at both training and inference
    time.

    Returns:
        A compiled ``Model``: rescaling -> frozen MobileNetV2 backbone ->
        GlobalAveragePooling2D -> Dense(128, relu) -> Dropout(0.5) ->
        Dense(1, sigmoid), with Adam (lr=0.001), binary cross-entropy and an
        accuracy metric.
    """
    from tensorflow.keras.layers import Input, Rescaling

    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(50, 50, 3))
    # Freeze the whole backbone; only the new classification head is trained.
    base_model.trainable = False

    inputs = Input(shape=(50, 50, 3))
    # Map [0, 1] -> [-1, 1], the input range MobileNetV2's ImageNet weights were trained on.
    x = Rescaling(scale=2.0, offset=-1.0)(inputs)
    # training=False keeps the backbone's BatchNormalization layers in inference mode.
    x = base_model(x, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# NOTE(review): Keras' validation_split holds out the *last* 10% of the arrays
# passed to fit(). The mouth training arrays have their augmented samples
# appended after the originals, so the mouth validation metrics are computed on
# augmented copies of training images and are likely optimistic.

# Build and train eye models
eye_cnn1 = build_cnn([32, 64])
eye_cnn2 = build_cnn([32, 64, 128])
eye_cnn3 = build_cnn([64, 128, 256])

for _model in (eye_cnn1, eye_cnn2, eye_cnn3):
    _model.fit(X_eye_train, y_eye_train, epochs=10, validation_split=0.1, batch_size=64)

# Build and train mouth models
mouth_cnn1 = build_cnn([32, 64])
mouth_cnn2 = build_cnn([32, 64, 128])
mouth_cnn3 = build_cnn([64, 128, 256])

for _model in (mouth_cnn1, mouth_cnn2, mouth_cnn3):
    _model.fit(X_mouth_train, y_mouth_train, epochs=10, validation_split=0.1, batch_size=64)

# The pretrained model consumes the 3-channel version of the same mouth samples.
mouth_pretrained_model = build_pretrained_mouth_model()
mouth_pretrained_model.fit(X_mouth_train_rgb, y_mouth_train, epochs=10, validation_split=0.1, batch_size=64)


# Save every trained model in the new Keras format. The pretrained mouth model
# is part of the mouth ensemble, so it is persisted as well (it was previously
# trained but never written to disk).
_models_to_save = {
    "eye_cnn1.keras": eye_cnn1,
    "eye_cnn2.keras": eye_cnn2,
    "eye_cnn3.keras": eye_cnn3,
    "mouth_cnn1.keras": mouth_cnn1,
    "mouth_cnn2.keras": mouth_cnn2,
    "mouth_cnn3.keras": mouth_cnn3,
    "mouth_pretrained.keras": mouth_pretrained_model,
}
for _path, _model in _models_to_save.items():
    _model.save(_path)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.8831 - loss: 0.4387 - val_accuracy: 0.8339 - val_loss: 0.3699
Epoch 2/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9521 - loss: 0.1365 - val_accuracy: 0.9467 - val_loss: 0.1534
Epoch 3/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9647 - loss: 0.1019 - val_accuracy: 0.9664 - val_loss: 0.1074
Epoch 4/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9687 - loss: 0.0875 - val_accuracy: 0.9760 - val_loss: 0.0754
Epoch 5/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9725 - loss: 0.0743 - val_accuracy: 0.9764 - val_loss: 0.0816
Epoch 6/10
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.9740 - loss: 0.0711 - val_accuracy: 0.9708 - val_loss: 0.0744
Epoch 7/10
[1m574/574[0

  base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(50, 50, 3))


Epoch 1/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 38ms/step - accuracy: 0.8942 - loss: 0.2614 - val_accuracy: 0.9345 - val_loss: 0.1818
Epoch 2/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9472 - loss: 0.1373 - val_accuracy: 0.9361 - val_loss: 0.1829
Epoch 3/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9542 - loss: 0.1150 - val_accuracy: 0.9418 - val_loss: 0.1741
Epoch 4/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.9604 - loss: 0.1037 - val_accuracy: 0.9430 - val_loss: 0.1740
Epoch 5/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9685 - loss: 0.0849 - val_accuracy: 0.9442 - val_loss: 0.1800
Epoch 6/10
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.9688 - loss: 0.0799 - val_accuracy: 0.9451 - val_loss: 0.1776
Epoch 7/10
[1m346/346[

In [None]:
from google.colab import files

# Send each saved model file to the browser: the three eye CNNs first,
# then the three mouth CNNs.
for saved_model_file in (
    "eye_cnn1.keras",
    "eye_cnn2.keras",
    "eye_cnn3.keras",
    "mouth_cnn1.keras",
    "mouth_cnn2.keras",
    "mouth_cnn3.keras",
):
    files.download(saved_model_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def ensemble_predict(models, X, rgb_model_indices=None, weights=None):
    """
    Generate binary predictions from a weighted ensemble of models.

    Each model's sigmoid output is flattened, the per-model outputs are
    combined with a weighted average, and the average is thresholded at 0.5.

    Models that need 3-channel input receive ``X`` with its last axis repeated
    three times. A model is treated as needing RGB when its index is listed in
    ``rgb_model_indices`` or, if ``X`` is single-channel, when the model
    reports a 3-channel ``input_shape``. The second rule lets callers that
    only have grayscale crops use a mixed ensemble without tracking indices.

    Args:
        models (list): List of models in the ensemble (must not be empty).
        X (numpy.ndarray): Input batch, channels last.
        rgb_model_indices (list): Indices of models that require RGB input.
            Optional; 3-channel models are also detected automatically.
        weights (list): Weights for averaging predictions. If None, equal
            weights are applied. ``np.average`` normalizes by the weight sum,
            so the weights do not have to add up to 1.

    Returns:
        numpy.ndarray: Integer array of 0/1 ensemble predictions, one per sample.

    Raises:
        ValueError: If ``models`` is empty or ``weights`` has a different
            length than ``models``.
    """
    if len(models) == 0:
        raise ValueError("ensemble_predict requires at least one model.")

    # A set gives O(1) membership tests and tolerates duplicate indices.
    rgb_indices = set(rgb_model_indices) if rgb_model_indices is not None else set()

    if weights is None:
        weights = [1.0 / len(models)] * len(models)  # Equal weights by default

    if len(weights) != len(models):
        raise ValueError("Number of weights must match the number of models in the ensemble.")

    x_shape = getattr(X, "shape", None)
    x_is_single_channel = x_shape is not None and len(x_shape) >= 1 and x_shape[-1] == 1
    X_rgb = None  # built lazily and shared by every RGB model

    preds = []
    for i, model in enumerate(models):
        model_shape = getattr(model, "input_shape", None)
        model_wants_rgb = isinstance(model_shape, tuple) and len(model_shape) >= 1 and model_shape[-1] == 3

        if i in rgb_indices or (x_is_single_channel and model_wants_rgb):
            # Convert grayscale to RGB for models requiring RGB input
            if X_rgb is None:
                X_rgb = np.repeat(X, 3, axis=-1)
            X_input = X_rgb
        else:
            X_input = X

        preds.append(model.predict(X_input, verbose=0).flatten())

    preds = np.array(preds)
    avg_preds = np.average(preds, axis=0, weights=weights)
    return (avg_preds > 0.5).astype(int)


from sklearn.metrics import accuracy_score

# --- Eye ensemble: three grayscale CNNs with their averaging weights ---
eye_models = [eye_cnn1, eye_cnn2, eye_cnn3]
eye_weights = [0.5, 0.3, 0.2]
eye_pred = ensemble_predict(eye_models, X_eye_test, weights=eye_weights)

# --- Mouth ensemble: three grayscale CNNs plus the pretrained RGB model ---
mouth_models = [mouth_cnn1, mouth_cnn2, mouth_cnn3, mouth_pretrained_model]
# Position (0-based) of the only member that needs RGB input: mouth_pretrained_model.
rgb_model_indices = [3]
# These weights add up to 1.5, not 1.
mouth_weights = [0.5, 0.3, 0.2, 0.5]
mouth_pred = ensemble_predict(
    mouth_models,
    X_mouth_test,
    rgb_model_indices=rgb_model_indices,
    weights=mouth_weights,
)

# Report held-out accuracy for both ensembles.
print("Eye Ensemble Accuracy:", accuracy_score(y_eye_test, eye_pred))
print("Updated Mouth Ensemble Accuracy:", accuracy_score(y_mouth_test, mouth_pred))



Eye Ensemble Accuracy: 0.9720259128386337
Updated Mouth Ensemble Accuracy: 0.984375


In [None]:
import dlib
from imutils import face_utils
from tensorflow.keras.preprocessing.image import img_to_array

face_detector = dlib.get_frontal_face_detector()

# Prefer the copy stored on Drive (the original behaviour). If it is not there,
# fall back to the file the setup cell downloads and extracts into the working
# directory, so the notebook does not depend on one user's Drive layout.
_DRIVE_PREDICTOR_PATH = "/content/drive/MyDrive/Drowsiness Proj/shape_predictor_68_face_landmarks.dat"
_LOCAL_PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
landmark_predictor = dlib.shape_predictor(
    _DRIVE_PREDICTOR_PATH if os.path.exists(_DRIVE_PREDICTOR_PATH) else _LOCAL_PREDICTOR_PATH
)

def detect_drowsiness(image, eye_models, mouth_models, mouth_rgb_model_indices=None):
    """Detect faces in a BGR image and print a drowsiness verdict for each.

    For every detected face the 68 facial landmarks are used to crop both eyes
    and the mouth. The crops are resized to 50x50 grayscale in [0, 1] and
    classified by the eye and mouth ensembles. A face is reported as drowsy
    when both eyes are predicted closed or the mouth is predicted yawning.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        eye_models: Models forming the eye ensemble (grayscale input).
        mouth_models: Models forming the mouth ensemble.
        mouth_rgb_model_indices: Indices into ``mouth_models`` of models that
            need 3-channel input. If None, they are inferred from each model's
            ``input_shape``.

    Returns:
        None. The verdict for each face is printed.

    Raises:
        ValueError: If ``image`` is None (e.g. ``cv2.imread`` could not read
            the file).
    """
    if image is None:
        raise ValueError("image is None; check that the image path exists and is readable.")

    if mouth_rgb_model_indices is None:
        # The crops are single-channel, so any 3-channel member of the mouth
        # ensemble must be flagged for RGB conversion or its predict() call fails.
        mouth_rgb_model_indices = [
            index
            for index, model in enumerate(mouth_models)
            if isinstance(getattr(model, "input_shape", None), tuple) and model.input_shape[-1] == 3
        ]

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    frame_height, frame_width = gray.shape[:2]

    def crop_and_process(region):
        """Return the landmark region as a (1, 50, 50, 1) batch, or None if it lies outside the frame."""
        x, y, w, h = cv2.boundingRect(np.array([region]))
        # Landmarks can fall outside the frame; clamp so negative indices do not
        # wrap around and an empty slice never reaches cv2.resize.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, frame_width), min(y + h, frame_height)
        if x1 <= x0 or y1 <= y0:
            return None
        roi = cv2.resize(gray[y0:y1, x0:x1], (50, 50))
        roi = roi.astype("float32") / 255.0
        return roi.reshape(1, 50, 50, 1)

    rects = face_detector(gray, 0)
    if len(rects) == 0:
        print("No face detected")
        return

    for rect in rects:
        shape = landmark_predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)

        # Slices of the 68-point landmark array used for each facial region.
        left_eye_img = crop_and_process(shape[36:42])
        right_eye_img = crop_and_process(shape[42:48])
        mouth_img = crop_and_process(shape[48:68])

        if left_eye_img is None or right_eye_img is None or mouth_img is None:
            # Face is partly outside the frame; no reliable crop to classify.
            continue

        left_closed = ensemble_predict(eye_models, left_eye_img)[0] == 1
        right_closed = ensemble_predict(eye_models, right_eye_img)[0] == 1
        # Same rule as averaging the two 0/1 predictions and comparing to 1:
        # the eyes only count as closed when both are predicted closed.
        eye_pred = 1 if (left_closed and right_closed) else 0
        mouth_pred = ensemble_predict(
            mouth_models, mouth_img, rgb_model_indices=mouth_rgb_model_indices
        )[0]

        if eye_pred == 1 or mouth_pred == 1:
            print("⚠️ Drowsiness Detected!")
        else:
            print("✅ Driver is Alert")


In [None]:
frame = cv2.imread('/content/Yawn_Dataset/yawn/1000.jpg')  # Upload your test image
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; stop here with a clear message instead of failing inside OpenCV.
if frame is None:
    raise FileNotFoundError("Could not read test image: /content/Yawn_Dataset/yawn/1000.jpg")
detect_drowsiness(frame, eye_models, mouth_models)