In [1]:
%pip install tensorflow opencv-python matplotlib numpy

Note: you may need to restart the kernel to use updated packages.


### Libraries

In [2]:
import tensorflow as tf
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

### image preprocessing

In [3]:
# Build the path with os.path.join so it also resolves on non-Windows systems
# (on Windows this is identical to the previous r"dataset\train").
DataSetDirectory = os.path.join("dataset", "train")
Classes = ["Closed_Eyes", "Open_Eyes"]
img_size = 224  # MobileNet takes (224, 224, 3) images as input
training_data = []


def create_training_data():
    """Load every image under ``DataSetDirectory/<class>`` into ``training_data``.

    Each readable image is read as 3-channel BGR, converted to RGB, resized to
    ``img_size x img_size`` and appended as ``[image_array, class_label]``.
    ``class_label`` is the index of the class folder in ``Classes``
    (0 = Closed_Eyes, 1 = Open_Eyes).

    Files that OpenCV cannot decode or process are skipped and reported in a
    one-line summary instead of being silently ignored.

    Raises:
        FileNotFoundError: propagated from ``os.listdir`` when a class folder
            does not exist.
    """
    skipped = []
    for category in Classes:
        path = os.path.join(DataSetDirectory, category)
        class_label = Classes.index(category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img_array is None:
                # cv2.imread signals an unreadable file by returning None, not by raising.
                skipped.append(img_path)
                continue
            try:
                backtorgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                new_array = cv2.resize(backtorgb, (img_size, img_size))
            except cv2.error:
                # Keep the loader best-effort, but remember what was dropped.
                skipped.append(img_path)
                continue
            training_data.append([new_array, class_label])

    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")


create_training_data()

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset\\train\\Closed_Eyes'

In [None]:
print(len(training_data))
print(training_data[0][0].shape)

# Preview one sample. The index is clamped so that datasets with fewer than
# 1201 images do not raise IndexError.
sample_index = min(1200, len(training_data) - 1)
img = training_data[sample_index][0]

# plt.imshow() takes an image array (cv2.imread() is what takes a file path).
# No cmap is passed: colour maps only apply to 2-D (grayscale) arrays.
plt.imshow(img)
plt.show()

### Split features and labels in X and Y (Numpy Array)

In [None]:
import random

# The loader appends one class folder after the other, so shuffle the
# [image, label] pairs in place before they are split.
random.shuffle(training_data)

# Pull the two columns apart: images go to x (features), class ids to y (labels).
x = [pair[0] for pair in training_data]
y = [pair[1] for pair in training_data]

# Stack into arrays. The leading -1 lets NumPy infer the number of samples;
# the remaining axes are height, width and the 3 RGB channels.
x = np.asarray(x).reshape((-1, img_size, img_size, 3))
y = np.asarray(y)


### Normalisation (MinMax Scaler)

In [None]:
%pip install scikit-learn

In [None]:
from sklearn.preprocessing import MinMaxScaler

print("Before Normalization !")
# Clamp the preview index so smaller datasets do not raise IndexError.
sample_index = min(1200, len(x) - 1)
print(x[sample_index])

# Scale pixel intensities to [0, 1] with a fixed divisor of 255.
# A MinMaxScaler fitted on the flattened images rescales every pixel position
# by its own min/max over the dataset, which is NOT the same transform as the
# `/ 255.0` applied to images at prediction time further down in this notebook.
# Using the same fixed divisor here keeps training and inference inputs consistent.
# The integer-dtype guard makes the cell safe to re-run (already-scaled float
# data is left untouched).
if np.issubdtype(x.dtype, np.integer):
    x = x.astype(np.float32) / 255.0

print("\nAfter Normalization !")
print(x[sample_index])

print("\nShape of input features !")
print(x.shape)

### Store the normalized dataset with pickle so the notebook does not have to be re-run from cell 1

In [None]:
import pickle

In [None]:
# Persist the prepared arrays. The `with` blocks guarantee the files are
# flushed and closed even if pickle.dump raises part-way through.
with open("x.pickle", "wb") as pickle_out:
    pickle.dump(x, pickle_out)

with open("y.pickle", "wb") as pickle_out:
    pickle.dump(y, pickle_out)

In [None]:
# Reload the saved, normalized data so the preprocessing cells do not have to
# be run again. The `with` blocks close the file handles, which the previous
# open()/load() pairs never did.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open("x.pickle", "rb") as pickle_in:
    x = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    y = pickle.load(pickle_in)

### Training via Transfer learning using MobileNet

In [None]:
# Instantiate Keras' stock MobileNet with its default constructor arguments
# (presumably pretrained weights plus the original classification head —
# confirm against the installed Keras version) and print its layer table.
model = tf.keras.applications.mobilenet.MobileNet()
model.summary()

### Freeze layers up to and including the dropout layer
```
freeze = True  # Flag to control freezing

for layer in model.layers:
    layer.trainable = not freeze  # Freeze every layer while the flag is set (this includes the dropout layer itself)
    if layer.name == "dropout":  # Check if the current layer is the dropout layer
        freeze = False  # Stop freezing after the dropout layer
        
```

### Freeze layers up to and including the dropout layer (index -4)
```
for i, layer in enumerate(model.layers):
    if i <= len(model.layers) - 4:  # Freeze layers up to index -4
        layer.trainable = False
    else:
        layer.trainable = True

```

In [None]:
# Second MobileNet instance; in this notebook it is only used to count layers.
# NOTE(review): `model` above is built by the same call, so len(model.layers)
# would presumably give the same number without building another network.
tmp = tf.keras.applications.mobilenet.MobileNet()

In [None]:
# Total number of layers in MobileNet; useful when choosing how many trailing
# layers to leave trainable.
print(len(tmp.layers))

In [None]:
# Fine-tuning setup: only the final 6 layers of MobileNet stay trainable,
# every earlier layer is frozen.
first_trainable = len(model.layers) - 6
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

In [None]:
# Build a new binary-classification head on top of MobileNet.
# Dropout and L2 regularization are added to the custom layers to reduce
# overfitting and improve generalization.
from tensorflow.keras import regularizers

# Input tensor of the pre-trained MobileNet; reused as the input of the new model.
base_input = model.input

# Output of the fourth layer from the end (i.e. three layers before the last one).
# Per the notes earlier in this notebook this is MobileNet's dropout layer —
# confirm with model.summary(). Everything after it (the original classifier) is discarded.
base_output = model.layers[-4].output

# Collapse the multi-dimensional feature tensor into a 1-D vector per sample,
# because the dense layer below expects 1-D input.
flat_layer = tf.keras.layers.Flatten(name="flatten")(base_output)
    
# Drop 50% of the features during training to reduce overfitting.
dropout_layer = tf.keras.layers.Dropout(0.5, name="dropout_1")(flat_layer)

# Single-unit dense layer: one output is enough for the two-class problem
# ("Closed_Eyes" vs. "Open_Eyes"). Its kernel weights carry an L2 penalty of 0.01.
dense_layer = tf.keras.layers.Dense(
    1, 
    kernel_regularizer=regularizers.l2(0.01),  # L2 regularization
    name="dense_1"
)(dropout_layer)
    
# Sigmoid squashes the dense output into (0, 1) so it can be read as a probability.
final_output = tf.keras.layers.Activation('sigmoid', name="sigmoid_output")(dense_layer)

In [None]:
# Assemble the transfer-learning model: MobileNet's input tensor wired through
# to the new sigmoid output defined above.
new_model = tf.keras.Model(inputs=base_input, outputs=final_output)

In [None]:
# Compile the model for binary classification:
#   - loss: binary cross-entropy, matching the single sigmoid output unit
#   - optimizer: Adam with an explicit learning rate of 1e-4
#   - metric: accuracy, reported during fit/evaluate
new_model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy']
)

# An earlier variant of this cell passed optimizer='adam' (no explicit learning rate).

In [None]:
# Print the layer table of the assembled model (MobileNet backbone + new head).
new_model.summary()

In [None]:
# Train for 4 epochs in mini-batches of 10, holding out 30% of (x, y) for validation.
new_model.fit(x, y, batch_size=10, epochs=4, validation_split=0.3)

In [None]:
# Persist the trained model to my_model.h5 in the current working directory.
new_model.save('my_model.h5')

### Testing on testing dataset

In [None]:
# Reload the saved model so the testing section can be run without retraining.
new_model = tf.keras.models.load_model('my_model.h5')

In [None]:
# Build the path with os.path.join so it also resolves on non-Windows systems
# (on Windows this is identical to the previous r"dataset\test").
test_data_directory = os.path.join("dataset", "test")  # Update this path to your testing dataset
img_size = 224  # Same as used during training
classes = ["Closed_Eyes", "Open_Eyes"]  # Same class labels as training

# Preprocess the testing data
testing_data = []


def preprocess_testing_data():
    """Load every image under ``test_data_directory/<class>`` into ``testing_data``.

    Applies the same steps as the training loader: read as 3-channel BGR,
    convert to RGB, resize to ``img_size x img_size`` and append
    ``[image_array, class_label]``, where ``class_label`` is the index of the
    class folder in ``classes``.

    Files that OpenCV cannot decode or process are skipped and reported in a
    one-line summary instead of being silently ignored.

    Raises:
        FileNotFoundError: propagated from ``os.listdir`` when a class folder
            does not exist.
    """
    skipped = []
    for category in classes:
        path = os.path.join(test_data_directory, category)
        class_label = classes.index(category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img_array is None:
                # cv2.imread signals an unreadable file by returning None, not by raising.
                skipped.append(img_path)
                continue
            try:
                backtorgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)  # Same as training
                new_array = cv2.resize(backtorgb, (img_size, img_size))
            except cv2.error:
                # Keep the loader best-effort, but remember what was dropped.
                skipped.append(img_path)
                continue
            testing_data.append([new_array, class_label])

    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")


preprocess_testing_data()

In [None]:
# Number of test samples that were loaded (shown as the cell output).
len(testing_data)

In [None]:
# Pull the [image, label] pairs apart: images go to x_test, class ids to y_test.
x_test = [pair[0] for pair in testing_data]
y_test = [pair[1] for pair in testing_data]

# Stack into arrays shaped like the model input: (samples, height, width, 3).
x_test = np.asarray(x_test).reshape((-1, img_size, img_size, 3))
y_test = np.asarray(y_test)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Normalisation of test data
# Scale pixel intensities to [0, 1] with a fixed divisor of 255.
# Fitting a fresh MinMaxScaler on the test images would rescale every pixel
# position by statistics of the test set itself, which matches neither the
# training data nor the `/ 255.0` applied to images at prediction time below.
# The integer-dtype guard makes the cell safe to re-run (already-scaled float
# data is left untouched).
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype(np.float32) / 255.0

### Save normalised testing data via pickle

In [None]:
# Persist the prepared test arrays. The `with` blocks guarantee the files are
# flushed and closed even if pickle.dump raises part-way through.
with open("x_test.pickle", "wb") as pickle_out:
    pickle.dump(x_test, pickle_out)

with open("y_test.pickle", "wb") as pickle_out:
    pickle.dump(y_test, pickle_out)

In [None]:

# Reload the saved test arrays. The `with` blocks close the file handles,
# which the previous open()/load() pairs never did.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open("x_test.pickle", "rb") as pickle_in:
    x_test = pickle.load(pickle_in)

with open("y_test.pickle", "rb") as pickle_in:
    y_test = pickle.load(pickle_in)

In [None]:
# Evaluate the model on the held-out testing data in batches of 25.
# The two returned values are unpacked as the loss and the accuracy metric
# configured when the model was compiled.
test_loss, test_accuracy = new_model.evaluate(x_test, y_test, batch_size=25, verbose=1)
print(f"Testing Loss: {test_loss}")
print(f"Testing Accuracy: {test_accuracy}")

### Prediction demo

In [None]:
# Load one known closed-eye test image and prepare it exactly like the training data.
# os.path.join keeps the path portable (identical to the old backslash path on Windows).
closed_eye_path = os.path.join("dataset", "test", "Closed_Eyes", "_112.jpg")
temp_img = cv2.imread(closed_eye_path, cv2.IMREAD_COLOR)
if temp_img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {closed_eye_path}")

temp_backtorgb = cv2.cvtColor(temp_img, cv2.COLOR_BGR2RGB)
temp_img_array = cv2.resize(temp_backtorgb, (img_size, img_size))
temp_array = np.array(temp_img_array).reshape(1, img_size, img_size, 3)  # add the batch axis
temp_array = temp_array/255.0  # Normalize the image
plt.imshow(temp_img_array)

In [None]:
# Run the model on the single preprocessed image and print the raw sigmoid score.
# Label 0 is Closed_Eyes and label 1 is Open_Eyes, so a low score means "closed".
temp_prediction = new_model.predict(temp_array)
print(temp_prediction)

In [None]:
# Load one known open-eye test image and prepare it exactly like the training data.
# os.path.join keeps the path portable (identical to the old backslash path on Windows).
open_eye_path = os.path.join("dataset", "test", "Open_Eyes", "_112.jpg")
temp_img = cv2.imread(open_eye_path, cv2.IMREAD_COLOR)
if temp_img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {open_eye_path}")

temp_backtorgb = cv2.cvtColor(temp_img, cv2.COLOR_BGR2RGB)
temp_img_array = cv2.resize(temp_backtorgb, (img_size, img_size))
temp_array = np.array(temp_img_array).reshape(1, img_size, img_size, 3)  # add the batch axis
temp_array = temp_array/255.0  # Normalize the image
plt.imshow(temp_img_array)

In [None]:
# Run the model on the single preprocessed image and print the raw sigmoid score.
# Label 0 is Closed_Eyes and label 1 is Open_Eyes, so a high score means "open".
prediction = new_model.predict(temp_array)
print(prediction)

### Prediction on Unknown image: Fetching eyes using Haar Cascade

In [None]:
women_img_path = r"sad_women_open_eye2.webp"
women_img = cv2.imread(women_img_path, cv2.IMREAD_COLOR)
if women_img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {women_img_path}")

# cv2 reads images in BGR order by default; convert to RGB for matplotlib.
women_img = cv2.cvtColor(women_img, cv2.COLOR_BGR2RGB)
print(women_img.shape)
plt.imshow(women_img)
plt.show()

In [None]:
# Frontal-face Haar cascade loaded from the XML files bundled with OpenCV.
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [None]:
# Eye Haar cascade loaded from the XML files bundled with OpenCV.
eyeCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

In [None]:
# Convert the image to grayscale for detection. women_img is already RGB at
# this point, hence COLOR_RGB2GRAY rather than COLOR_BGR2GRAY.
gray_women_img = cv2.cvtColor(women_img, cv2.COLOR_RGB2GRAY)

# Detect eyes in the grayscale image.
#
# scaleFactor=1.1:
#   The cascade scans the image at multiple scales so that objects of
#   different sizes can be found; 1.1 means a 10% size step between scales.
#
# minNeighbors=4:
#   The sliding window usually hits the same object several times at slightly
#   different positions/sizes. Overlapping hits are grouped, and a group is
#   only kept if it has at least this many members (4 here).
#
# minSize is NOT passed in this call, so no lower bound on the detection size
# is applied. Passing e.g. minSize=(30, 30) would ignore anything smaller
# than 30x30 pixels.
eyes = eyeCascade.detectMultiScale(gray_women_img, scaleFactor=1.1, minNeighbors=4)

In [None]:
# Check if any eyes are detected
if len(eyes) == 0:
    print("No eyes detected.")
else:
    # Draw a rectangle around every detected eye.
    # Each detection is (left, top, width, height). The loop variables are
    # deliberately NOT named x / y: those names hold the training features and
    # labels in this notebook and must not be overwritten here.
    for (ex, ey, ew, eh) in eyes:
        # Arguments: target image (modified in place), top-left corner,
        # bottom-right corner, colour (green, RGB order) and line thickness.
        cv2.rectangle(women_img, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)

In [None]:
# Display the image with the rectangles that were drawn onto women_img.
plt.imshow(women_img)
plt.show()

### Cropping the eyes

In [None]:
# Crop every detected eye and show it.
#
# `eyes` holds one (left, top, width, height) box per detection. The loop
# variables are deliberately NOT named x / y: those names hold the training
# features and labels in this notebook and must not be overwritten here.
for (bx, by, bw, bh) in eyes:
    # roi = region of interest: the same box cut from the grayscale image
    # (for detection) and from the RGB image (for display / prediction).
    roi_gray = gray_women_img[by:by+bh, bx:bx+bw]
    roi_color = women_img[by:by+bh, bx:bx+bw]

    # Run the eye cascade a second time on the cropped grayscale region to
    # tighten the box in case the first detection was too broad.
    eyes_in_roi_gray = eyeCascade.detectMultiScale(roi_gray)

    if len(eyes_in_roi_gray) == 0:
        print("Eyes are not detected")
    else:
        # (ex, ey, ew, eh) are coordinates relative to the ROI, so the final
        # crop is taken from roi_color, not from the full image.
        for (ex, ey, ew, eh) in eyes_in_roi_gray:
            eye_roi = roi_color[ey:ey+eh, ex:ex+ew]
            plt.imshow(eye_roi)
            plt.show()

# NOTE: eye_roi keeps the LAST crop produced above; it is only defined if the
# second detection pass found at least one eye.

In [None]:
# Show the last stored eye crop; this is the image that gets classified below.
plt.imshow(eye_roi)

In [None]:
# Size of the raw crop before it is resized to the model's input resolution.
eye_roi.shape

In [None]:
# Resize the crop to the model's input resolution and preview it.
final_img = cv2.resize(eye_roi, (img_size, img_size))
plt.imshow(final_img)
print(final_img.shape)
# The batch dimension is added in the following cell, right before prediction.

In [None]:
# Build the model input in its own variable instead of overwriting final_img.
# That keeps this cell idempotent: re-running it no longer divides an
# already-normalized array by 255 a second time.
final_input = np.array(final_img).reshape(1, img_size, img_size, 3) / 255.0  # add batch axis + normalize
prediction = new_model.predict(final_input)
print(prediction)

### Live Video Demo of Drowsiness alert

In [None]:
# Live drowsiness demo: read webcam frames, classify every detected eye as
# open/closed with new_model and beep after a run of closed-eye detections.
# Press 'q' in the video window to stop.

import winsound  # Windows-only standard-library module used for the warning beep

frequency = 2500  # beep pitch in Hz (high-pitched and noticeable)
duration = 1000   # beep length in ms (1 second)

# Haar cascades are built once, outside the frame loop; re-creating the eye
# cascade on every frame re-parsed the XML file each time.
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Eye detector variant with eyeglasses support.
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye_tree_eyeglasses.xml')

# Try camera index 1 first, then fall back to the default camera (index 0).
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    cap.release()
    cap = cv2.VideoCapture(0)

if not cap.isOpened():
    raise IOError("Cannot open webcam")

# Number of consecutive closed-eye detections; reset whenever an eye is
# classified as open and after each alert.
counter = 0
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() reports failure through `ret`; `frame` is unusable then.
            print("Failed to read a frame from the webcam; stopping.")
            break

        # Detection runs on a grayscale copy of the frame.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Draw a green rectangle (2 px) around every detected face.
        # Loop variables are not named x / y so that the notebook's training
        # arrays of the same name are not overwritten.
        faces = faceCascade.detectMultiScale(gray, 1.1, 4)
        for (fx, fy, fw, fh) in faces:
            cv2.rectangle(frame, (fx, fy), (fx + fw, fy + fh), (0, 255, 0), 2)

        eyes = eye_cascade.detectMultiScale(gray, 1.1, 4)

        if len(eyes) == 0:
            print("eyes are not detected")
        else:
            for (ex, ey, ew, eh) in eyes:
                eyes_roi = frame[ey:ey+eh, ex:ex+ew]

                # Webcam frames are BGR, but the model was trained on RGB
                # images, so convert before resizing to the 224x224 model input.
                eyes_roi_rgb = cv2.cvtColor(eyes_roi, cv2.COLOR_BGR2RGB)
                final_image = cv2.resize(eyes_roi_rgb, (224, 224))

                # Add the batch axis and scale to [0, 1], as done for the
                # single-image predictions earlier in the notebook.
                final_image = final_image.reshape(1, 224, 224, 3) / 255.0

                # verbose=0 silences the per-call progress bar in the frame loop.
                Predictions = new_model.predict(final_image, verbose=0)
                # The model has one sigmoid unit and the batch holds one image,
                # so the score is the single element of the (1, 1) output.
                open_probability = float(Predictions[0][0])

                if open_probability > 0.5:
                    # Eyes open: the run of closed-eye detections is broken.
                    counter = 0
                    status = "Open Eyes"

                    # 'Open Eyes' in green at position (150, 150).
                    cv2.putText(
                        frame, status, (150, 150), font, 3, (0, 255, 0), 2, cv2.LINE_4
                    )

                    # Filled black rectangle in the top-left corner.
                    x1, y1, w1, h1 = 0, 0, 175, 75
                    cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), (0, 0, 0), -1)
                else:
                    counter = counter + 1
                    status = "Closed Eyes"

                    # 'Closed Eyes' in red (BGR) at position (150, 150).
                    cv2.putText(
                        frame, status, (150, 150), font, 3, (0, 0, 255), 2, cv2.LINE_4
                    )

                    # Red rectangle around THIS eye. The previous code used the
                    # leftover face-loop variables, which were stale or undefined
                    # whenever no face had been detected in the frame.
                    cv2.rectangle(frame, (ex, ey), (ex + ew, ey + eh), (0, 0, 255), 2)

                    # More than 5 closed-eye detections in a row: raise the alert.
                    if counter > 5:
                        x1, y1, w1, h1 = 0, 0, 175, 75

                        # Black background box for the alert text.
                        cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), (0, 0, 0), -1)

                        cv2.putText(
                            frame,
                            'Sleep Alert !!',
                            (x1 + int(w1 / 10), y1 + int(h1 / 2)),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.7,
                            (0, 0, 255),
                            2
                        )

                        # Blocks for `duration` ms while the beep plays.
                        winsound.Beep(frequency, duration)
                        # Reset counter after alert
                        counter = 0

        # Show the annotated frame.
        cv2.imshow('Drowsiness Detection Tutorial', frame)

        # Wait 2 ms for a key press; 'q' ends the loop.
        if cv2.waitKey(2) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
