In [1]:
import cv2

cap = cv2.VideoCapture(0)  # Try changing 0 to 1, etc.
if not cap.isOpened():
    print("Cannot open camera")
    exit()
ret, frame = cap.read()
if not ret:
    print("Can't receive frame (stream end?). Exiting ...")
    exit()
cv2.imshow('Frame', frame)
cv2.waitKey(0)
cap.release()
cv2.destroyAllWindows()


qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/dhruv/AnacondaInstallation/envs/tf-env/lib/python3.9/site-packages/cv2/qt/plugins"


In [4]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

def load_data(yawn_dir, no_yawn_dir, img_size=(64, 64)):
    """
    Load images from directories and prepare them for training.
    
    Args:
        yawn_dir (str): Path to directory containing yawn images
        no_yawn_dir (str): Path to directory containing no_yawn images
        img_size (tuple): Target size for resizing images
        
    Returns:
        X (numpy array): Image data
        y (numpy array): Labels
    """
    data = []
    labels = []
    
    # Load yawn images
    for img_file in os.listdir(yawn_dir):
        img_path = os.path.join(yawn_dir, img_file)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        
        if img is not None:
            img = cv2.resize(img, img_size)
            data.append(img)
            labels.append(1)  # Label for yawn
    
    # Load no_yawn images
    for img_file in os.listdir(no_yawn_dir):
        img_path = os.path.join(no_yawn_dir, img_file)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        
        if img is not None:
            img = cv2.resize(img, img_size)
            data.append(img)
            labels.append(0)  # Label for no_yawn
    
    # Convert to numpy arrays
    X = np.array(data)
    y = np.array(labels)
    
    return X, y

def preprocess_data(X, y, test_size=0.2, random_state=42):
    """
    Preprocess data for training.
    
    Args:
        X (numpy array): Image data
        y (numpy array): Labels
        test_size (float): Proportion of data to use for testing
        random_state (int): Seed for random number generator
        
    Returns:
        X_train (numpy array): Training images
        X_test (numpy array): Testing images
        y_train (numpy array): Training labels
        y_test (numpy array): Testing labels
    """
    # Reshape and normalize
    X = X.reshape(-1, 64, 64, 1).astype('float32') / 255
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                        test_size=test_size, 
                                                        random_state=random_state)
    
    return X_train, X_test, y_train, y_test

# Set paths to your collected data
yawn_dir = 'yawn'
no_yawn_dir = 'no_yawn'

# Load data
X, y = load_data(yawn_dir, no_yawn_dir)

# Preprocess data
X_train, X_test, y_train, y_test = preprocess_data(X, y)

# Save preprocessed data (optional but recommended)
np.savez('preprocessed_data.npz', 
         X_train=X_train, 
         X_test=X_test, 
         y_train=y_train, 
         y_test=y_test)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")

Training samples: 340
Testing samples: 86


In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Load preprocessed data
data = np.load('preprocessed_data.npz')
X_train = data['X_train']
X_test = data['X_test']
y_train = data['y_train']
y_test = data['y_test']

# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train,
                    epochs=20,
                    batch_size=32,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stop])

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'\nTest accuracy: {test_acc}')

# Save the model
model.save('yawn_detection_model.h5')

2025-03-26 10:27:23.987138: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-26 10:27:24.028030: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742965044.078038  213045 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742965044.093285  213045 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1742965044.142515  213045 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Epoch 1/20


I0000 00:00:1742965058.088665  216311 service.cc:152] XLA service 0x71ccc8018590 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742965058.088730  216311 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-03-26 10:27:38.198441: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1742965059.075260  216311 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 12ms/step - accuracy: 0.5780 - loss: 0.6817 

I0000 00:00:1742965065.351795  216311 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 985ms/step - accuracy: 0.5807 - loss: 0.6791 - val_accuracy: 0.6395 - val_loss: 0.6031
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.6507 - loss: 0.5759 - val_accuracy: 0.8256 - val_loss: 0.3551
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.8647 - loss: 0.3312 - val_accuracy: 0.9070 - val_loss: 0.2511
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.9359 - loss: 0.1903 - val_accuracy: 0.9419 - val_loss: 0.1334
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.9646 - loss: 0.1043 - val_accuracy: 0.9419 - val_loss: 0.1388
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.9429 - loss: 0.1340 - val_accuracy: 0.9535 - val_loss: 0.0935
Epoch 7/20
[1m11/11[0m [32m━━━━━━━━━━━━━




Test accuracy: 1.0


In [8]:
import tensorflow as tf

# Load the trained .h5 model
model = tf.keras.models.load_model('yawn_detection_model.h5')

# Convert to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model
with open('yawn_detection_model.tflite', 'wb') as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: /tmp/tmp_zc5b8v_/assets


INFO:tensorflow:Assets written to: /tmp/tmp_zc5b8v_/assets


Saved artifact at '/tmp/tmp_zc5b8v_'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  125126557915088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126557916320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126560244304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125125622319936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126558061664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126558061488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126559691344: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126559691168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126559777888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  125126559777712: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1742965323.639924  213045 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1742965323.639963  213045 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-03-26 10:32:03.641066: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp_zc5b8v_
2025-03-26 10:32:03.643324: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-03-26 10:32:03.643361: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp_zc5b8v_
I0000 00:00:1742965323.660166  213045 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled
2025-03-26 10:32:03.663728: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-03-26 10:32:03.807063: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp_zc5b8v_
2025-03-26 10:32:03.843060: I tensorflow/cc/saved_model/loader.cc:471] SavedModel 

In [9]:
import cv2
import dlib
import numpy as np
import time
import tensorflow as tf

# Load the trained TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path='yawn_detection_model.tflite')
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Initialize webcam with optimized settings
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)  # Lower resolution for better performance
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cap.set(cv2.CAP_PROP_FPS, 30)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Initialize dlib's face detector and facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor_path = "/home/dhruv/CollegeProject/Trial3/customYawndata/shape_predictor_68_face_landmarks.dat"
predictor = dlib.shape_predictor(predictor_path)

def extract_mouth(image, landmarks):
    mouth_points = []
    for i in range(48, 68):
        mouth_points.append((landmarks.part(i).x, landmarks.part(i).y))
    mouth_points = np.array(mouth_points)
    x, y, w, h = cv2.boundingRect(mouth_points)
    mouth_roi = image[y:y+h, x:x+w]
    return mouth_roi

def predict_yawn(mouth_roi):
    if mouth_roi.size == 0:
        return "Unknown"
    
    mouth_roi = cv2.resize(mouth_roi, (64, 64))
    mouth_roi = mouth_roi.reshape(1, 64, 64, 1) / 255.0
    
    # Set input tensor
    interpreter.set_tensor(input_details[0]['index'], mouth_roi.astype(np.float32))
    
    # Run inference
    interpreter.invoke()
    
    # Get output tensor
    output = interpreter.get_tensor(output_details[0]['index'])
    
    return "Yawning" if output[0][0] > 0.5 else "Not Yawning"

frame_count = 0
start_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break
    
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect faces
    faces = detector(gray)
    
    for face in faces:
        landmarks = predictor(gray, face)
        mouth_roi = extract_mouth(gray, landmarks)
        status = predict_yawn(mouth_roi)
        
        x, y, w, h = face.left(), face.top(), face.width(), face.height()
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, status, (x, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, 
                    (0, 255, 0) if status == "Not Yawning" else (0, 0, 255), 2)
    
    cv2.imshow('Yawn Detection', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # Calculate and print FPS
    frame_count += 1
    if frame_count % 30 == 0:
        elapsed_time = time.time() - start_time
        fps = frame_count / elapsed_time
        print(f"Approximate FPS: {fps:.2f}")

cap.release()
cv2.destroyAllWindows()

    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Approximate FPS: 7.28
Approximate FPS: 7.74
