In [2]:
import cv2

import plotly.express as px

import tensorflow as tf
import tensorflow.keras as k

import numpy as np

from IPython.display import clear_output

import time

In [3]:
# Target size handed to cv2.resize for every eye crop before prediction.
IMAGE_SIZE = (86, 86)
# Shape of one model input sample: IMAGE_SIZE followed by 3 colour channels.
MODEL_INPUT_SIZE = (*IMAGE_SIZE, 3)

In [4]:
from playsound import playsound

# Audio file for the drowsiness alert. Its only visible use is the playsound
# call in the capture loop, which is currently commented out.
# NOTE(review): absolute, machine-specific path - the notebook cannot run on
# another machine without editing it; consider a path relative to a
# configurable project directory.
sound_path = r'C:\Users\rapha\My Drive\Work\jedha_dsfs\coursework\p_final_project\project_02\dog.mp3'

In [5]:
# Load the Keras model from its saved checkpoint; the capture loop calls
# model.predict on each preprocessed eye crop.
# NOTE(review): absolute, machine-specific path - consider a path relative to a
# configurable project directory so the notebook is reproducible elsewhere.
model = k.models.load_model(r"C:\Users\rapha\My Drive\Work\jedha_dsfs\coursework\p_final_project\project_02\model_checkpoints\model_checkpoint.keras")

In [6]:
def plot_img(img, width=None, height=None):
    """Display an image inline with plotly, without any margins.

    Parameters
    ----------
    img : array-like
        Image data passed straight to ``px.imshow``.
    width, height : int, optional
        Figure size in pixels. When omitted they fall back to ``IMAGE_SIZE``
        (86 x 86 with the notebook's current configuration), so the default
        call behaves as before instead of relying on a duplicated literal.

    Returns
    -------
    None
        The figure is rendered as a side effect of ``fig.show()``.
    """
    # Resolve the defaults at call time so a changed IMAGE_SIZE is picked up.
    if width is None:
        width = IMAGE_SIZE[0]
    if height is None:
        height = IMAGE_SIZE[1]

    fig = px.imshow(img, width=width, height=height)
    # Zero margins so the image fills the whole (small) figure.
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
    fig.show()

def process_img(img, image_size=IMAGE_SIZE, model_input_size=MODEL_INPUT_SIZE):
    """Turn an image crop into a single-sample batch for ``model.predict``.

    The previous implementation delegated to a nested helper without passing
    ``image_size`` / ``model_input_size`` on, so values supplied by the caller
    were silently ignored. They are now honoured.

    Parameters
    ----------
    img : numpy.ndarray
        Image crop as produced by slicing an OpenCV frame.
    image_size : tuple of int
        Target size handed to ``cv2.resize`` as ``dsize``.
    model_input_size : tuple of int
        Shape the resized image is reshaped to; must hold the same number of
        elements as the resized image.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1,) + model_input_size``.
    """
    resized = cv2.resize(img, image_size)
    # NOTE(review): cv2.resize takes dsize as (width, height) but returns
    # (height, width, channels); for non-square sizes this reshape would
    # reinterpret the pixel layout rather than transpose it - confirm before
    # using a non-square IMAGE_SIZE.
    sample = resized.reshape(model_input_size)
    # Add the leading batch axis.
    return np.expand_dims(sample, axis=0)

In [8]:
# Banner image overlaid on the webcam frame when drowsiness is detected.
# NOTE(review): absolute, machine-specific path - consider a path relative to a
# configurable project directory.
banner_path = r"C:\Users\rapha\My Drive\Work\jedha_dsfs\coursework\p_final_project\project_02\banner_wake_up.jpg"
banner_image = cv2.imread(banner_path)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an opaque error inside cv2.resize.
if banner_image is None:
    raise FileNotFoundError(f"Could not read banner image: {banner_path}")
banner_size = (150, 150)
banner_image = cv2.resize(banner_image, banner_size)

# Binary mask of the banner: 255 where the grayscale value is above 1, else 0,
# i.e. it marks the non-black banner pixels that should cover the frame.
banner_gray = cv2.cvtColor(banner_image, cv2.COLOR_BGR2GRAY)
_, mask = cv2.threshold(banner_gray, 1, 255, cv2.THRESH_BINARY)

# --- capture-loop timing and bookkeeping ---
frame_rate = 9                 # maximum number of frames processed per second
prev = 0                       # time.time() of the last processed frame
seconds_between_sounds = 5     # only referenced by the commented-out sound alert
seconds_between_images = 0.2
last_image = last_sound = 0    # timestamps of the last banner / last sound
pred_buffer_short = []

# --- prediction buffer ---
# pred_buffer holds one boolean per eye prediction, oldest first.
pred_buffer = []
long_buffer_depth_ratio = 3    # long window, expressed in seconds of frames
short_buffer_depth_ratio = 1   # short (most recent) window, in seconds of frames
# Maximum number of entries kept in pred_buffer.
long_buffer_depth = int(round(long_buffer_depth_ratio * frame_rate))
# Index in pred_buffer at which the short window starts (not its length).
short_buffer_depth = long_buffer_depth - int(round(short_buffer_depth_ratio * frame_rate))

# Fraction of positive entries in the long window above which drowsiness is flagged.
pred_threshold = 0.2

In [9]:
# Webcam drowsiness loop: detect faces and eyes with Haar cascades, classify
# each eye crop with the model, keep a rolling buffer of the results and
# overlay a banner on the frame while the buffer indicates drowsiness.
# Press "q" in the "Webcam" window to stop.

# Load the Haar cascades used to find faces and eyes in each frame.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye_tree_eyeglasses.xml')
# Created without a cascade file and not used by the loop below.
mouth_cascade = cv2.CascadeClassifier()

# A CascadeClassifier built from a missing/unreadable file is silently empty
# and only fails once detectMultiScale is called, so check before the camera
# is opened.
for cascade_label, cascade in (("face", face_cascade), ("eye", eye_cascade)):
    if cascade.empty():
        raise FileNotFoundError(
            f"Could not load the {cascade_label} Haar cascade XML file "
            "from the current working directory."
        )


def _is_drowsy(buffer, recent_start, threshold):
    """Decide drowsiness from the rolling list of boolean eye predictions.

    Drowsy when the recent window ``buffer[recent_start:]`` contains at least
    one positive AND either the share of positives in the whole buffer exceeds
    ``threshold`` or the recent window consists of positives only.

    NOTE(review): the recent window is indexed from the front of the buffer,
    so it is empty until the buffer holds more than ``recent_start`` entries
    and shorter than intended until the buffer is full - confirm that this
    warm-up behaviour is wanted.
    """
    recent = buffer[recent_start:]
    if not any(recent):
        return False
    return bool(np.mean(buffer) > threshold) or all(recent)


def _overlay_banner(frame, banner, banner_mask, margin=10):
    """Copy the masked banner pixels into the bottom-right corner of ``frame``.

    The frame is modified in place. Only pixels where ``banner_mask`` is
    non-zero are replaced; copying (instead of adding uint8 arrays) avoids
    modulo-256 wrap-around on pixels outside the mask. Frames too small to
    hold the banner plus ``margin`` are left untouched.
    """
    banner_h, banner_w = banner_mask.shape[:2]
    target = frame[-banner_h - margin:-margin, -banner_w - margin:-margin]
    if target.shape[:2] != (banner_h, banner_w):
        return
    visible = banner_mask > 0
    # `target` is a view, so this writes straight into `frame`.
    target[visible] = banner[visible]


# get video capture
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when the loop is interrupted (kernel interrupt) or a prediction step raises.
try:
    while True:
        # Time since the last processed frame
        time_elapsed = time.time() - prev

        # Capture frame by frame
        ret, frame = cap.read()
        if not ret:
            break

        # Only process up to `frame_rate` frames per second
        if time_elapsed <= 1. / frame_rate:
            continue
        prev = time.time()

        # Convert to grayscale for the cascades
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Extract faces using the cascade
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = frame[y:y+h, x:x+w]

            # Eye coordinates are relative to the face region
            eyes = eye_cascade.detectMultiScale(roi_gray)

            for (ex, ey, ew, eh) in eyes:
                eye_img = roi_color[ey:ey+eh, ex:ex+ew]

                # Process eye img for prediction
                predictable_eye = process_img(eye_img)
                prediction = model.predict(predictable_eye, verbose=0)

                # A model output <= 0 is stored as a positive (True) entry.
                # NOTE(review): presumably True means "eye closed" - confirm
                # against the model's label encoding.
                pred_buffer.append(not (prediction[0][0] > 0))

                # If buffer is full, remove oldest element
                if len(pred_buffer) > long_buffer_depth:
                    pred_buffer.pop(0)

            drowsy = _is_drowsy(pred_buffer, short_buffer_depth, pred_threshold)

            if drowsy:
                print("You're falling asleep!")
                # wait=True defers the clear until the next output arrives,
                # so the message stays visible without piling up.
                clear_output(wait=True)
                _overlay_banner(frame, banner_image, mask)
                last_image = time.time()

                # play sound (disabled)
                # if (time.time() - last_sound) > seconds_between_sounds:
                #     playsound(sound_path)
                #     last_sound = time.time()
            else:
                clear_output()

        cv2.imshow("Webcam", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()