In [1]:
import cv2
import mediapipe as mp
import time
import pyttsx3
import numpy as np
from scipy.spatial import distance as dist

# --- Text-to-speech engine used for the spoken alerts ---
engine = pyttsx3.init()
voices = engine.getProperty('voices')
# The original author's note for the voice list was "0:Fr / 1:Eng"; that
# ordering depends on which voices are installed on the machine.
# Only select index 1 when it exists: on a system with a single installed
# voice, voices[1] would raise IndexError and abort the whole cell, so in
# that case the driver's default voice is kept.
if len(voices) > 1:
    engine.setProperty('voice', voices[1].id)
engine.setProperty('rate', 125)

# --- MediaPipe face-mesh objects shared by the capture loop ---
mpDraw = mp.solutions.drawing_utils
mpFaceMesh = mp.solutions.face_mesh
# A single face is tracked (max_num_faces=1).
faceMesh = mpFaceMesh.FaceMesh(max_num_faces=1)
# One drawing spec, reused for both landmark points and connections.
drawSpec = mpDraw.DrawingSpec(thickness=1, circle_radius=1)

In [3]:
# Label lookup indexed by (class id - 1); the capture loop reads it as
# classNames[classId - 1]. Slots whose label is not of interest hold the
# placeholder string '0'. The table is assembled from runs so the position
# of every real label is easy to verify:
#   index  0        -> 'person'
#   index 29        -> 'eye glasses'
#   index 43 .. 60  -> tableware and food labels
#   index 76        -> 'cell phone'
#   index 83        -> 'book'
classNames = (
    ['person']
    + ['0'] * 28
    + ['eye glasses']
    + ['0'] * 13
    + [
        'bottle', 'plate', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
        'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake',
    ]
    + ['0'] * 15
    + ['cell phone']
    + ['0'] * 6
    + ['book']
    + ['0'] * 7
)

In [10]:
### PARAMETERS ###

# Index of the capture device handed to cv2.VideoCapture.
camera = 0

# draw: overlay the face-mesh contours on each frame.
# audio_alarm: speak the alerts through the TTS engine in addition to the
# on-screen text.
draw = True
audio_alarm = False

# Eye-closure alert: raised once the eye aspect ratio has stayed below
# EYE_AR_THRESH for EYE_AR_CONSEC_FRAMES consecutive frames.
EYE_AR_THRESH = 0.2
EYE_AR_CONSEC_FRAMES = 40

# Yawn alert: raised after YAWN_CONSEC_FRAMES consecutive frames over the
# yawn threshold. The capture loop derives that threshold from the mouth
# width on every frame, so the value 30 is not what frames are compared to.
YAWN_THRESH = 30
YAWN_CONSEC_FRAMES = 40

# Detector class ids that get a bounding box and label drawn.
# NOTE(review): the first entry is the *string* '1', which never equals an
# integer class id, so class 1 ('person') is effectively excluded. This looks
# like a deliberate way to switch it off; confirm before changing it to 1.
classes = ['1', 30, *range(44, 62), 77, 84]

#~~~~~~~~~~~~~~~~~~~~~#

# Alert bookkeeping mutated by the capture loop:
#   cpt  / alarm_status  -> eye-closure frame counter and "already announced" latch
#   cpt2 / alarm_status2 -> yawn frame counter and "already announced" latch
cpt = cpt2 = 0
alarm_status = alarm_status2 = False

# Object detector: frozen graph weights plus the matching text graph config.
# Both files are looked up relative to the working directory.
weightsPath = 'frozen_inference_graph.pb'
configPath = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'
net = cv2.dnn_DetectionModel(weightsPath, configPath)

# Input preprocessing for the detector: swap the R and B channels, subtract a
# mean of 127.5 per channel, scale by 1/127.5 and feed 320x320 inputs.
net.setInputSwapRB(True)
net.setInputMean((127.5,) * 3)
net.setInputScale(1.0 / 127.5)
net.setInputSize(320, 320)

# Face-mesh landmark indices used by the drowsiness heuristics.
# NOTE(review): the anatomical meaning of each index is taken from the
# variable names in the original code (upper_lip, lower_lip, eye_R, eye_L);
# verify against the MediaPipe face-mesh landmark map.
_UPPER_LIP = (13, 312, 267, 0, 37, 82)
_LOWER_LIP = (14, 317, 314, 17, 84, 87)
_MOUTH_WIDTH = (78, 308)  # presumably the two mouth corners
# Each eye: two "vertical" landmark pairs followed by one "horizontal" pair.
_RIGHT_EYE = ((160, 144), (158, 153), (33, 133))
_LEFT_EYE = ((385, 380), (387, 373), (362, 263))


def _mean_point(face, indices):
    """Return the [x, y] average of the landmarks of ``face`` at ``indices``.

    ``face`` is a list of [x, y] pixel coordinates indexed by landmark id.
    """
    count = len(indices)
    return [sum(face[i][0] for i in indices) / count,
            sum(face[i][1] for i in indices) / count]


def _eye_aspect_ratio(face, eye):
    """Return (A + B) / (2 * C) for one eye.

    ``eye`` is ``((a0, a1), (b0, b1), (c0, c1))``: A and B are the distances
    between the two vertical landmark pairs, C the distance between the
    horizontal pair. The ratio shrinks as the vertical distances shrink.
    """
    (a0, a1), (b0, b1), (c0, c1) = eye
    vert_a = dist.euclidean(face[a0], face[a1])
    vert_b = dist.euclidean(face[b0], face[b1])
    horiz = dist.euclidean(face[c0], face[c1])
    return (vert_a + vert_b) / (2.0 * horiz)


cap = cv2.VideoCapture(camera)
pTime = 0
try:
    # Run until Esc (key code 27) is pressed in the preview window.
    while cv2.waitKey(1) != 27:
        success, img = cap.read()
        if not success or img is None:
            # Camera missing, busy or disconnected: cap.read() yields no
            # frame, and passing None to cvtColor would raise. Stop cleanly.
            break

        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = faceMesh.process(imgRGB)
        faces = []  # landmark lists of the faces found in this frame

        # ---- Object detection overlay ----
        classIds, confs, bbox = net.detect(img, confThreshold=0.5)
        if len(classIds) != 0:
            for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
                if classId in classes:
                    cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                    cv2.putText(img, classNames[classId - 1].upper(),
                                (box[0] + 10, box[1] + 30),
                                cv2.FONT_HERSHEY_COMPLEX, .8, (0, 255, 0), 2)
                if classId == 77:
                    # Class id 77 maps to 'cell phone' in classNames.
                    cv2.putText(img, 'CELLPHONE DETECTED', (200, 400),
                                cv2.FONT_HERSHEY_COMPLEX, .8, (0, 0, 255), 2)

        # ---- Drowsiness heuristics on the face mesh ----
        if results.multi_face_landmarks:
            # The frame size is constant within a frame: read it once instead
            # of once per landmark.
            ih, iw = img.shape[:2]
            for faceLms in results.multi_face_landmarks:
                if draw:
                    mpDraw.draw_landmarks(img, faceLms, mpFaceMesh.FACEMESH_CONTOURS,
                                          drawSpec, drawSpec)

                # Landmark coordinates are scaled by the frame width/height
                # to get integer pixel positions.
                face = [[int(lm.x * iw), int(lm.y * ih)] for lm in faceLms.landmark]

                upper_lip = _mean_point(face, _UPPER_LIP)
                lower_lip = _mean_point(face, _LOWER_LIP)

                R_ear = _eye_aspect_ratio(face, _RIGHT_EYE)
                L_ear = _eye_aspect_ratio(face, _LEFT_EYE)
                ear = (R_ear + L_ear) / 2.0

                # Mouth opening: distance between the averaged lip points.
                dist1 = dist.euclidean(upper_lip, lower_lip)

                if ear < EYE_AR_THRESH:
                    cpt += 1
                    cv2.putText(img, str(cpt),
                                (50, 100), cv2.FONT_HERSHEY_PLAIN,
                                3, (0, 0, 255), 2)
                    if cpt >= EYE_AR_CONSEC_FRAMES:
                        cv2.putText(img, 'SLEEP ALERT!',
                                    (120, 100), cv2.FONT_HERSHEY_PLAIN,
                                    3, (0, 0, 255), 2)
                        # Latch so the spoken alert fires once per episode.
                        if not alarm_status:
                            alarm_status = True
                            if audio_alarm:
                                # runAndWait() blocks, so the video pauses
                                # while the sentence is spoken.
                                engine.say("wake up sir !")
                                engine.runAndWait()
                else:
                    cpt = 0
                    alarm_status = False

                # The yawn threshold is relative to the current mouth width,
                # so it adapts to how large the face appears in the frame.
                # It is kept in a local name so the YAWN_THRESH configuration
                # constant is no longer silently overwritten on every frame.
                yawn_thresh = dist.euclidean(face[_MOUTH_WIDTH[0]],
                                             face[_MOUTH_WIDTH[1]]) / 1.5
                if dist1 > yawn_thresh:
                    cpt2 += 1
                    cv2.putText(img, str(cpt2),
                                (50, 150), cv2.FONT_HERSHEY_PLAIN,
                                3, (0, 0, 255), 2)
                    if cpt2 >= YAWN_CONSEC_FRAMES:
                        cv2.putText(img, 'Yawn alert',
                                    (120, 150), cv2.FONT_HERSHEY_PLAIN,
                                    3, (0, 0, 255), 2)
                        if not alarm_status2:
                            alarm_status2 = True
                            if audio_alarm:
                                engine.say("Take some rest sir")
                                engine.runAndWait()
                else:
                    cpt2 = 0
                    alarm_status2 = False

                cv2.putText(img, 'Yawn: ' + str(round(dist1, 2)),
                            (10, 200), cv2.FONT_HERSHEY_PLAIN,
                            1.5, (0, 255, 0), 1)
                cv2.putText(img, 'EAR: ' + str(round(ear, 2)),
                            (10, 240), cv2.FONT_HERSHEY_PLAIN,
                            1.5, (0, 255, 0), 1)

                faces.append(face)

        # ---- FPS overlay and display ----
        cTime = time.time()
        elapsed = cTime - pTime
        # Two frames can carry the same timestamp on coarse clocks; avoid a
        # ZeroDivisionError in that case.
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
        cv2.imshow("Video", img)
finally:
    # Always hand the camera back and close the preview window, including
    # when the loop exits through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()