In [1]:
from tensorflow.keras.models import model_from_json
import cv2 as cv
import pyautogui
import numpy as np

In [2]:
# Load the model architecture from its JSON description.
# The context manager guarantees the file is closed even if reading fails.
with open('data/model/output/modelboredom.json', 'r') as json_file:
    loaded_model_json = json_file.read()
modelTL = model_from_json(loaded_model_json)
# Load the trained weights into the freshly built model.
modelTL.load_weights("data/model/output/modelboredom.h5")

In [3]:
def detect_faces(route):
    """Detect faces in an image and outline each one with a green rectangle.

    Args:
        route: The image to analyse, as an array accepted by ``cv.cvtColor``
            with ``COLOR_BGR2GRAY`` (despite its name this is image data, not
            a file path). It is modified in place: a 2-pixel green rectangle
            is drawn around every detected face.

    Returns:
        The value returned by ``CascadeClassifier.detectMultiScale``; each
        entry is unpacked below as ``(column, row, width, height)``.
        NOTE(review): the caller in this file treats a ``tuple`` result as
        "no faces found" - confirm against the installed OpenCV version.
    """
    original_image = route
    # Run the detector on the grayscale conversion. The original code computed
    # this image and then immediately overwrote it with the colour frame,
    # turning the conversion into dead work.
    grayscale_image = cv.cvtColor(original_image, cv.COLOR_BGR2GRAY)

    # Parsing the cascade XML is expensive and this function runs once per
    # frame, so build the classifier on first use and keep it on the function.
    face_cascade = getattr(detect_faces, "_face_cascade", None)
    if face_cascade is None:
        face_cascade = cv.CascadeClassifier('data/face_classifier.xml')
        detect_faces._face_cascade = face_cascade

    detected_faces = face_cascade.detectMultiScale(grayscale_image)

    for (column, row, width, height) in detected_faces:
        cv.rectangle(
            original_image,
            (column, row),
            (column + width, row + height),
            (0, 255, 0),
            2
        )

    return detected_faces

In [4]:
def cut_face(img, face_coords):
    """Crop the rectangular region described by ``face_coords`` out of ``img``.

    Args:
        img: Array indexable as ``img[rows, cols]``.
        face_coords: Sequence whose first four items are
            ``(x, y, width, height)`` of the region to extract.

    Returns:
        The slice ``img[y:y + height, x:x + width]``.
    """
    left, top = face_coords[0], face_coords[1]
    right = left + face_coords[2]
    bottom = top + face_coords[3]
    return img[top:bottom, left:right]

In [5]:
def init_values(faces_detected, dict_faces):
    """Reset the per-detection bookkeeping before a new round of predictions.

    Args:
        faces_detected: Faces found in the current frame; stored under the
            ``"faces"`` key.
        dict_faces: Dictionary that is updated in place with ``"faces"`` and
            with fresh, empty ``"msg"`` and ``"color"`` lists.

    Returns:
        A 3-tuple ``(dict_faces, counter, face_number)`` where ``counter``
        maps ``"bored"``, ``"engaged"`` and ``"neutral"`` to 0 and
        ``face_number`` is 0.
    """
    dict_faces.update(faces=faces_detected, msg=[], color=[])
    tallies = dict.fromkeys(("bored", "engaged", "neutral"), 0)
    return dict_faces, tallies, 0

In [6]:
def predict_state(face, frame):
    """Classify the state of a single detected face.

    Args:
        face: Face rectangle passed to ``cut_face`` (``x, y, width, height``).
        frame: Image the face is cropped from; converted with
            ``COLOR_BGR2GRAY`` after resizing.

    Returns:
        ``(predicted_state, percentage)``: the index of the highest-scoring
        model output and that score multiplied by 100, rounded to two
        decimals.
    """
    only_face = cut_face(frame, face)
    small_face = cv.resize(only_face, (48, 48))
    grey_face = cv.cvtColor(small_face, cv.COLOR_BGR2GRAY)
    # Shape the 48x48 image as a batch of one single-channel sample. The
    # previous flatten() before reshape() was a redundant extra step.
    # NOTE(review): pixel values are fed to the model unscaled - confirm this
    # matches the preprocessing used when the model was trained.
    model_input = np.asarray(grey_face).reshape(1, 48, 48, 1)
    # verbose=0 stops a progress bar being printed for every face on every
    # inference, which otherwise floods the notebook output.
    predicted_Y = modelTL.predict(model_input, verbose=0)
    predicted_state = np.argmax(predicted_Y, axis=1)[0]
    percentage = round(predicted_Y[0][predicted_state] * 100, 2)

    return predicted_state, percentage

In [7]:
def draw_text(detected_faces, frame, counter):
    """Write each face's label and the overall tally onto ``frame``.

    Args:
        detected_faces: Dictionary with parallel ``"faces"``, ``"msg"`` and
            ``"color"`` entries (rectangle, label text and text colour).
        frame: Image that is drawn on in place.
        counter: Dictionary with ``"bored"``, ``"neutral"`` and ``"engaged"``
            counts shown in the summary line.
    """
    font = cv.FONT_HERSHEY_DUPLEX

    for idx, box in enumerate(detected_faces["faces"]):
        label = detected_faces["msg"][idx]
        # Start the label at the horizontal middle of the face, 10 px above it.
        anchor = (box[0] + int(box[2] / 2), box[1] - 10)
        cv.putText(frame, label, anchor, font, 1, detected_faces["color"][idx], 2)

    summary = f"Bored: {counter['bored']} - Neutral: {counter['neutral']} - Engaged: {counter['engaged']}"
    cv.putText(frame, summary, (0, 30), font, 1, (0, 0, 0), 2)

In [8]:
def assign_format(predicted_state):
    """Translate a predicted class index into a label and a text colour.

    Args:
        predicted_state: Class index; 0 means bored, 1 means neutral and any
            other value is treated as engaged.

    Returns:
        ``(state, color)`` where ``state`` is the label string and ``color``
        is the 3-tuple used when drawing that label.
    """
    if predicted_state == 0:
        return "bored", (0, 0, 255)
    if predicted_state == 1:
        return "neutral", (255, 255, 255)
    return "engaged", (0, 255, 0)


In [9]:
def ask_input():
    """Ask the user which input device to use until a valid choice is given.

    Returns:
        ``0`` for the webcam or ``1`` for the screen.
    """
    option = -1
    while option not in (0, 1):
        answer = input("Choose input device: \n0 - Webcam \n1 - Screen\n")
        try:
            option = int(answer)
        except ValueError:
            # Non-numeric input (including an empty line) used to crash with
            # ValueError; treat it as an invalid choice and ask again.
            option = -1
    return option

In [10]:
def take_frame(option, video):
    """Grab one frame from the selected input device.

    Args:
        option: ``0`` reads from ``video``; any other value captures the
            screen instead.
        video: Object whose ``read()`` returns a ``(status, frame)`` pair;
            only used when ``option`` is ``0``.

    Returns:
        The frame from ``video.read()``, or a screenshot resized to 1280x720
        and converted from RGB to BGR channel order.
    """
    if option != 0:
        screen_rgb = np.array(pyautogui.screenshot())
        screen_resized = cv.resize(screen_rgb, (1280, 720))
        return cv.cvtColor(screen_resized, cv.COLOR_RGB2BGR)

    _status, captured = video.read()
    return captured

In [11]:
def main():
    """Run the interactive detection loop until 'q' is pressed.

    Asks for the input device, then repeatedly grabs a frame, detects faces,
    re-classifies them on every fifth frame (when faces are present), overlays
    the latest labels and tally, and shows the result in a window.
    """
    nframe = 0
    detected_faces = dict()
    counter = None

    option = ask_input()
    # Only claim the webcam when it is the selected source; opening it for
    # screen capture needlessly locks the device.
    video = cv.VideoCapture(0) if option == 0 else None

    try:
        while True:
            frame = take_frame(option, video)
            if frame is None:
                # The webcam failed to deliver a frame (unplugged, busy, ...).
                # Stop cleanly instead of crashing inside the detector.
                break

            # detect_faces draws rectangles on `frame`; keep an untouched copy
            # so the crops sent to the classifier do not contain the green
            # border.
            clean_frame = frame.copy()
            faces_detected = detect_faces(frame)

            if len(faces_detected) > 0:
                # Classifying is the expensive step, so refresh the labels
                # only on every fifth frame and reuse them in between.
                if nframe % 5 == 0:
                    detected_faces, counter, _ = init_values(faces_detected, detected_faces)
                    for face in faces_detected:
                        predicted_state, percentage = predict_state(face, clean_frame)

                        state, color = assign_format(predicted_state)

                        detected_faces["msg"].append(state + " " + str(percentage) + "%")
                        detected_faces["color"].append(color)
                        counter[state] += 1

                # "faces" is only present once a classification round has run,
                # which is also when `counter` is populated.
                if "faces" in detected_faces:
                    draw_text(detected_faces, frame, counter)

            nframe += 1
            cv.imshow("VITAL EMO - Q to stop", frame)

            # Mask to the low byte so the comparison also works on platforms
            # where waitKey returns extra modifier bits.
            key = cv.waitKey(1) & 0xFF

            if key == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        if video is not None:
            video.release()
        cv.destroyAllWindows()

In [13]:
# Start the interactive detection loop; it prompts for the input device and
# runs until 'q' is pressed in the preview window.
main()

Choose input device: 
0 - Webcam 
1 - Screen
 1
