In [0]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive/.
# drive.mount() is interactive the first time (asks for authorization).
from google.colab import drive
drive.mount('/content/drive/')

In [0]:
import os

# Jump straight to the project folder with a single absolute path.
# A chain of relative chdir() calls only works once from /content; re-running
# the cell (or running it from another cwd) would raise FileNotFoundError.
# The root matches the mount point used by drive.mount('/content/drive/').
os.chdir(os.path.join("/content", "drive", "My Drive", "Facial Emotion Recognition"))

In [0]:
# Install a pinned TensorFlow release, then import it and print the version
# that was actually loaded by the kernel.
# NOTE(review): presumably the pre-trained .hdf5 models require this older
# release — confirm before bumping the pin.
!pip install tensorflow==1.12.0
import tensorflow as tf
print(tf.__version__)

In [0]:
from statistics import mode

import cv2
from keras.models import load_model
import numpy as np
import PIL
from utils.datasets import get_labels
from utils.inference import detect_faces
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.preprocessor import preprocess_input
import pickle

In [0]:
# Sanity check: show the current working directory, since the model paths
# defined below are relative to it.
!pwd

In [0]:
# Locations of the pre-trained models, relative to the working directory.
detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5'

# Label lookups used to turn a classifier's argmax index into text
# (indexed as gender_labels[i] / emotion_labels[i] further down).
gender_labels, emotion_labels = get_labels('imdb'), get_labels('fer2013')

In [0]:
# Size of the sliding window of recent emotion labels.
frame_window = 10

# (x, y) margins added around a detected face box before cropping the
# emotion / gender classifier inputs.
emotion_offsets, gender_offsets = (20, 40), (30, 60)

In [0]:
# Face detector first, then the two Keras classifiers (gender, emotion).
# compile=False: the models are only used for inference here.
face_detection = load_detection_model(detection_model_path)
gender_classifier, emotion_classifier = (
    load_model(model_path, compile=False)
    for model_path in (gender_model_path, emotion_model_path)
)

In [0]:
# Elements 1 and 2 of each model's input_shape; these are passed to
# cv2.resize() as the target size for the face crops.
emotion_target_size, gender_target_size = (
    classifier.input_shape[1:3]
    for classifier in (emotion_classifier, gender_classifier)
)

In [0]:
# Show both classifier input sizes, one per line.
print(emotion_target_size, gender_target_size, sep="\n")

In [0]:
# Install the face_recognition package (used below for CNN face detection
# and for the face encodings that get clustered).
# NOTE(review): the version is not pinned — consider pinning for reproducibility.
!pip install face_recognition

In [0]:
import face_recognition

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import sys
%matplotlib inline

In [0]:
# Input video, relative to the working directory.
sample_video_path = "../../Diego_Luna_interview2.mp4"

In [0]:
# Sliding window of recent emotion labels (disabled here; it is created in a
# later cell instead).
#emotion_window = []

# Open the video file for frame-by-frame reading. No preview window is
# created (the namedWindow call stays disabled).
#cv2.namedWindow('window_frame')
video_capture = cv2.VideoCapture(sample_video_path)

In [0]:
# Count the frames by decoding the whole video once.
tot = 0
while True:
    ret, bgr_image = video_capture.read()
    if not ret:
        break
    tot += 1
print(tot)

# The loop above leaves the capture positioned at end-of-stream. Rewind it so
# the processing cells that share `video_capture` start from the first frame
# instead of immediately reading nothing.
video_capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

In [0]:
# Accumulator for per-face records (one dict per detected face); the
# processing cells below append to it.
face_data = []

In [0]:
# List the working directory contents (sanity check).
!ls

In [0]:
#@title process1 { form-width: "20%" }
# Pass over the video using the Haar-cascade detector (`face_detection`).
# For every detected face this appends one dict to `face_data` with keys:
#   "bb"           - detector box as (x, y, w, h)
#   "face_index"   - running counter over detected faces
#   "frame_number" - 1-based index of the frame the face was found in
#   "encodings"    - face_recognition encoding of the face
#   "emotion_text" - label predicted by `emotion_classifier`
#   "frame_time"   - CAP_PROP_POS_MSEC value read after grabbing the frame
# Records accumulate in `face_data`; reset it first if this cell is re-run.

# Start from the first frame: an earlier cell may already have read the
# capture to the end, in which case this loop would silently do nothing.
video_capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

log_every = 1  # print a progress record every N frames
tot_frames = 0
face_index = 0
while True:
    ret, bgr_image = video_capture.read()
    if not ret:
        break

    frame_time = video_capture.get(cv2.CAP_PROP_POS_MSEC)

    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    faces = detect_faces(face_detection, gray_image)
    tot_frames += 1

    for face_coordinates in faces:
        face_index += 1

        # Crop (with margins) and resize the grayscale face for the emotion
        # model. Faces whose crop cannot be resized (e.g. an empty slice at
        # the image border) are skipped; only OpenCV errors are caught so
        # that an interrupt still stops the loop.
        x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
        gray_face = gray_image[y1:y2, x1:x2]
        try:
            gray_face = cv2.resize(gray_face, (emotion_target_size))
        except cv2.error:
            continue

        # The detector box is (x, y, w, h); face_recognition expects boxes in
        # (top, right, bottom, left) order, so convert explicitly. Plain ints
        # are passed rather than numpy scalars.
        x, y, w, h = (int(v) for v in face_coordinates)
        encodings = face_recognition.face_encodings(
            rgb_image, [(y, x + w, y + h, x)])

        gray_face = preprocess_input(gray_face, True)
        gray_face = np.expand_dims(gray_face, 0)
        gray_face = np.expand_dims(gray_face, -1)
        emotion_prediction = emotion_classifier.predict(gray_face)
        emotion_probability = np.max(emotion_prediction)
        emotion_label_arg = np.argmax(emotion_prediction)
        emotion_text = emotion_labels[emotion_label_arg]

        face_dict = {
            "bb": face_coordinates,
            "face_index": face_index,
            "frame_number": tot_frames,
            "encodings": encodings[0],
            "emotion_text": emotion_text,
            "frame_time": frame_time,
        }
        face_data.append(face_dict)

        print("face_data len: %d"%len(face_data))
        if tot_frames % log_every == 0:
            print("Face_index: ", face_index)
            print("Frame time: ", frame_time)
            print("Face emotion: ", emotion_text)
            print("Frame_number: ", tot_frames)
        
        


In [0]:
#@title process2 { form-width: "20%" }
# Pass over the first seconds of the video using face_recognition's CNN
# detector. For every detected face this appends one dict to `face_data`:
#   "bb"           - box as (top, right, bottom, left)
#   "face_index"   - running counter over detected faces
#   "frame_number" - 1-based index of the frame the face was found in
#   "encodings"    - face_recognition encoding of the face
#   "emotion_text" - label predicted by `emotion_classifier`
#   "gender_text"  - label predicted by `gender_classifier`
#   "frame_time"   - CAP_PROP_POS_MSEC value read after grabbing the frame
# Records accumulate in `face_data`; reset it first if this cell is re-run.


def _expand_box(top, right, bottom, left, offsets, frame_h, frame_w):
    """Grow a (top, right, bottom, left) box by (x_off, y_off), clamped to the frame.

    Returns the new edges as (n_left, n_right, n_top, n_bottom).
    """
    x_off, y_off = offsets
    return (max(left - x_off, 0), min(right + x_off, frame_w),
            max(top - y_off, 0), min(bottom + y_off, frame_h))


# Start from the first frame: an earlier cell may already have read the
# capture to the end, in which case this loop would silently do nothing.
video_capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

max_time_ms = 25000  # stop once the video position passes this timestamp
log_every = 1        # print/plot a progress record every N frames
tot_frames = 0
face_index = 0
while True:
    ret, bgr_image = video_capture.read()
    if not ret:
        break

    frame_time = video_capture.get(cv2.CAP_PROP_POS_MSEC)
    if frame_time > max_time_ms:
        break

    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    face_locations = face_recognition.face_locations(rgb_image, model="cnn")
    tot_frames += 1

    maxh, maxw = rgb_image.shape[:2]
    print("maxh: {0}, maxw: {1}".format(maxh,maxw))

    for top, right, bottom, left in face_locations:
        face_index += 1

        # Emotion input: grayscale crop with emotion margins.
        n_left, n_right, n_top, n_bottom = _expand_box(
            top, right, bottom, left, emotion_offsets, maxh, maxw)
        print("top: {0}\tright: {1}\tbottom: {2}\tleft: {3}".format(top, right, bottom, left))
        print("n_top: {0}\tn_right: {1}\tn_bottom: {2}\tn_left: {3}".format(n_top, n_right, n_bottom, n_left))
        gray_face = gray_image[n_top:n_bottom, n_left:n_right]

        # Gender input: RGB crop with gender margins.
        n_left, n_right, n_top, n_bottom = _expand_box(
            top, right, bottom, left, gender_offsets, maxh, maxw)
        rgb_face = rgb_image[n_top:n_bottom, n_left:n_right]

        # Skip the face if either crop cannot be resized. Only OpenCV errors
        # are caught so that an interrupt still stops the loop.
        try:
            gray_face = cv2.resize(gray_face, (emotion_target_size))
            rgb_face = cv2.resize(rgb_face, (gender_target_size))
        except cv2.error:
            continue

        encodings = face_recognition.face_encodings(rgb_image, [(top, right, bottom, left)])

        gray_face = preprocess_input(gray_face, True)
        gray_face = np.expand_dims(gray_face, 0)
        gray_face = np.expand_dims(gray_face, -1)
        emotion_prediction = emotion_classifier.predict(gray_face)
        emotion_probability = np.max(emotion_prediction)
        emotion_label_arg = np.argmax(emotion_prediction)
        emotion_text = emotion_labels[emotion_label_arg]

        rgb_face = preprocess_input(rgb_face, True)
        rgb_face = np.expand_dims(rgb_face, 0)
        gender_prediction = gender_classifier.predict(rgb_face)
        gender_label_arg = np.argmax(gender_prediction)
        gender_text = gender_labels[gender_label_arg]

        face_dict = {
            "bb": (top, right, bottom, left),
            "face_index": face_index,
            "frame_number": tot_frames,
            "encodings": encodings[0],
            "emotion_text": emotion_text,
            "gender_text": gender_text,
            "frame_time": frame_time,
        }
        face_data.append(face_dict)

        print("face_data len: %d"%len(face_data))
        if tot_frames % log_every == 0:
            print("face_index: ",face_index)
            print("frame_time: ", frame_time)
            print("emotion_text: ", emotion_text)
            print("frame_number: ", tot_frames)
            print("gender_text: ", gender_text)

            # Draw the box on a copy so the frame used for the remaining
            # faces stays untouched, then show just the face region.
            rgb_image_copy = rgb_image.copy()
            cv2.rectangle(rgb_image_copy, (left, top), (right, bottom), (255, 0, 0), 2)
            cropped_face = rgb_image_copy[top:bottom, left:right,:]
            print(cropped_face.shape)
            plt.axis("off")
            plt.imshow(cropped_face)
            plt.show()
        
        


In [0]:
# Number of frames read by the most recent processing pass.
print(tot_frames)

In [0]:
# Number of faces seen by the most recent processing pass. The counter
# maintained by the processing loops is `face_index`; no variable called
# `index` is defined anywhere in the notebook.
print(face_index)

In [0]:
# Number of face records collected so far.
print(len(face_data))

In [0]:
# Persist the collected face records to the file "face_data" in the working
# directory. The context manager closes the file even if pickling fails.
with open("face_data", "wb") as f:
    pickle.dump(face_data, f)

In [0]:
# Done reading frames: release the video capture.
video_capture.release()

In [0]:
# Reload the face records saved earlier and pull out the encodings for
# clustering. The context manager makes sure the file handle is closed.
# NOTE: unpickling can execute arbitrary code — only load a "face_data" file
# that this notebook produced itself.
with open('face_data', "rb") as f:
    face_data = pickle.load(f)
face_data = np.array(face_data)
encodings = [d["encodings"] for d in face_data]

In [0]:
# One encoding per face record.
print(len(encodings))

In [0]:
from sklearn.cluster import DBSCAN

In [0]:
# Density-based clustering of the face encodings using Euclidean distance.
# NOTE(review): eps / min_samples are left at the library defaults — confirm
# they suit these encodings.
clt = DBSCAN(metric="euclidean")

In [0]:
# Cluster the encodings; the per-face cluster labels end up in clt.labels_.
clt.fit(encodings)

In [0]:
# Cluster label assigned to each face record, in face_data order.
print(clt.labels_)

In [0]:
# Distinct cluster labels found by DBSCAN.
labelIDs = np.unique(clt.labels_)
print(labelIDs)

# Only labels above -1 are counted as unique faces.
numUniqueFaces = np.count_nonzero(labelIDs > -1)
print("[INFO] # unique faces: {}".format(numUniqueFaces))

In [0]:
# One empty bucket per distinct label in labelIDs.
face_data_classified = [[] for _ in labelIDs]

In [0]:
# Drop every face record into the bucket indexed by its cluster label.
# A label of -1 indexes the last bucket (Python negative indexing).
for face_record, label in zip(face_data, clt.labels_):
    face_data_classified[label].append(face_record)

In [0]:
# Bucket indices that will be kept; filled in by the next cell.
id_list = []

In [0]:
# Keep only the identities (clusters) that contain enough faces.
# Iterate over the actual cluster labels and exclude -1: faces labelled -1
# land in the last bucket through negative indexing, and looping over bucket
# positions would let that bucket be picked as if it were a person.
# The list is rebuilt from scratch so re-running the cell cannot duplicate ids.
min_faces_per_id = 25
id_list = [
    int(label)
    for label in labelIDs
    if label > -1 and len(face_data_classified[label]) >= min_faces_per_id
]

In [0]:
# Display the selected bucket indices.
id_list

In [0]:
# Sliding-window length (overrides the earlier value of 10) and the window
# of recent emotion labels, which starts out empty.
frame_window, emotion_window = 20, []
#gender_window = []

In [0]:
# Rebuild the per-cluster buckets so that they hold only the faces belonging
# to the selected ids. Starting from fresh, empty buckets matters: appending
# to the already-populated buckets would store every selected face twice
# (and once more on each re-run of this cell).
# NOTE(review): buckets of unselected labels are left empty — confirm that
# nothing downstream still needs those faces.
face_data_classified = [[] for _ in range(len(labelIDs))]
for idx, label in enumerate(clt.labels_):
    if label in id_list:
        face_data_classified[label].append(face_data[idx])

In [0]:
# Print how many face records each of the first len(labelIDs) buckets holds.
for bucket in face_data_classified[:len(labelIDs)]:
    print(len(bucket))