In [37]:
import cv2
import numpy as np
import dlib
from PIL import Image, ImageEnhance
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path

from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import cosine

from collections import defaultdict

In [38]:
# Default sharpness factor used by enhance() and preprocess_face().
ENHANCE_FACTOR = 3
# Default margin, in pixels, that get_face() adds on every side of the detected box.
ADJUSTMENT = 30
# Default scale factor applied by preprocess_face() when resize=True.
RESIZE_FACTOR = 2
# dlib frontal face detector instance shared by every get_face() call.
hogFaceDetector = dlib.get_frontal_face_detector()

In [39]:
# face processing
def extract_bounding_points(rect):
    """Return the bottom-left and top-right corners of ``rect`` as (x, y) tuples.

    ``rect`` must expose ``bl_corner()`` and ``tr_corner()``, each returning an
    object with ``x`` and ``y`` attributes.

    Returns:
        A two-element list ``[(bl_x, bl_y), (tr_x, tr_y)]``.
    """
    corners = (rect.bl_corner(), rect.tr_corner())
    return [(corner.x, corner.y) for corner in corners]



def enhance(im, factor=ENHANCE_FACTOR):
    """Apply PIL's sharpness enhancement to an image array.

    Args:
        im: image array accepted by ``Image.fromarray``.
        factor: value passed to ``ImageEnhance.Sharpness(...).enhance``.

    Returns:
        The enhanced image converted back to a numpy array.
    """
    pil_image = Image.fromarray(im)
    sharpened = ImageEnhance.Sharpness(pil_image).enhance(factor)
    return np.array(sharpened)



def get_face(im,
             desired_face_width=None,
             desired_face_height=None,
             checkShape=False,
             center=None,
             returnCenter=False,
             adjust=ADJUSTMENT):
    """Crop the first face found by ``hogFaceDetector`` out of ``im``.

    Two cropping modes are supported:

    * ``checkShape=False``: the detected box is grown by ``adjust`` pixels on
      every side.
    * ``checkShape=True``: the detected box is grown to exactly
      ``desired_face_width`` x ``desired_face_height`` and translated by the
      offset between ``center`` and the detected box centre, so consecutive
      frames are cropped around the same reference point.

    In both modes the window is clipped to the image bounds, so the returned
    crop can be smaller than requested when the face is near an edge.

    Args:
        im: image array with at least two dimensions (rows, columns[, channels]).
        desired_face_width: target crop width; required when ``checkShape``.
        desired_face_height: target crop height; required when ``checkShape``.
        checkShape: select the fixed-size, re-centred mode described above.
        center: ``(x, y)`` reference centre; required when ``checkShape``.
        returnCenter: also return the centre of the (unclipped) crop window.
        adjust: margin in pixels used when ``checkShape`` is False.

    Returns:
        ``None`` when the detector finds no face. Otherwise the cropped face,
        or ``(face, (center_x, center_y))`` when ``returnCenter`` is True.

    Raises:
        ValueError: ``checkShape`` is True but ``center`` or a desired
            dimension is missing.
    """
    detections = hogFaceDetector(im, 0)
    # Indexing [0] on an empty detection list used to raise IndexError;
    # callers test the result against None instead.
    if len(detections) == 0:
        return None
    faceRect = detections[0]

    # Only the first two dimensions are needed, so grayscale input works too.
    height, width = im.shape[:2]
    face_height = faceRect.bottom() - faceRect.top()
    face_width = faceRect.right() - faceRect.left()

    if checkShape:
        if center is None or desired_face_width is None or desired_face_height is None:
            raise ValueError(
                "get_face: checkShape=True requires center, "
                "desired_face_width and desired_face_height"
            )
        center_x, center_y = center

        face_center_x = faceRect.left() + face_width // 2
        face_center_y = faceRect.top() + face_height // 2

        x_displacement = center_x - face_center_x
        y_displacement = center_y - face_center_y

        # Split the missing size between both sides; the second share absorbs
        # the rounding so the window is exactly the desired size.
        height_diff = (desired_face_height - face_height) // 2
        width_diff = (desired_face_width - face_width) // 2

        height_adj = desired_face_height - (face_height + height_diff)
        width_adj = desired_face_width - (face_width + width_diff)
    else:
        height_diff = width_diff = height_adj = width_adj = adjust
        x_displacement = y_displacement = 0

    faceTop = faceRect.top() - height_diff + y_displacement
    faceBottom = faceRect.bottom() + height_adj + y_displacement
    faceLeft = faceRect.left() - width_diff + x_displacement
    faceRight = faceRect.right() + width_adj + x_displacement

    # Clip to the image so negative indices never wrap around.
    face = im[max(faceTop, 0): min(faceBottom, height),
              max(faceLeft, 0): min(faceRight, width)]

    if returnCenter:
        # Centre of the unclipped window, in full-image coordinates.
        face_center_x = faceLeft + (faceRight - faceLeft) // 2
        face_center_y = faceTop + (faceBottom - faceTop) // 2
        return face, (face_center_x, face_center_y)

    return face


def preprocess_face(face,
                    resize=False,
                    resize_factor=RESIZE_FACTOR,
                    enhance_factor=ENHANCE_FACTOR,
                    get_landmarks=False):
    """Sharpen a face crop and derive its grayscale version.

    Args:
        face: colour face crop (converted with ``cv2.COLOR_BGR2GRAY``).
        resize: when True, scale both outputs by ``resize_factor``.
        resize_factor: multiplier applied to width and height when resizing.
        enhance_factor: sharpness factor forwarded to ``enhance``.
        get_landmarks: when True, also return the first 27 landmarks from
            ``faceAligner.get_landmarks``.

    Returns:
        ``(coloredFace, grayFace)``, or ``(coloredFace, grayFace, landmarks)``
        when ``get_landmarks`` is True.
    """
    coloredFace = enhance(face, factor=enhance_factor)
    grayFace = cv2.cvtColor(coloredFace, cv2.COLOR_BGR2GRAY)

    if resize:
        height, width = grayFace.shape
        # cv2.resize expects dsize as (width, height); passing (height, width)
        # transposes the target size for non-square faces.
        new_size = (int(width * resize_factor), int(height * resize_factor))
        grayFace = cv2.resize(grayFace, new_size)
        coloredFace = cv2.resize(coloredFace, new_size)

    if get_landmarks:
        # NOTE(review): `faceAligner` is not defined in the visible cells;
        # this branch needs it to exist in the kernel namespace.
        landmarks = faceAligner.get_landmarks(grayFace)[0]
        return coloredFace, grayFace, landmarks[:27]
    return coloredFace, grayFace

In [40]:
def check_displacement(displacement, eps_thresh, eps_adjust):
    """Cap the length of a displacement vector.

    When the Euclidean norm of ``displacement`` exceeds ``eps_thresh`` the
    vector is rescaled to length ``eps_adjust``; otherwise the input object
    is returned untouched.
    """
    length = np.linalg.norm(displacement)
    too_long = length > eps_thresh
    return displacement * eps_adjust / length if too_long else displacement

In [23]:
# output processing
def record_video(array, path, fps=15, fourcc='DIVX'):
    """Write a sequence of frames to a video file with ``cv2.VideoWriter``.

    The frame size is taken from the first frame.

    Args:
        array: non-empty sequence of image arrays.
        path: output file path.
        fps: frames per second of the output (default 15, as before).
        fourcc: four-character codec code (default ``'DIVX'``, as before).

    Raises:
        ValueError: ``array`` is empty, so no frame size can be determined.
    """
    if len(array) == 0:
        raise ValueError("record_video: no frames to write")

    height, width = array[0].shape[0], array[0].shape[1]
    out = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height))
    try:
        for frame in array:
            out.write(frame)
    finally:
        # Release even if a write fails so the file handle is not leaked.
        out.release()


In [24]:
def gray(img, threshold=127):
    """Convert a BGR image to grayscale and binarise it.

    Pixels are thresholded with ``cv2.THRESH_BINARY`` at ``threshold`` with a
    maximum value of 255; only the thresholded image is returned.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY)
    return binary
def color(img):
    """Convert a single-channel image to 3-channel BGR (``cv2.COLOR_GRAY2BGR``)."""
    bgr = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    return bgr

In [25]:
def shift_image(img, shift_x, shift_y):
    """Translate an image by whole pixels, filling uncovered areas with zeros.

    Args:
        img: numpy array with at least two dimensions (rows, columns, ...).
        shift_x: horizontal shift; positive moves content to the right.
        shift_y: vertical shift; positive moves content down.

    Returns:
        A new array of the same shape and dtype as ``img``; the input is not
        modified. If either shift is at least as large as the corresponding
        image dimension, the result is all zeros (previously a shift larger
        than the image raised a broadcast error).
    """
    shifted = np.zeros_like(img)
    height, width = img.shape[:2]

    # Nothing of the original remains visible once the shift reaches the size.
    if abs(shift_y) >= height or abs(shift_x) >= width:
        return shifted

    # Source window that stays inside the frame, and where it lands.
    src_rows = slice(max(-shift_y, 0), height - max(shift_y, 0))
    dst_rows = slice(max(shift_y, 0), height - max(-shift_y, 0))
    src_cols = slice(max(-shift_x, 0), width - max(shift_x, 0))
    dst_cols = slice(max(shift_x, 0), width - max(-shift_x, 0))

    shifted[dst_rows, dst_cols] = img[src_rows, src_cols]
    return shifted

In [26]:
from numpy.fft import fft2, ifft2
import numpy as np


def conv2d(a, f):
    """Slide kernel ``f`` over 2-D array ``a`` and sum the element-wise products.

    The kernel is not flipped, and only positions where it fits entirely
    inside ``a`` are evaluated, so the output has shape
    ``a.shape - f.shape + 1``.
    """
    out_shape = tuple(np.subtract(a.shape, f.shape) + 1)
    # View of every kernel-sized window without copying:
    # windows[i, j, k, l] is a[i + k, j + l].
    windows = np.lib.stride_tricks.as_strided(
        a,
        shape=f.shape + out_shape,
        strides=a.strides + a.strides,
    )
    return np.einsum('ij,ijkl->kl', f, windows)

In [27]:
def crop(img):
    """Crop ``img`` to an inner part of the bounding box of its 255-valued pixels.

    The bounding box of all pixels equal to 255 is computed, then trimmed:
    the top 2/5 of its height and 1/5 of its width on each side are dropped.

    Returns:
        ``(cropped, (left, top))`` where ``(left, top)`` is the x/y offset of
        the crop inside ``img``.
    """
    hits = np.argwhere(img == 255)

    row_idx = hits[:, 0]
    y_min, y_max = row_idx.min(), row_idx.max()
    col_idx = hits[:, 1]
    x_min, x_max = col_idx.min(), col_idx.max()

    top = y_min + (y_max - y_min) * 2 // 5
    side_inset = (x_max - x_min) * 1 // 5
    left = x_min + side_inset
    right = x_max - side_inset

    return img[top:y_max, left:right], (left, top)

In [28]:
def find_center(eye_img):
    """Locate the darkest disc-shaped region of a single-channel eye image.

    The image is inverted (``255 - value``), zero-padded and correlated with a
    normalised disc-like kernel whose radius is a quarter of the smaller image
    side. The centre is the mean position of all maxima of that response.

    Returns:
        ``(center, radius)`` where ``center`` is an integer array in
        ``(x, y)`` order and ``radius`` is the kernel radius used.
    """
    radius = min(eye_img.shape) // 4
    side = 2 * radius + 1

    # Build the disc from concentric one-pixel circle outlines.
    kernel = np.zeros((side, side))
    for ring in range(radius + 1):
        cv2.circle(kernel, (radius, radius), ring, 1, 1)
    kernel = kernel / np.sum(kernel)

    # Invert so dark pixels score high, then pad so the response keeps the
    # input's shape.
    inverted = -eye_img + 255
    padded = np.pad(inverted, radius)
    response = conv2d(padded.astype(np.int32), kernel)

    rows, cols = np.where(response == np.max(response))
    # Average all maxima, then flip (row, col) into (x, y).
    center = np.array((rows, cols)).mean(axis=1).astype(int)[::-1]
    return center, radius

In [29]:
def shape_to_np(shape, dtype="int"):
    """Copy the 68 landmark points of ``shape`` into a ``(68, 2)`` array.

    ``shape.part(i)`` must return an object with ``x`` and ``y`` attributes
    for ``i`` in ``0..67``. Column 0 holds x, column 1 holds y.
    """
    coords = np.zeros((68, 2), dtype=dtype)
    for index in range(coords.shape[0]):
        point = shape.part(index)
        coords[index, 0] = point.x
        coords[index, 1] = point.y
    return coords

In [53]:
def proceed_video_otsu(path):
    """Estimate both pupil centres per frame using Haar eye boxes and Otsu thresholding.

    The second frame of the video defines the reference face crop (size and
    centre). For each following frame (at most 1000) the face is cropped to
    that size, eyes are detected with a Haar cascade, the eye regions are
    masked, de-sharpened, blurred and Otsu-thresholded, and a dark-disc centre
    is searched in the left and right halves with ``crop`` + ``find_center``.

    Side effects: writes the debug images '11.png' and '1.png' to '5.png' to
    the working directory, and prints any exception raised while locating the
    pupil centres.

    Args:
        path: path of the video file opened with ``cv2.VideoCapture``.

    Returns:
        Five lists with one entry per processed frame:
        ``face_array`` (gray face converted back to BGR, with red dots at the
        found centres), ``centers_array`` ([left, right] centres, zeros when
        not found), ``eye_centers_array`` ([left, right] centres of the Haar
        boxes), ``radius_array`` and ``eye_radius_array``.
    """
    vidcap = cv2.VideoCapture(path)
    # `flag` sequences the one-off debug image dumps (1 -> '1.png', 2 -> '2.png', ...).
    flag = 1
    # NOTE(review): the first frame is read and immediately overwritten, and
    # `success` is never checked before the frame is used.
    success, result = vidcap.read()
    success, result = vidcap.read()
    initialFace, center = get_face(result, returnCenter=True)

    # The reference crop size is reused for every later frame.
    desired_height, desired_width, _ = initialFace.shape
    eye_cascade = cv2.CascadeClassifier('cascades/gaze/haarcascade_eye.xml')

    count = 0
    # NOTE(review): orig_array, dist_array, eyes_array, diff_array, mask_array,
    # res_array, double_eyes_array and eye_mask_array are never appended to.
    orig_array = []
    dist_array = []
    eyes_array  = []
    diff_array = []
    face_array = []
    mask_array = []
    res_array = []
    double_eyes_array = []
    eye_mask_array = []
    
    centers_array = []
    eye_centers_array = []
    radius_array = []
    eye_radius_array = []
    # Hard cap of 1000 processed frames.
    while count < 1000:
        success,image = vidcap.read()
        if not success:
            break
        count += 1
        # Debug dump of the raw 90th frame.
        if count == 90:
             cv2.imwrite('11.png', image)
        # Fixed-size crop re-centred on the reference centre.
        currentFace = get_face(image,
                             desired_face_width=desired_width,
                             desired_face_height=desired_height,
                             checkShape=True,
                             center = center)
        if flag ==1 and count == 90:
            cv2.imwrite('1.png', currentFace)
            flag += 1
        currentFace, currentFace_gray = preprocess_face(currentFace, resize=False)
        
        if flag ==2:
            cv2.imwrite('2.png', currentFace_gray)
            flag += 1

        # Eye boxes are constrained to roughly a fifth of the face width (+15 px).
        eyes = eye_cascade.detectMultiScale(currentFace_gray, minNeighbors = 5, scaleFactor=1.1,  minSize=(desired_width//5, desired_width//5), maxSize=(desired_width//5+15, desired_width//5+15))
        
        # Per-frame defaults, kept when no pair of eyes is found.
        left_center = np.zeros(2)
        right_center = np.zeros(2)
        
        eye_left_center = np.zeros(2)
        eye_right_center = np.zeros(2)
        
        left_radius = 0
        right_radius = 0
        eye_left_radius = 0
        eye_right_radius = 0
        
        eye_mask = np.zeros_like(currentFace)
        borders_left = []
        borders_right = []
        face = color(currentFace_gray)
        # Only frames with exactly two detections are analysed further.
        if len(eyes)==2:
            
            # Index of the detection with the smaller x, i.e. the left box in the image.
            left = min([0,1], key = lambda x: eyes[x][0])
            eye_left_center = np.array([eyes[left][0] + eyes[left][2] / 2, eyes[left][1] + eyes[left][3] / 2])
            eye_right_center = np.array([eyes[1-left][0] + eyes[1-left][2] / 2, eyes[1-left][1] + eyes[1-left][3] / 2])
            
            eye_left_radius = eyes[left][2] / 4
            eye_right_radius = eyes[1-left][2] / 4
            
            for eye_x, eye_y, eye_w, eye_h in eyes:
                borders_left.append( eye_x)
                borders_right.append(eye_x + eye_w)
                eye_mask[eye_y : eye_y + eye_h, eye_x : eye_x + eye_w] = 1
            # From here on `eyes` is the masked face image, not the detection list.
            eyes = np.multiply(currentFace, eye_mask)
            if flag ==3:
                cv2.imwrite('3.png', eyes)
                flag += 1
            
            double_eyes = eyes.copy()
            # A sharpness factor below 1 is passed here (0.1).
            double_eyes = enhance(double_eyes,0.1)
            double_eyes = cv2.cvtColor(double_eyes, cv2.COLOR_BGR2GRAY)

            blur = cv2.GaussianBlur(double_eyes,(3,3),0)
            # NOTE(review): THRESH_OTSU is passed alone, so the explicit 40 is
            # presumably ignored in favour of the Otsu threshold - confirm.
            ret3,th3 = cv2.threshold(blur,40,255,cv2.THRESH_OTSU)
            if flag ==4:
                cv2.imwrite('4.png', th3)
                flag += 1
            res = th3.copy()

            try:
                height, width = res.shape
                # Column halfway between the smallest right edge and the
                # largest left edge of the two boxes; splits the image into
                # a left-eye and a right-eye half.
                dist = int(min(borders_right) + max(borders_left)) // 2 

                left_eye = res[:, :dist]
                right_eye = res[:, dist:]

                left_eye, left_tl = crop(left_eye)
                right_eye, right_tl = crop(right_eye)

                left_center, left_radius = find_center(left_eye)
                right_center, right_radius = find_center(right_eye)

                # Map crop-local centres back to face coordinates.
                x = left_tl[0] + left_center[0]
                y = left_tl[1] + left_center[1]
                left_center = np.array((x,y))
                cv2.circle(face, (x,y), 2, (0, 0, 255), -1)

                # The right half starts at column `dist`, so add it back.
                x = right_tl[0] + right_center[0] + dist
                y = right_tl[1] + right_center[1]
                right_center = np.array((x,y))
                
                cv2.circle(face, (x,y), 2, (0, 0, 255), -1)
                if flag ==5:
                    cv2.imwrite('5.png', face)
                    flag += 1
                # NOTE(review): previous_*_center are assigned but never read.
                previous_left_center = left_center.copy()
                previous_right_center = right_center.copy()

            except Exception as e:
                # Best effort: a failure in crop/find_center is printed and the
                # frame keeps whatever centre/radius values were set so far.
                print(e)



        face_array.append(face)
        centers_array.append([left_center, right_center])
        eye_centers_array.append([eye_left_center, eye_right_center])
        radius_array.append([left_radius, right_radius])
        eye_radius_array.append([eye_left_radius, eye_right_radius])
        

    vidcap.release()

    return face_array, centers_array, eye_centers_array, radius_array, eye_radius_array



In [42]:
# Module-level Haar eye cascade.
# NOTE(review): proceed_video_otsu and proceed_video_subtractor each build
# their own local cascade from the same path, so this global looks unused in
# the visible cells - confirm before removing.
eye_cascade = cv2.CascadeClassifier('cascades/gaze/haarcascade_eye.xml')


In [74]:
%%time
# Run the Otsu-based pipeline on the sample video; the recorded timing below
# shows this cell takes minutes.
face_array, centers_array, eye_centers_array, radius_array, eye_radius_array = proceed_video_otsu('videos/8.avi')

CPU times: user 3min 5s, sys: 9.83 s, total: 3min 15s
Wall time: 2min 31s


In [75]:
# Write the annotated face frames, then embed the file that was just written.
# The cell previously displayed 'results/face.mp4', a different file from the
# one it records, so the preview could be stale or missing on a fresh run.
record_video(face_array, 'results/face2.mp4')


from IPython.display import Video
Video('results/face2.mp4', embed=True)

In [34]:
def get_kernel_res(eye_img):
    """Correlate an eye image with a disc-like kernel and report its maxima.

    The kernel radius is half of the smaller image side. The image is
    zero-padded by that radius, so the response has the input's shape.

    Returns:
        A 6-tuple ``(center, response, peak, n_peaks, spread, (rows, cols))``:
        ``center`` is the mean maximum position in ``(x, y)`` order,
        ``response`` the full correlation map, ``peak`` its maximum value,
        ``n_peaks`` how many positions reach it, ``spread`` the standard
        deviation of those positions as (rows, cols), and ``(rows, cols)``
        the positions themselves.
    """
    radius = min(eye_img.shape) // 2
    side = 2 * radius + 1

    # Build the disc from concentric one-pixel circle outlines.
    kernel = np.zeros((side, side))
    for ring in range(radius + 1):
        cv2.circle(kernel, (radius, radius), ring, 1, 1)
    kernel = kernel / np.sum(kernel)

    padded = np.pad(eye_img.copy(), radius)
    response = conv2d(padded.astype(np.int32), kernel)

    rows, cols = np.where(response == np.max(response))
    peak_positions = np.array((rows, cols))
    # Average all maxima, then flip (row, col) into (x, y).
    center = peak_positions.mean(axis=1).astype(int)[::-1]
    return center, response, response.max(), len(rows), peak_positions.std(axis=1), (rows, cols)

In [70]:
def proceed_video_subtractor(path):
    """Estimate eye-region centres per frame using background subtraction.

    The second frame of the video defines the reference face. Every following
    frame is cropped to the same size, differenced against that reference
    face with a fresh CNT background subtractor, and the difference mask is
    restricted to the two eye contours taken from dlib's 68-point landmarks
    (indices 36-41 and 42-47). ``crop`` and ``get_kernel_res`` then give one
    centre per eye.

    Side effects: writes the debug images 'image.png', '12.png', '22.png',
    '32.png', '42.png' and '52.png' to the working directory.

    Args:
        path: path of the video file opened with ``cv2.VideoCapture``.

    Returns:
        ``(centers_array, image_array, diff_array, left_eye_array,
        right_eye_array)``, each with one entry per processed frame.
        ``centers_array`` holds crop-local centres (the crop offset is only
        added for drawing).
    """
    vidcap = cv2.VideoCapture(path)
    landmarks_predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    success = False
    count = 0
    # `flag` sequences the one-off debug image dumps below.
    flag = 0
#     while not success:
        
    # NOTE(review): the first frame is read and immediately overwritten, and
    # `image_success` is never checked.
    image_success, image = vidcap.read()
    image_success, image = vidcap.read()
    
    res = get_face(image, returnCenter=True)
    
#     if res is None:
#         print(f"Face was not detected, frame number {count}")
#         count += 1
#         continue

    initialFace, center = res
    success = True
    desired_height, desired_width, _ = initialFace.shape
    # NOTE(review): previousFace / previousFace_gray are set once here and
    # never updated, so every frame is compared with the initial face.
    previousFace, previousFace_gray = preprocess_face(initialFace, resize=False,
                                                                    get_landmarks=False)

    eye_cascade = cv2.CascadeClassifier('cascades/gaze/haarcascade_eye.xml')


    centers_array = []
    left_eye_array = []
    right_eye_array = []
    image_array = []
    diff_array = []
    while success:
        success,image = vidcap.read()
        if not success:
            break
        count += 1
        # NOTE(review): currentFace has not been recomputed for this frame yet,
        # so this writes the face left over from the previous iteration.
        if count == 90:
            cv2.imwrite('image.png', currentFace)
            

        # Fixed-size crop re-centred on the reference centre.
        currentFace = get_face(image,
                             desired_face_width=desired_width,
                             desired_face_height=desired_height,
                             checkShape=True,
                             center = center)
        
        if flag ==0 and count == 90:
            cv2.imwrite('12.png', currentFace)
            flag += 1
            
        # NOTE(review): this None check runs after currentFace may already
        # have been handed to cv2.imwrite above.
        if currentFace is None:
            print(f"Face was not detected, frame number {count}")
            continue
        currentFace, currentFace_gray = preprocess_face(currentFace, resize=False)
        
        if flag ==1:
            cv2.imwrite('22.png', currentFace_gray)
            flag += 1


        # NOTE(review): `eyes` is computed but not used afterwards in this function.
        eyes = eye_cascade.detectMultiScale(currentFace_gray, minNeighbors = 5, scaleFactor=1.1,  minSize=(desired_width//5, desired_width//5))

    

        # A new subtractor is built for every frame: the reference face is
        # applied first, then the current face, so `diff` is the second result.
        subtractor = cv2.bgsegm.createBackgroundSubtractorCNT(minPixelStability = 1, maxPixelStability = -1, isParallel=True, useHistory=False)

        frame = cv2.UMat(previousFace)
        bgs = subtractor.apply(frame)

        frame = cv2.UMat(currentFace)
        diff = subtractor.apply(frame)
        
        if flag ==2:
            cv2.imwrite('32.png', diff)
            flag += 1

        # Convert the UMat result back to a numpy array.
        diff = diff.get()
        diff_array.append(diff)
        
        # Landmarks are predicted over a rectangle covering the face crop.
        # NOTE(review): the rectangle mixes currentFace_gray's width with
        # previousFace_gray's height - confirm this is intended.
        shape = landmarks_predictor(currentFace_gray, dlib.rectangle(0, 0, currentFace_gray.shape[1], previousFace_gray.shape[0] ))
        
            
        landmarks = shape_to_np(shape)
        # Landmark indices 36..41 form the first eye contour.
        left_contour = landmarks[36:42]

        # Filled contour mask (value 1) keeps only the difference inside the eye.
        overlay = np.zeros_like(diff)
        cv2.drawContours(overlay, [left_contour], -1, 1, -1)
        left_eye = np.multiply(diff, overlay)
        

        # Landmark indices 42..47 form the second eye contour.
        right_contour = landmarks[42:48]

        overlay = np.zeros_like(diff)
        cv2.drawContours(overlay, [right_contour], -1, 1, -1)
        right_eye = np.multiply(diff, overlay)
        
        if flag ==3:
            cv2.imwrite('42.png', left_eye + right_eye)
            flag += 1
        
        left_eye_array.append(left_eye)
        right_eye_array.append(right_eye)
        

        # NOTE(review): crop() takes min/max over pixels equal to 255, which
        # fails on an empty set, so a frame with no such pixel inside an eye
        # contour looks like it would raise here - confirm.
        left_eye, left_tl = crop(left_eye)
        right_eye, right_tl = crop(right_eye)

        left_center, left_eye_kernel, left_max, num_left_max, std_left, (left_rows, left_cols) = get_kernel_res(left_eye)
        right_center, right_eye_kernel, right_max, num_right_max, std_right, (right_rows, right_cols) = get_kernel_res(right_eye)

        


        # Draw the centres in face coordinates (crop offset added back).
        img = currentFace.copy()
        cv2.circle(img, (left_center + left_tl).astype(int), 2, (0, 0, 255), -1)
        cv2.circle(img, (right_center + right_tl).astype(int), 2, (0, 0, 255), -1)

        if flag ==4:
            cv2.imwrite('52.png', img)
            flag += 1
            
        centers_array.append([left_center, right_center])
        image_array.append(img)



    vidcap.release()
    return centers_array, image_array, diff_array, left_eye_array, right_eye_array


In [72]:
%%time
# Run the background-subtraction pipeline on the sample video.
# NOTE: this rebinds `centers_array`, which the Otsu cell also assigns.
centers_array, image_array, diff_array, left_eye_array, right_eye_array = proceed_video_subtractor('videos/8.avi')

CPU times: user 3min 4s, sys: 7.63 s, total: 3min 11s
Wall time: 2min 32s


In [73]:
# Write the annotated frames, then embed the file that was just written.
# The cell previously displayed 'test.mp4', which a different cell writes
# from the difference masks.
record_video(image_array, 'test2.mp4')
from IPython.display import Video
Video('test2.mp4', embed=True)

In [24]:
# Convert each difference mask to BGR, write the video, and embed it.
record_video([color(mask) for mask in diff_array], 'test.mp4')
from IPython.display import Video
Video('test.mp4', embed=True)