In [1]:
import tqdm
import glob
import cv2
import dlib
import math
import numpy as np

from collections import OrderedDict

In [2]:
SOURCE_PATH = "dgw/input"
RESULT_PATH = "dgw/preprocessed"

FACE_DETECTOR_MODEL = "pre_trained_models/mmod_human_face_detector.dat"

In [3]:
class Point:
    def __init__(self, X, Y):
        self.x = X
        self.y = Y

class C_Rectangle:
    def __init__(self, topLeft, bottomRight):
        self.topLeft = topLeft
        self.bottomRight = bottomRight
        
        self.height = topLeft.y - bottomRight.y
        self.width = bottomRight.x - topLeft.x
    
    def get_area(self):
        area = self.height * self.width
        return abs(area)
    

In [4]:
def boundary_check(val, min_threshold=None, max_threshold=None):
    if min_threshold is not None and val < min_threshold:
        val = min_threshold
    elif max_threshold is not None and val > max_threshold:
        val = max_threshold
    return val

In [5]:
def get_facial_landmarks(img):
    import face_alignment
    
    face_detector = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D,
        flip_input=False,
        device='cpu'
    )

    facial_landmarks = face_detector.get_landmarks(img)
    
    return facial_landmarks

In [6]:
def get_driver_face(img):
    upsample_num = 1

    hog_face_detector = dlib.get_frontal_face_detector()
    cnn_face_detector = dlib.cnn_face_detection_model_v1(FACE_DETECTOR_MODEL)
    
    detected_faces = hog_face_detector(img, 1)
    is_using_cnn = False
    if not detected_faces:
        detected_faces = cnn_face_detector(img, upsample_num)
        is_using_cnn = True

    if not detected_faces:
        print("Unable to detect any faces at {}".format("ABC"))
    
    main_face = None
    main_face_area = 0
    for i in range(len(detected_faces)):
        current_face = detected_faces[i]
        if is_using_cnn:
            current_face = detected_faces[i].rect
        
        top_left = Point(
            boundary_check(current_face.left(), 0, img.shape[1]), 
            boundary_check(current_face.top(), 0, img.shape[0])
        )
        bottom_right = Point(
            boundary_check(current_face.right(), 0, img.shape[1]), 
            boundary_check(current_face.bottom(), 0, img.shape[0])
        )
    
        current_face_area = C_Rectangle(top_left, bottom_right).get_area()
        if not main_face or current_face_area > main_face_area:
            main_face = current_face
            main_face_area = current_face_area

    return img[
        boundary_check(current_face.top(), 0, img.shape[0]):boundary_check(current_face.bottom(), 0, img.shape[1]), 
        boundary_check(current_face.left(), 0, img.shape[0]):boundary_check(current_face.right(), 0, img.shape[1])]

In [7]:
def face_orientation(frame, landmarks):
    size = frame.shape #(height, width, color_channel)

    image_points = np.array([
                            landmarks[33],     # Nose tip
                            landmarks[8],      # Chin
                            landmarks[36],     # Left eye left corner
                            landmarks[45],     # Right eye right corne
                            landmarks[48],     # Left Mouth corner
                            landmarks[54]      # Right mouth corner
                        ], dtype=float)
                        
    model_points = np.array([
                            (0.0, 0.0, 0.0),             # Nose tip
                            (0.0, -330.0, -65.0),        # Chin
                            (-165.0, 170.0, -135.0),     # Left eye left corner
                            (165.0, 170.0, -135.0),      # Right eye right corne
                            (-150.0, -150.0, -125.0),    # Left Mouth corner
                            (150.0, -150.0, -125.0)      # Right mouth corner                         
                        ], dtype=float)

    # Camera internals
    center = (size[1]/2, size[0]/2)
    focal_length = center[0] / np.tan(60/2 * np.pi / 180)
    camera_matrix = np.array(
                         [[focal_length, 0, center[0]],
                         [0, focal_length, center[1]],
                         [0, 0, 1]], dtype = float
                         )

    dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion

    (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)

    
    axis = np.float32([[500,0,0], 
                          [0,500,0], 
                          [0,0,500]])
                          
    imgpts, jac = cv2.projectPoints(axis, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
    modelpts, jac2 = cv2.projectPoints(model_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
    rvec_matrix = cv2.Rodrigues(rotation_vector)[0]

    proj_matrix = np.hstack((rvec_matrix, translation_vector))
    eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6] 

    
    pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]


    pitch = math.degrees(math.asin(math.sin(pitch)))
    roll = -math.degrees(math.asin(math.sin(roll)))
    yaw = math.degrees(math.asin(math.sin(yaw)))

    return imgpts, modelpts, str(int(roll)), str(int(pitch)), str(int(yaw))


In [14]:
def get_extra_padding(landmarks):
    return np.linalg.norm(landmarks[38] - landmarks[20])

def get_driver_eyes(landmarks):
    
    FACIAL_LANDMARKS_IDXS = OrderedDict([
        ("mouth", (48, 68)),
        ("right_eyebrow", (17, 22)),
        ("left_eyebrow", (22, 27)),
        ("right_eye", (36, 42)),
        ("left_eye", (42, 48)),
        ("nose", (27, 35)),
        ("jaw", (0, 17))
    ])
    
    left_eye = landmarks[FACIAL_LANDMARKS_IDXS["left_eye"][0]:FACIAL_LANDMARKS_IDXS["left_eye"][1]]
    right_eye = landmarks[FACIAL_LANDMARKS_IDXS["right_eye"][0]:FACIAL_LANDMARKS_IDXS["right_eye"][1]]
    left_eyebrow = landmarks[FACIAL_LANDMARKS_IDXS["left_eyebrow"][0]:FACIAL_LANDMARKS_IDXS["left_eyebrow"][1]]
    right_eyebrow = landmarks[FACIAL_LANDMARKS_IDXS["right_eyebrow"][0]:FACIAL_LANDMARKS_IDXS["right_eyebrow"][1]]

    eyes = np.concatenate((left_eye, right_eye, left_eyebrow, right_eyebrow))
    top_left_x = min(eyes, key = lambda t: t[0])[0] - get_extra_padding(landmarks)
    top_left_y = min(eyes, key = lambda t: t[1])[1]
    
    bottom_right_x = max(eyes, key = lambda t: t[0])[0] + get_extra_padding(landmarks)
    bottom_right_y = max(eyes, key = lambda t: t[1])[1] + get_extra_padding(landmarks)
    
    return ((top_left_x, top_left_y), (bottom_right_x, bottom_right_y))

In [15]:
for file_path in glob.glob("{src}/[0-9]/*.png".format(src=SOURCE_PATH)):
    path_info = file_path.split("/")
    IMAGE_NAME = path_info[-1].replace(".png", "")
    ZONE_NAME = path_info[-2]
    
    img = dlib.load_rgb_image(file_path)
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    face_img = get_driver_face(img)
    op_path = "{dir}/{zone}/cropped_{img}.png".format(dir=RESULT_PATH, zone=ZONE_NAME, img=IMAGE_NAME)
    print(op_path)

    cv2.imwrite(op_path, face_img)

    landmarks = get_facial_landmarks(face_img)    
#     for i in range(len(landmarks[0])):
#         print(i, landmarks[0][i])

    eyes_boundng_box = get_driver_eyes(landmarks[0])
    eye_image = cv2.rectangle(face_img, eyes_boundng_box[0], eyes_boundng_box[1], (255, 0, 0) , 2) 
    op_path = "{dir}/{zone}/eye_{img}.png".format(dir=RESULT_PATH, zone=ZONE_NAME, img=IMAGE_NAME)
    print(op_path)
    cv2.imwrite(op_path, eye_image)
    

    imgpts, modelpts, roll, pitch, yaw = face_orientation(face_img, landmarks[0])
    cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
    cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[0].ravel()), (255,0,), 3) #BLUE
    cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[2].ravel()), (0,0,255), 3) #RED
    op_path = "{dir}/{zone}/headpose_{img}.png".format(dir=RESULT_PATH, zone=ZONE_NAME, img=IMAGE_NAME)
    cv2.imwrite(op_path, face_img)
    print(roll, pitch, yaw)

-----
346 190
698 542
-----
dgw/preprocessed/1/cropped_fresh.png


  eye_image = cv2.rectangle(face_img, eyes_boundng_box[0], eyes_boundng_box[1], (255, 0, 0) , 2)
  cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
  cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[0].ravel()), (255,0,), 3) #BLUE
  cv2.line(face_img, tuple(landmarks[0][33]), tuple(imgpts[2].ravel()), (0,0,255), 3) #RED


dgw/preprocessed/1/eye_fresh.png
-63 39 -71
-----
47 244
251 447
-----
dgw/preprocessed/1/cropped_c.png
dgw/preprocessed/1/eye_c.png
-7 -4 70
-----
111 82
379 350
-----
dgw/preprocessed/1/cropped_1.png
dgw/preprocessed/1/eye_1.png
0 -16 1
-----
390 184
545 339
-----
dgw/preprocessed/3/cropped_frame257.png
dgw/preprocessed/3/eye_frame257.png
-2 -19 -3
-----
159 0
345 180
-----
dgw/preprocessed/3/cropped_fresh.png
dgw/preprocessed/3/eye_fresh.png
-5 -6 -34
-----
111 82
379 350
-----
dgw/preprocessed/3/cropped_3.png
dgw/preprocessed/3/eye_3.png
0 -16 1
-----
45 10
81 46
-----
-----
161 42
197 78
-----
dgw/preprocessed/2/cropped_d.png
dgw/preprocessed/2/eye_d.png
5 -15 -34
-----
111 82
379 350
-----
dgw/preprocessed/2/cropped_2.png
dgw/preprocessed/2/eye_2.png
0 -16 1
