In [1]:
from imutils import face_utils
import numpy as np
import imutils
import dlib
import cv2
import matplotlib.pyplot as plt

In [2]:
# Face detector plus landmark predictor. The predictor is loaded from a model
# file given by a relative path, so the file has to be in the notebook's
# working directory.
LANDMARK_MODEL_PATH = 'shape_predictor_68_face_landmarks.dat'

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

In [3]:
# 3D positions of six facial landmarks on a generic head model. They are
# expressed in an arbitrary reference frame with the nose tip at the origin,
# called World Coordinates (a.k.a. Model Coordinates in the OpenCV docs).
# The row order must match the order of the 2D image points given to solvePnP.
model_points = np.array(
    [
        [0.0, 0.0, 0.0],           # Nose tip
        [0.0, -330.0, -65.0],      # Chin
        [-225.0, 170.0, -135.0],   # Left eye, left corner
        [225.0, 170.0, -135.0],    # Right eye, right corner
        [-150.0, -150.0, -125.0],  # Left mouth corner
        [150.0, -150.0, -125.0],   # Right mouth corner
    ],
    dtype=np.float32,
)

In [4]:
# Pinhole camera intrinsics for a 640x480 frame: the principal point is the
# frame centre and the focal length (in pixels) is set equal to the frame
# width -- presumably a stand-in for a real calibration; verify if accuracy
# matters.
frame_width, frame_height = 640, 480

focal_length = frame_width
center = (frame_width / 2, frame_height / 2)

camera_matrix = np.array(
    [
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1],
    ],
    dtype=np.float64,
)

In [5]:
# Live head-pose demo. For every webcam frame: detect faces, locate the facial
# landmarks, fit the 3D face model to six of them with solvePnP, then draw the
# landmarks, a line from the nose tip along the estimated facing direction,
# and the three Euler angles. Press 'q' in the video window to quit.
video_capture = cv2.VideoCapture(0)
video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Distortion coefficients are all zero, i.e. the lens is treated as
# distortion-free. Loop-invariant, so built once.
dist_coeffs = np.zeros((4, 1))

# Landmark indices used as 2D correspondences for the rows of model_points
# (nose tip, chin, eye corners, mouth corners), in that order.
# NOTE(review): model_points places the "left" eye/mouth corners at negative x,
# while the indices below put 45/54 before 36/48. If the predictor follows the
# usual 68-point layout (36/48 on the image-left side) this pairing is
# mirrored -- confirm against the expected pose output before changing it.
pose_landmark_ids = [30, 8, 45, 36, 54, 48]

try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            # No frame was delivered (camera missing, busy or unplugged);
            # frame is unusable, so stop instead of crashing in cvtColor.
            print("No frame received from the camera; stopping.")
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)

        if len(rects) > 0:
            text = "{} face(s) found".format(len(rects))
            cv2.putText(frame, text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        for rect in rects:
            shape = face_utils.shape_to_np(predictor(gray, rect))

            # Mark every detected landmark with a small red dot.
            for (x, y) in shape:
                cv2.circle(frame, (int(x), int(y)), 1, (0, 0, 255), -1)

            image_points = shape[pose_landmark_ids].astype(np.float32)

            (success, rotation_vector, translation_vector) = cv2.solvePnP(
                model_points, image_points, camera_matrix, dist_coeffs)
            if not success:
                # No pose for this face; skip its overlay rather than draw
                # from an invalid rotation/translation.
                continue

            # Project a point 1000 model units in front of the nose tip to get
            # the end of the direction line.
            (nose_end_point2D, _) = cv2.projectPoints(
                np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                translation_vector, camera_matrix, dist_coeffs)

            p1 = (int(shape[30][0]), int(shape[30][1]))
            p2 = (int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))

            cv2.circle(frame, p1, 3, (0, 0, 255), -1)
            cv2.line(frame, p1, p2, (255, 0, 0), 2)

            # BONUS: turn the pose into Euler angles. Element 6 of the
            # decomposeProjectionMatrix result is the Euler-angle vector.
            rotation_mat = cv2.Rodrigues(rotation_vector)[0]
            pose_mat = np.hstack((rotation_mat, translation_vector))
            euler_angle = cv2.decomposeProjectionMatrix(pose_mat)[6]

            # One text row per angle, 20 px apart, starting below the face count.
            for row, (angle, label) in enumerate(zip(euler_angle, ("Pitch", "Yaw", "Roll"))):
                text = "{} {}".format(angle, label)
                cv2.putText(frame, text, (10, 40 + 20 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised or
    # the kernel was interrupted; otherwise the device stays locked.
    video_capture.release()
    cv2.destroyAllWindows()