### Installing MediaPipe

In [1]:
# %%capture
# !pip install mediapipe
# ^ Uncomment the two lines above to install MediaPipe from inside the notebook
#   (the outputs recorded in this notebook were produced with mediapipe 0.10.9).
# Render matplotlib figures inline in the cell outputs.
%matplotlib inline

### Required Libraries

In [2]:
import os
import pickle
import subprocess
import time
import warnings
from math import cos, sin

import numpy as np
import cv2
from matplotlib import pyplot as plt
import moviepy.editor as mpimg
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import Image
# Silence every Python warning so library noise does not clutter the cell outputs.
# Note: this also hides NumPy RuntimeWarnings (e.g. invalid value / divide by zero).
warnings.filterwarnings('ignore')


In [3]:
# Display the installed MediaPipe version so the run can be reproduced.
mp.__version__

'0.10.9'

## Preprocessing utilities

In [4]:
def draw_axis(img, pitch, yaw, roll, tdx=None, tdy=None, size=100):
    """Draw the three head-pose axes on ``img`` and return the same image.

    The lines are drawn with ``cv2.line`` directly on ``img`` (in place).

    Args:
        img: Image array to draw on; ``img.shape[:2]`` is read as (height, width).
        pitch: Rotation angle in radians (passed straight to ``cos``/``sin``).
        yaw: Rotation angle in radians; its sign is flipped before use.
        roll: Rotation angle in radians.
        tdx: X pixel coordinate of the axes origin.
        tdy: Y pixel coordinate of the axes origin. When either ``tdx`` or
            ``tdy`` is ``None`` the image centre is used for both.
        size: Length in pixels of each projected axis.

    Returns:
        The ``img`` object that was passed in, with the axes drawn on it.
    """
    yaw = -yaw

    # Identity check against None so that a legitimate coordinate of 0 (or a
    # NumPy scalar) is never confused with "not provided".
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-axis, pointing to the right; drawn in red (colour tuples are BGR).
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-axis, pointing down; drawn in green.
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-axis, pointing out of the screen; drawn in blue with a thinner line.
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    origin = (int(tdx), int(tdy))
    cv2.line(img, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return img

In [5]:
def preprocess(face, height = 450, width = 450):
    """Turn one face's landmarks into a normalised 1-D feature vector.

    Each landmark's ``x`` is scaled by ``width`` and its ``y`` by ``height``.
    The first 468 scaled x values are followed by the first 468 scaled y
    values, the mean of the combined vector (x and y together) is subtracted,
    and the result is divided by its largest element.

    Args:
        face: Iterable (re-iterable) of landmarks exposing ``.x`` and ``.y``.
        height: Scale applied to ``y`` values.
        width: Scale applied to ``x`` values.
            # NOTE(review): the 450x450 defaults presumably match the frame
            # size used when the regressor was trained - confirm.

    Returns:
        ``numpy.ndarray`` holding the x block followed by the y block.
    """
    scaled_x = [landmark.x * width for landmark in face][:468]
    scaled_y = [landmark.y * height for landmark in face][:468]

    features = np.array(scaled_x + scaled_y)
    centred = features - features.mean()

    return centred / centred.max()

In [6]:
def get_img_with_axis(image, detector):
    """Detect faces in ``image`` and draw the predicted head-pose axes on it.

    The frame is converted from BGR to RGB, wrapped in an ``mp.Image`` and
    passed to ``detector.detect``. For every detected face the landmarks are
    turned into features with ``preprocess`` (default 450x450 scaling) and fed
    to the notebook-level ``model``, whose first output row is read as
    (pitch, yaw, roll). The axes are anchored at landmark index 4.
    # NOTE(review): index 4 is presumably the nose tip - confirm against the
    # MediaPipe face-mesh topology.

    Note:
        This function reads the global ``model``; it must be defined before
        the first call. Drawing happens in place on ``image``.

    Args:
        image: BGR frame (as read by OpenCV).
        detector: Object exposing ``detect(mp.Image)`` whose result has a
            ``face_landmarks`` sequence.

    Returns:
        ``image`` itself, annotated when at least one face was found.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mp_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

    detection = detector.detect(mp_frame)

    # Nothing detected: hand the frame back untouched.
    if not detection.face_landmarks:
        return image

    annotated = image
    for face in detection.face_landmarks:
        features = preprocess(face).reshape(1, -1)
        angles = model.predict(features)
        pitch, yaw, roll = (angles[0, idx] for idx in range(3))

        # Landmark coordinates are normalised, so scale them to pixels.
        anchor = face[4]
        origin_x = anchor.x * image.shape[1]
        origin_y = anchor.y * image.shape[0]

        annotated = draw_axis(annotated, pitch, yaw, roll, tdx=origin_x, tdy=origin_y)
    return annotated

In [7]:
def process_video(video_path, output_path, detector, max_frames=None):
    """Write a side-by-side (original | annotated) copy of a video.

    Every frame is read with OpenCV, annotated by ``get_img_with_axis`` and
    concatenated horizontally with the untouched frame, so the output is
    twice as wide as the input. The output is encoded as MJPG and carries no
    audio track.

    Args:
        video_path: Path of the input video.
        output_path: Path of the video file to write.
        detector: Face landmarker passed through to ``get_img_with_axis``.
        max_frames: Optional upper bound on the number of frames to process;
            ``None`` (default) processes the whole video.

    Returns:
        None. Progress/errors are reported with ``print``.
    """
    t1 = time.time()

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print("Error: Unable to open video file.")
        return

    out = None
    frame_count = 0
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the video duration and desynchronises it from the audio merged later.
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if not frame_rate > 0:  # also catches NaN
            # NOTE(review): 30 fps is an arbitrary fallback for sources that
            # report no usable frame rate - adjust if a better value is known.
            frame_rate = 30.0
            print("Warning: Unable to read the frame rate, falling back to 30 fps.")

        # Define the codec and create the VideoWriter object.
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(output_path, fourcc, frame_rate, (2 * frame_width, frame_height))
        if not out.isOpened():
            # Without this check a failed writer would drop every frame silently.
            print("Error: Unable to open output video file for writing.")
            return

        while max_frames is None or frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Annotate a copy so the left half keeps the untouched frame.
            processed_frame = get_img_with_axis(frame.copy(), detector)
            concatenated_frame = np.concatenate((frame, processed_frame), axis=1)
            out.write(concatenated_frame)

            frame_count += 1
    finally:
        # Release the capture and writer even if a frame raised.
        cap.release()
        if out is not None:
            out.release()

    t2 = time.time()
    processing_time = t2 - t1
    print(f"Processing time: {processing_time}, for video of length {frame_count} frames.")

def extract_audio(video_path, output_audio_path):
    """Write the audio track of ``video_path`` to ``output_audio_path``.

    Args:
        video_path: Path of the video whose audio is extracted.
        output_audio_path: Path of the audio file to write.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = mpimg.VideoFileClip(video_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in {video_path!r}.")
        audio_clip.write_audiofile(output_audio_path)
    finally:
        # Always release the clip's underlying readers.
        video_clip.close()


## Loading the model

In [8]:
# Make sure 'SVR_model.sav' has been downloaded into the working directory
# before running this cell.
# NOTE: pickle.load can execute arbitrary code - only load a model file that
# comes from a source you trust.
filename = 'SVR_model.sav'
with open(filename, 'rb') as model_file:
    # The context manager closes the file handle once the model is loaded.
    model = pickle.load(model_file)


Then download the off-the-shelf model bundle(s). Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/face_landmarker#models) for more information about these model bundles.

In [9]:
# Download the Face Landmarker bundle quietly (-q) and save it under the file
# name that the detector configuration cell loads.
!wget -O face_landmarker_v2_with_blendshapes.task -q https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task

## Configure The Face Detector Options

In [10]:
# The bundle 'face_landmarker_v2_with_blendshapes.task' must already be present
# in the working directory.
base_options = python.BaseOptions(
    model_asset_path='face_landmarker_v2_with_blendshapes.task',
)

# Landmarker settings: IMAGE running mode, up to three faces per frame and a
# 0.4 minimum detection confidence.
options = vision.FaceLandmarkerOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.IMAGE,
    num_faces=3,
    min_face_detection_confidence=0.4,
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
)

detector = vision.FaceLandmarker.create_from_options(options)

I0000 00:00:1709515751.236978   57481 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1709515751.247834   57585 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: llvmpipe (LLVM 15.0.7, 256 bits)
W0000 00:00:1709515751.248512   57481 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


## Generating a video with the head pose marks

In [11]:
# Input/output locations for one test clip; change `counter` to pick another clip.
counter = '6'
video_path = f'./content/test{counter}.webm'
output_path = f'./out/test_marked{counter}.avi'
output_audio_path = f'./out/test{counter}.mp3'
final_path = f'./out/test{counter}.mp4'

# Create the output folders up front so that writing the results does not fail
# on a fresh checkout where they do not exist yet.
for target in (output_path, output_audio_path, final_path):
    os.makedirs(os.path.dirname(target) or '.', exist_ok=True)

# Write the side-by-side annotated video (it has no audio track yet).
process_video(video_path, output_path, detector)
# Extract the audio of the original video so it can be merged in afterwards.
extract_audio(video_path, output_audio_path)


Processing time: 89.3348650932312, for video of length 1212 frames.
MoviePy - Writing audio in ./out/test6.mp3


                                                                                

MoviePy - Done.




In [12]:
# Merge the extracted audio into the annotated video.
# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are handled safely.
ffmpeg_command = [
    "ffmpeg", "-y",
    "-i", output_path,
    "-i", output_audio_path,
    "-c:v", "copy",
    "-c:a", "aac",
    "-strict", "experimental",
    final_path,
]
# The return code is the cell's output: 0 means ffmpeg succeeded.
subprocess.call(ffmpeg_command)



ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

0