In [5]:
#Import Libraries
import cv2
import itertools
import numpy as np
from time import time
import mediapipe as mp
import matplotlib.pyplot as plt
import pandas as pd
import re
import glob
import os
import pickle
import json
from tqdm import tqdm
from skimage import transform
import concurrent.futures

In [6]:
# Module-level MediaPipe handles used by the cells below.
# Handle to the MediaPipe face detection solution module.
mp_face_detection = mp.solutions.face_detection
# Face detector instance (model_selection=0, 0.5 minimum detection confidence).
# NOTE(review): no visible cell calls `face_detection` -- confirm it is still needed.
face_detection = mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)
# Handle to the MediaPipe face mesh solution module; get_face_points reads its
# FACEMESH_FACE_OVAL / FACEMESH_LIPS connection sets.
mp_face_mesh = mp.solutions.face_mesh
# Single FaceMesh instance that get_face_points calls for every frame of every video.
# static_image_mode=False and max_num_faces=1: at most one face per frame is reported.
# NOTE(review): this one instance is shared by every call, including the
# ThreadPoolExecutor workers started further down -- confirm that sharing it
# across videos and threads is safe for this MediaPipe version.
face_mesh_videos = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1,
                                         min_detection_confidence=0.5,min_tracking_confidence=0.3)

In [7]:
def read_video(filename):
    """Yield the frames of a video file one at a time.

    Parameters
    ----------
    filename : str
        Path handed to ``cv2.VideoCapture``.

    Yields
    ------
    frame
        Each frame returned by ``cap.read()`` (OpenCV's BGR channel order),
        until a read fails or the capture reports it is no longer opened.

    Notes
    -----
    The capture is released in a ``finally`` clause so it is freed even when
    the consumer stops iterating early (generator closed) or an exception is
    raised while a frame is being processed.
    """
    cap = cv2.VideoCapture(filename)
    try:
        while cap.isOpened():
            ret, frame = cap.read()  # BGR
            if not ret:
                break
            yield frame
    finally:
        cap.release()

In [8]:
def save2npz(filename, data=None):
    """Write ``data`` to a compressed ``.npz`` archive under the key ``data``.

    Parameters
    ----------
    filename : str
        Destination path. ``.npz`` is appended when the name does not already
        end with it. Missing parent directories are created.
    data : object, optional
        Value stored in the archive as the ``data`` entry.
    """
    if not filename.endswith('.npz'):
        filename = filename + '.npz'
    parent_dir = os.path.dirname(filename)
    # A bare file name has an empty dirname; os.makedirs('') would raise.
    if parent_dir:
        # exist_ok avoids failing when another worker creates the directory first.
        os.makedirs(parent_dir, exist_ok=True)
    np.savez_compressed(filename, data=data)

In [9]:
def _landmarks_xy(face_landmarks, indexes):
    """Return the [x, y] pairs of the selected landmarks as an array, in ``indexes`` order."""
    return np.array([[face_landmarks.landmark[i].x, face_landmarks.landmark[i].y]
                     for i in indexes])


def get_face_points(video):
    """Extract face-oval and lip landmarks for every frame of a video.

    Parameters
    ----------
    video : str
        Path handed to ``cv2.VideoCapture``.

    Returns
    -------
    numpy.ndarray
        One dict per decoded frame, each with the keys ``'oval_landmarks'``
        and ``'lips_landmarks'``. When a face was found, each value is an
        array of ``[x, y]`` pairs read from the MediaPipe landmarks, ordered
        like ``list(set(...))`` over the corresponding connection set. When
        no face was found, each value is ``np.array([None])``. An empty array
        is returned if the video cannot be opened.

    Notes
    -----
    Frames are converted from OpenCV's BGR order to RGB before being passed
    to the module-level ``face_mesh_videos`` instance.

    NOTE(review): ``face_mesh_videos`` is shared by every call; with
    ``static_image_mode=False`` it presumably keeps tracking state between
    frames, so state may carry over between videos and between threads --
    confirm this is acceptable for the threaded entry points.
    """
    vid_capture = cv2.VideoCapture(video)
    all_points = []

    if not vid_capture.isOpened():
        print("Error opening the video file")
        return np.array(all_points)

    # The index lists do not depend on the frame, so build them once per video.
    oval_indexes = list(set(itertools.chain(*mp_face_mesh.FACEMESH_FACE_OVAL)))
    lips_indexes = list(set(itertools.chain(*mp_face_mesh.FACEMESH_LIPS)))

    try:
        while vid_capture.isOpened():
            ret, frame = vid_capture.read()
            if not ret:
                break
            # cv2 decodes to BGR; FaceMesh.process expects an RGB image.
            mesh_result = face_mesh_videos.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if mesh_result.multi_face_landmarks:
                # face_mesh_videos is created with max_num_faces=1, so there is one face.
                face_landmarks = mesh_result.multi_face_landmarks[0]
                points_per_frame = {
                    'oval_landmarks': _landmarks_xy(face_landmarks, oval_indexes),
                    'lips_landmarks': _landmarks_xy(face_landmarks, lips_indexes),
                }
            else:
                # Placeholder entry keeps the output aligned with frame numbers.
                points_per_frame = {
                    'oval_landmarks': np.array([None]),
                    'lips_landmarks': np.array([None]),
                }
            all_points.append(points_per_frame)
    finally:
        vid_capture.release()
    return np.array(all_points)

In [10]:
def extract_multi_internal(video, landmarks_path, which_folder):
    """Extract the landmarks of one video and save them as an ``.npz`` file.

    Parameters
    ----------
    video : str
        Path of the video; its base name is expected to look like
        ``<WORD>_<id>.mp4``.
    landmarks_path : str
        Root directory of the landmark output tree.
    which_folder : str
        Sub-folder name placed between the word and the file name.

    The result is written to
    ``<landmarks_path>/<WORD>/<which_folder>/<base name without extension>``
    via ``save2npz``.
    """
    file = os.path.basename(video)
    word = file.split("_")[0]
    # splitext removes only the extension. str.rstrip('.mp4') would strip any
    # trailing '.', 'm', 'p' or '4' characters, e.g. 'ABOUT_00004' -> 'ABOUT_0000'.
    stem = os.path.splitext(file)[0]
    points_array = get_face_points(video)
    new_savepath = os.path.join(landmarks_path, word, which_folder, stem)
    save2npz(new_savepath, data=points_array)

In [11]:
def extract_face_points_and_save(lrw_direc, landmarks_path, which_folder, max_workers=10):
    """Extract and save landmarks for every matching video using a thread pool.

    Parameters
    ----------
    lrw_direc : str
        Dataset root; videos are matched with ``<lrw_direc>/*/<which_folder>/*.mp4``.
    landmarks_path : str
        Root directory of the landmark output tree.
    which_folder : str
        Sub-folder name to process.
    max_workers : int, optional
        Number of worker threads (default 10).

    Each video is handled by ``extract_multi_internal``. A failure in one
    video is printed together with the video path and does not stop the
    remaining videos.
    """
    videos = glob.glob(os.path.join(lrw_direc, '*', which_folder, '*.mp4'))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_video = {
            executor.submit(extract_multi_internal, video, landmarks_path, which_folder): video
            for video in videos
        }
        for future in concurrent.futures.as_completed(future_to_video):
            try:
                # result() re-raises whatever the worker raised; without this
                # call a failing video would go unnoticed.
                future.result()
            except Exception as exc:
                print("Failed to extract landmarks from {}: {!r}".format(
                    future_to_video[future], exc))

In [12]:
def multi_extract_save(lrw_direc, landmarks_path, which_folder, max_workers=10):
    """Extract landmarks in a thread pool and save them from the calling thread.

    Parameters
    ----------
    lrw_direc : str
        Dataset root; videos are matched with ``<lrw_direc>/*/<which_folder>/*.mp4``.
    landmarks_path : str
        Root directory of the landmark output tree.
    which_folder : str
        Sub-folder name to process.
    max_workers : int, optional
        Number of worker threads (default 10).

    ``get_face_points`` runs in the pool; each result is written with
    ``save2npz`` to
    ``<landmarks_path>/<WORD>/<which_folder>/<base name without extension>``.
    """
    videos = glob.glob(os.path.join(lrw_direc, '*', which_folder, '*.mp4'))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Executor.map yields only the results, in input order, so pair them
        # back up with the paths they came from.
        results = executor.map(get_face_points, videos)
        for video, output_array in tqdm(zip(videos, results), total=len(videos)):
            file = os.path.basename(video)
            word = file.split("_")[0]
            # splitext, not rstrip('.mp4'): rstrip strips a character set and
            # would also eat a trailing '4' from the clip number.
            stem = os.path.splitext(file)[0]
            new_savepath = os.path.join(landmarks_path, word, which_folder, stem)
            save2npz(new_savepath, data=output_array)

In [13]:
def single_extract_face_points_and_save(lrw_direc, landmarks_path, which_folder):
    """Sequentially extract and save landmarks for every matching video.

    Videos are matched with ``<lrw_direc>/*/<which_folder>/*.mp4`` and passed
    one by one, behind a tqdm progress bar, to ``extract_multi_internal``
    together with ``landmarks_path`` and ``which_folder``.
    """
    pattern = os.path.join(lrw_direc, '*', which_folder, '*.mp4')
    clip_paths = glob.glob(pattern)
    progress = tqdm(clip_paths)
    for clip_path in progress:
        extract_multi_internal(clip_path, landmarks_path, which_folder)

In [15]:
# Run the sequential extractor over the 'test' split.
# NOTE(review): both paths are absolute and specific to one machine; adjust
# them (or move them to a config cell) before re-running elsewhere.
# Root of the video tree that is globbed as <lrw_path>/*/<which_folder>/*.mp4.
lrw_path = '/media/taylorpap/1TBM2/DatasetML/lipread_mp4'
# Root of the output tree that receives the .npz landmark files.
landmarks_path = '/media/taylorpap/1TBM2/DatasetML/Capstone/LANDMARKS'
# Split sub-folder to process in this run.
which_folder = 'test'
# One video at a time with a progress bar; the recorded output of this cell
# shows the run being interrupted by hand.
single_extract_face_points_and_save(lrw_path, landmarks_path, which_folder)

  0%|          | 100/25000 [00:16<1:09:01,  6.01it/s]


KeyboardInterrupt: 

In [None]:
# Run the thread-pool extractor over the 'train' split.
# NOTE(review): these reassign the same module-level names as the 'test'
# cell, with the same machine-specific absolute paths.
# Root of the video tree that is globbed as <lrw_path>/*/<which_folder>/*.mp4.
lrw_path = '/media/taylorpap/1TBM2/DatasetML/lipread_mp4'
# Root of the output tree that receives the .npz landmark files.
landmarks_path = '/media/taylorpap/1TBM2/DatasetML/Capstone/LANDMARKS'
# Split sub-folder to process in this run.
which_folder = 'train'
# Landmark extraction runs in worker threads; saving happens in this thread.
multi_extract_save(lrw_path, landmarks_path, which_folder)

100%|██████████| 488766/488766 [00:05<00:00, 85978.05it/s] 


[{'id': 9942, 'most_recent_fitting_scores': array([1.3021501, 1.2289561, 1.3895243], dtype=float32), 'facial_landmarks': array([[ 67.37307 , 102.33998 ],
        [ 67.9815  , 118.51357 ],
        [ 69.84342 , 134.71419 ],
        [ 72.836365, 150.84415 ],
        [ 78.37621 , 166.20525 ],
        [ 87.10389 , 180.23193 ],
        [ 98.489395, 192.255   ],
        [112.558205, 201.30927 ],
        [129.11067 , 203.89828 ],
        [145.84485 , 201.25172 ],
        [160.18242 , 192.10963 ],
        [171.65858 , 179.50352 ],
        [180.0401  , 164.99963 ],
        [185.3956  , 149.13167 ],
        [188.69629 , 132.62656 ],
        [190.53842 , 116.278114],
        [191.26785 , 100.21831 ],
        [ 81.380684,  84.86308 ],
        [ 87.70085 ,  77.79525 ],
        [ 97.19054 ,  74.330025],
        [107.55041 ,  74.920815],
        [116.74992 ,  78.82953 ],
        [141.00378 ,  78.73148 ],
        [150.43372 ,  74.73129 ],
        [160.95847 ,  74.01238 ],
        [170.9602  ,  77.6806 

In [None]:
def read_video(filename):
    """Yield the frames of a video file one at a time.

    Parameters
    ----------
    filename : str
        Path handed to ``cv2.VideoCapture``.

    Yields
    ------
    frame
        Each frame returned by ``cap.read()`` (OpenCV's BGR channel order),
        until a read fails or the capture reports it is no longer opened.

    Notes
    -----
    A function with this name is also defined in an earlier cell of this
    notebook; whichever cell ran last is the definition in effect.

    The capture is released in a ``finally`` clause so it is freed even when
    the consumer stops iterating early (generator closed) or an exception is
    raised while a frame is being processed.
    """
    cap = cv2.VideoCapture(filename)
    try:
        while cap.isOpened():
            ret, frame = cap.read()  # BGR
            if not ret:
                break
            yield frame
    finally:
        cap.release()

In [None]:
def linear_interpolate(landmarks, start_idx, stop_idx):
    """Fill the entries strictly between two indices by linear interpolation.

    The values at ``start_idx`` and ``stop_idx`` are left untouched; every
    position in between is overwritten with a point on the straight line
    joining them. ``landmarks`` is modified in place and also returned.
    """
    anchor = landmarks[start_idx]
    span = stop_idx - start_idx
    total_change = landmarks[stop_idx] - anchor
    offset = 1
    while offset < span:
        # Fraction of the way from the start entry to the stop entry.
        landmarks[start_idx + offset] = anchor + offset / float(span) * total_change
        offset += 1
    return landmarks

In [None]:
def warp_img(src, dst, img, std_size):
    """Warp ``img`` with the similarity transform estimated from ``src`` to ``dst``.

    Returns a tuple ``(warped, tform)``: the warped image, scaled by 255 and
    cast to ``uint8``, with output shape ``std_size``, and the estimated
    transform object.
    """
    # Similarity transform estimated from the src points to the dst points.
    tform = transform.estimate_transform('similarity', src, dst)
    # The original comment notes that warp returns a double image in [0, 1],
    # hence the rescale to 0-255 before the uint8 cast.
    unit_range = transform.warp(img, inverse_map=tform.inverse, output_shape=std_size)
    return (unit_range * 255).astype('uint8'), tform

In [None]:
# Path of a single clip kept for ad-hoc testing.
# NOTE(review): absolute, machine-specific path; nothing in the visible cells reads it.
path_for_testing = '/media/taylorpap/1TBM2/DatasetML/lipread_mp4/ABOUT/test/ABOUT_00001.mp4'