In [None]:
import os
import sys
import numpy as np
import pandas as pd
from pathlib import Path

# Repository root: two directory levels above the notebook's working directory.
root = Path.cwd().parents[1].resolve()
# Put the root first on the import path so the project packages imported by
# later cells are resolved from this checkout.
sys.path[:0] = [str(root)]

# Load Video

In [None]:
from core.utils.video import video_read

# Sample clip, addressed relative to the repository root.
sample_video_path = root / 'data/videos/sample.mp4'

# Read the clip with the project's video_read helper.
# NOTE(review): the result is presumably an array of frames — confirm against core.utils.video.
stream = video_read(sample_video_path)

# Last expression of the cell: display the shape of what was read.
stream.shape

# Face and Lip Detection

In [None]:
from common.face_detection import recognition


def detector(frame):
    """Detect the face and lip regions of a single image.

    Args:
        frame (np.array): numpy array with shape H x W x C

    Returns:
        tuple: ``(face, lip)``. Each item is the crop returned by
        ``recognition`` for that region, or ``None`` when that crop is
        missing or has a zero-sized dimension. The two crops are validated
        independently of each other.
    """
    def usable(crop):
        # A crop is usable only when it exists and none of its dimensions is zero.
        return crop is not None and 0 not in crop.shape

    res = recognition(frame)
    face_crop, lip_crop = res[0], res[1]

    # Validate each crop on its own. The previous version tested the face crop
    # twice, so an empty lip crop was handed on unchanged whenever the face
    # crop was fine.
    face = face_crop if usable(face_crop) else None
    lip = lip_crop if usable(lip_crop) else None

    return face, lip

def cleaning(stream):
    """Fill the gaps left by frames for which detection produced nothing.

    Args:
        stream (list): per-frame values, with ``None`` marking a frame that
            has no detection result.

    Returns:
        np.ndarray: one-dimensional array with one entry per input item.
        A missing entry takes the next available value; missing entries
        after the last available value take that last value instead.
        If every entry is missing, nothing can be filled and the gaps remain.
    """
    series = pd.Series(data=stream)
    # bfill() replaces the deprecated fillna(method="backfill"). The extra
    # ffill() covers trailing gaps, which a backward fill alone leaves as None.
    return series.bfill().ffill().to_numpy()

from cv2 import resize

def resize_face(frame):
    """Scale a face crop to the fixed face size.

    Args:
        frame: face image accepted by ``cv2.resize``

    Returns:
        The value returned by ``cv2.resize`` for a ``dsize`` of (224, 224).
    """
    face_size = (224, 224)  # cv2 takes dsize as (width, height)
    return resize(frame, dsize=face_size)

def resize_lip(frame):
    """Scale a lip crop to the fixed lip size.

    Args:
        frame: lip image accepted by ``cv2.resize``

    Returns:
        The value returned by ``cv2.resize`` for a ``dsize`` of (100, 50).
    """
    lip_size = (100, 50)  # cv2 takes dsize as (width, height)
    return resize(frame, dsize=lip_size)


from multiprocessing import Pool
from more_itertools import unzip

def processing(stream):
    """Detect, clean and resize the face and lip crops of every frame.

    Args:
        stream: iterable of frames; each frame is passed to ``detector``.

    Returns:
        tuple: ``(faces, lips)`` — two lists holding one ``resize_face`` /
        ``resize_lip`` result per frame. Both lists are empty when the
        stream contains no frames.

    Raises:
        ValueError: if, after ``cleaning``, some frame still has no face
            crop or no lip crop, so there is nothing to resize for it.
    """
    # Leave one core free but never request fewer than one worker:
    # os.cpu_count() may return None, and Pool(0) raises ValueError.
    workers = max(1, (os.cpu_count() or 1) - 1)

    with Pool(workers) as pool:
        results = pool.map(detector, stream)

        # Nothing to unpack for an empty stream.
        if not results:
            return [], []

        # Split the per-frame (face, lip) pairs into two parallel sequences.
        faces, lips = zip(*results)
        faces, lips = cleaning(list(faces)), cleaning(list(lips))

        # Fail with a clear message instead of an opaque resize error when
        # cleaning could not supply a crop for every frame.
        for label, crops in (("face", faces), ("lip", lips)):
            if any(crop is None for crop in crops):
                raise ValueError(
                    f"{label} detection left frames without a crop after cleaning"
                )

        faces = pool.map(resize_face, faces)
        lips = pool.map(resize_lip, lips)

    return faces, lips

In [None]:
# Run detection, gap filling and resizing over the loaded stream
# (processing distributes the per-frame work over a process pool).
faces, lips = processing(stream)

# Generate TensorFlow Dataset

In [None]:
import tensorflow as tf
from numpy.lib.stride_tricks import sliding_window_view

# Number of consecutive frames in each sliding window built by generate_dataset.
window_width = 75

def generate_dataset(faces, lips, width=None):
    """Build a tf.data pipeline of overlapping face and lip frame windows.

    Args:
        faces: sequence of face frames, stacked along axis 0. The dataset
            signature declares each frame as 224 x 224 x 3, uint8.
        lips: sequence of lip frames, stacked along axis 0. The dataset
            signature declares each frame as 50 x 100 x 3, uint8.
        width (int, optional): number of consecutive frames per window.
            Defaults to the module-level ``window_width``.

    Returns:
        tf.data.Dataset: face and lip window datasets zipped together,
        batched with batch size 1 and prefetched. Each element is a
        one-item tuple holding the ``(face_window, lip_window)`` pair.
        NOTE(review): the one-item wrapper presumably makes Keras treat the
        pair as the model inputs rather than as (inputs, targets) — confirm.

    Raises:
        ValueError: if either sequence has fewer frames than ``width``.
    """
    if width is None:
        width = window_width

    def windows(frames, label):
        # Slide a window of `width` frames along the time axis.
        frames = np.asarray(frames)
        if frames.shape[0] < width:
            raise ValueError(
                f"need at least {width} {label} frames, got {frames.shape[0]}"
            )
        # sliding_window_view appends the window axis last; move it next to
        # the window index so every item is laid out as (width, H, W, C).
        return np.moveaxis(
            sliding_window_view(x=frames, window_shape=width, axis=0),
            source=-1,
            destination=1,
        )

    def as_dataset(frame_windows, frame_shape):
        # Stream the windows lazily. The signature uses the same width as the
        # windows themselves so the two cannot drift apart.
        return tf.data.Dataset.from_generator(
            lambda: iter(frame_windows),
            output_signature=tf.TensorSpec(
                shape=(width, *frame_shape), dtype=tf.uint8
            ),
        )

    face_dataset = as_dataset(windows(faces, "face"), (224, 224, 3))
    lip_dataset = as_dataset(windows(lips, "lip"), (50, 100, 3))

    return tf.data.Dataset.zip(((face_dataset, lip_dataset),)).batch(1).prefetch(1)

# Build the windowed face/lip dataset from the processed crops.
dataset = generate_dataset(faces, lips)

# Load DNN

In [None]:
from core.models.dnn import DNN
from core.utils.config import LipNetConfig, BaselineConfig

# Weight locations handed to the DNN constructor, relative to the repository root.
lipnet_weight = root / "models/lipnet/lipnet.h5"
baseline_weight = root / "models/baseline/mobilenet"

model = DNN(
    LipNetConfig(),
    BaselineConfig(),
    lipnet_weight,
    baseline_weight,
)

# Load the saved DNN checkpoint on top of the freshly constructed model.
model.load_weights(root / "models/dnn/2022-07-04T1414/dnn_37_0.32.h5")

# Only metrics are configured here; no optimizer or loss is passed.
model.compile(metrics=['accuracy'])

# Prediction

In [None]:
# Run the model over the windowed dataset; `prediction` holds whatever model.predict returns.
prediction = model.predict(dataset)