## Import required libraries

In [372]:
import csv
from pathlib import Path
from sys import __stdout__
from ast import literal_eval
from pickle import dump

import numpy as np
import pandas as pd
import sklearn as sk
import copy
from pandas import DataFrame
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from typing import Dict, Set, Tuple, List, Any, Optional, Callable

import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils # type: ignore
mp_drawing_styles = mp.solutions.drawing_styles # type: ignore
mp_face_mesh = mp.solutions.face_mesh # type: ignore
mp_face_mesh_connections = mp.solutions.face_mesh_connections # type: ignore

from settings import *

## Prepare the Training Data

In [373]:
# Parallel lists describing every usable training sample: the blendshape
# index, its weight, and the path of the rendered image (one entry per CSV row
# whose image file exists on disk).
BLENDSHAPE_I = []
WEIGHTS = []
IMAGE_FILES = []

# newline="" is the mode the csv module documents for files it reads/writes;
# the `with` block guarantees the handle is closed even if a row is malformed.
with open(BLENDSHAPE_FILE, "r", newline="") as csv_file:
    reader = csv.reader(csv_file, skipinitialspace=True, delimiter=",")
    # Skip the header row.
    next(reader, None)
    for line in reader:
        # csv.reader yields [] for blank lines; they carry no sample.
        if not line:
            continue
        if not Path(line[2]).exists():
            print(line[2])
            print("File not found")
            continue
        BLENDSHAPE_I.append(line[0])
        WEIGHTS.append(line[1])
        IMAGE_FILES.append(line[2])

drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

# Write one CSV row per image: blendshape index, weight, then one [x, y, z]
# list per detected face landmark.
with open(TRAIN_FILE, "w", newline="") as train_file:
    writer = csv.writer(train_file, delimiter=",", quoting=csv.QUOTE_ALL)
    writer.writerow(HEADERS)

    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as face_mesh:
        for idx, file in enumerate(IMAGE_FILES):
            image = cv2.imread(file)
            # cv2.imread returns None (it does not raise) for unreadable files.
            if image is None:
                print(file)
                print("Image could not be read")
                continue
            # Convert the BGR image to RGB before processing.
            results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # Skip images in which no face was detected.
            if not results.multi_face_landmarks:
                continue
            # Copy of the frame; not used further in this cell.
            annotated_image = image.copy()

            # max_num_faces=1, so the first entry is the only tracked face.
            face_landmarks = results.multi_face_landmarks[0]
            arr = [BLENDSHAPE_I[idx], WEIGHTS[idx]]
            arr.extend(
                [landmark.x, landmark.y, landmark.z]
                for landmark in face_landmarks.landmark
            )
            writer.writerow(arr)


/Users/lokeyli/Documents/Unity/Unity-Web-socket/index67-weight$0.png
File not found


## Load Train Dataset

In [374]:
# Unique blendshape indices present in the training data; replaced with the
# real values once the training CSV has been loaded.
blendshape_idx_lst = []
# Name -> object registries used to assemble training pipelines.
# "Pipeline" is quoted because the class is defined further down; an unquoted
# module-level annotation would be evaluated here and raise NameError.
pipelines: Dict[str, "Pipeline"] = dict()
# Estimator instances (objects exposing fit/score), keyed by model name.
models: Dict[str, Any] = dict()
# Factories taking (X, Y) and returning an unfitted feature selector.
selection_methods: Dict[str, Callable] = dict()
# Functions turning the raw landmark DataFrame into a training DataFrame.
training_set_transformers: Dict[str, Callable] = dict()

class BlendshapeData:
    """Hold one train/test split: feature parts (X) and target parts (Y)."""

    def __init__(self, train_X, test_X, train_Y, test_Y) -> None:
        # Feature partitions first, then the matching target partitions.
        self.train_X, self.test_X = train_X, test_X
        self.train_Y, self.test_Y = train_Y, test_Y

def default_split(X, Y) -> BlendshapeData:
    """Shuffle and split X/Y into 80% train and 20% test with a fixed seed.

    Args:
        X: feature rows.
        Y: targets aligned with ``X``.

    Returns:
        BlendshapeData: the four partitions wrapped in one object.
    """
    # train_test_split returns (train_X, test_X, train_Y, test_Y), which is
    # exactly the positional order BlendshapeData expects.
    partitions = train_test_split(
        X, Y, test_size=0.2, random_state=42, shuffle=True
    )
    return BlendshapeData(*partitions)


In [375]:
class Pipeline:
    """Train one model per blendshape on a transformed training set.

    A pipeline combines a dataset transformer, an optional feature-selection
    factory, a model and a train/test split function.
    """

    def __init__(self, pipeline_name: str,
                dataset_transformer: Callable,
                model: Callable,
                split: Callable,
                selection_method: Optional[Callable] = None) -> None:
        """Store the pipeline components.

        Args:
            pipeline_name: label used in the printed score summary.
            dataset_transformer: called with the raw DataFrame; must return a
                DataFrame with a "blendshape_i" column, a column matching
                "weight", and features from the third column onwards.
            model: object exposing ``fit(X, y)`` and ``score(X, y)``.
            split: called with (X, Y); must return an object with
                ``train_X``, ``train_Y``, ``test_X`` and ``test_Y``.
            selection_method: optional factory called with (X, Y) returning an
                object exposing ``fit_transform(X, y)``.
        """
        self.pipeline_name = pipeline_name
        self.dataset_transformer = dataset_transformer
        self.selection_method = selection_method
        self.model = model
        self.split = split

    def fit(self, train_df: DataFrame, blendshape_indices: Optional[list] = None):
        """Fit one selector/model pair per blendshape and print score stats.

        Args:
            train_df: raw training DataFrame handed to the dataset transformer.
            blendshape_indices: blendshape ids to train for. Defaults to the
                module-level ``blendshape_idx_lst``.

        Returns:
            tuple: ``(selectors, predictors)`` dictionaries keyed by the
            blendshape id as a string. A selector entry is ``None`` when the
            pipeline has no selection method; every stored object is a deep
            copy, so later fits do not overwrite it.
        """
        if blendshape_indices is None:
            blendshape_indices = blendshape_idx_lst

        selectors = dict()
        predictors = dict()
        scores = []
        transformed_df = self.dataset_transformer(train_df)
        for blendshape_i in blendshape_indices:
            key = f"{blendshape_i}"
            sub_df = transformed_df[transformed_df["blendshape_i"] == blendshape_i]
            # The first two columns are metadata; the rest are features.
            features = sub_df.iloc[:, 2:]
            Y = sub_df.filter(regex="weight").to_numpy().flatten()
            selectors[key] = None
            if self.selection_method:
                # NOTE(review): the selector is fitted on all rows before the
                # train/test split below, so the reported scores are computed
                # on rows the selector has already seen.
                selector = self.selection_method(features, Y)
                features = selector.fit_transform(features, Y)
                selectors[key] = copy.deepcopy(selector)
            splitted_data = self.split(features, Y)
            self.model.fit(splitted_data.train_X, splitted_data.train_Y)
            score = self.model.score(splitted_data.test_X, splitted_data.test_Y)
            scores.append(score)
            # self.model is refitted on the next iteration, so keep a snapshot.
            predictors[key] = copy.deepcopy(self.model)
        if scores:
            print(f"{self.pipeline_name}: mean={np.mean(scores)}, min={np.min(scores)}, max={np.max(scores)}")
        else:
            # np.min/np.max raise on an empty sequence.
            print(f"{self.pipeline_name}: no blendshapes to fit")
        return selectors, predictors

In [376]:
# Load the landmark CSV, replacing the file's header row with HEADERS.
train_df = pd.read_csv(
    TRAIN_FILE, delimiter=",", header=0, names=HEADERS, index_col=False
)
# Distinct blendshape ids in order of first appearance.
blendshape_idx_lst = train_df["blendshape_i"].drop_duplicates().to_list()
# Every column after the first two holds a stringified [x, y, z] list;
# parse each cell and turn it into a numpy array in a single pass.
landmarks = list(train_df.columns[2:])
train_df[landmarks] = train_df[landmarks].applymap(
    lambda cell: np.array(literal_eval(cell))
)


In [377]:
# Show the distinct blendshape ids found in the training CSV.
print(blendshape_idx_lst)

[67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114]


### Generating different training sets

#### Default training set

In [378]:
def default_set(input_df: DataFrame) -> DataFrame:
    """Flatten every landmark's coordinates into individual feature columns.

    Args:
        input_df (DataFrame): the original dataframe loaded from the csv file;
            its first two columns are kept as-is and each remaining cell is
            expected to hold one landmark's coordinate array.

    Returns:
        DataFrame: one row per input row, made of the two leading columns
        followed by "<i>_landmark_<x|y|z>" columns for each of the
        N_LANDMARKS landmarks.
    """
    axes = ("x", "y", "z")
    column_names = list(input_df.columns[:2])
    column_names.extend(
        f"{i}_landmark_{j}" for i in range(N_LANDMARKS) for j in axes
    )

    # Keep the two leading values and append the concatenated coordinates.
    flattened_rows = [
        np.append(values[:2], np.concatenate(values[2:]))
        for values in (row.values for _, row in input_df.iterrows())
    ]
    return DataFrame(data=flattened_rows, columns=column_names)
training_set_transformers["default"] = default_set

#### Distance training set

In [398]:
def distance_set(input_df: DataFrame) -> DataFrame:
    """Produce the training set with distance between certain sets of landmarks

    The rows are computed from ``input_df`` itself, not from the module-level
    ``train_df``.

    Args:
        input_df (DataFrame): the original dataframe from the csv file; the
            first two columns are metadata and column ``k + 2`` holds the
            coordinate array of landmark ``k``.

    Returns:
        DataFrame: the two leading columns (cast to int) followed by one
        float64 "<SET_NAME>_distance_<n>" column per landmark connection,
        indexed like ``input_df``.
    """
    vertices_sets: Dict[str, Set[Tuple[int, int]]] = {
        "FACEMESH_FACE_OVAL": mp_face_mesh_connections.FACEMESH_FACE_OVAL,
        "FACEMESH_LIPS": mp_face_mesh_connections.FACEMESH_LIPS,
        "FACEMESH_LEFT_EYE": mp_face_mesh_connections.FACEMESH_LEFT_EYE,
        "FACEMESH_LEFT_IRIS": mp_face_mesh_connections.FACEMESH_LEFT_IRIS,
        "FACEMESH_LEFT_EYEBROW": mp_face_mesh_connections.FACEMESH_LEFT_EYEBROW,
        "FACEMESH_RIGHT_EYE": mp_face_mesh_connections.FACEMESH_RIGHT_EYE,
        "FACEMESH_RIGHT_EYEBROW": mp_face_mesh_connections.FACEMESH_RIGHT_EYEBROW,
        "FACEMESH_RIGHT_IRIS": mp_face_mesh_connections.FACEMESH_RIGHT_IRIS}

    # Walk the connection sets once so that column names and computed values
    # are guaranteed to follow the same order.
    # NOTE(review): the numbering follows the sets' iteration order; presumably
    # stable between runs, which exported models rely on - confirm.
    metadata_columns = input_df.columns[:2].to_list()
    distance_columns = []
    connections = []
    for name, vertices_set in vertices_sets.items():
        for idx, vertices in enumerate(vertices_set):
            distance_columns.append(f"{name}_distance_{idx}")
            connections.append((vertices[0], vertices[1]))
    distance_train_columns = metadata_columns + distance_columns

    # Build all rows first; assigning row by row with .loc re-allocates the
    # frame on every insertion.
    rows = []
    for _, row in input_df.iterrows():
        values = row.values
        new_row = list(values[:2])
        # Positional access: landmark k lives two columns after the metadata.
        new_row.extend(
            np.linalg.norm(values[start + 2] - values[end + 2])
            for start, end in connections
        )
        rows.append(new_row)

    distance_train_data = DataFrame(
        rows, columns=distance_train_columns, index=input_df.index
    )

    # cast the data types in the dataframe
    distance_train_data = distance_train_data.astype(
        {column: np.float64 for column in distance_columns}
    )
    for column in metadata_columns:
        distance_train_data[column] = distance_train_data[column].fillna(0).astype(int)

    return distance_train_data

training_set_transformers["distance"] = distance_set

def distance_for_prediction(input_df: DataFrame) -> DataFrame:
    """Compute landmark-connection distances for rows without metadata columns.

    Unlike the training variant, the input has no leading blendshape/weight
    columns: column ``k`` holds the coordinate array of landmark ``k``.

    Args:
        input_df (DataFrame): one row per face, one landmark array per cell.

    Returns:
        DataFrame: one float64 "<SET_NAME>_distance_<n>" column per landmark
        connection and one output row per input row (same index).
    """
    vertices_sets: Dict[str, Set[Tuple[int, int]]] = {
        "FACEMESH_FACE_OVAL": mp_face_mesh_connections.FACEMESH_FACE_OVAL,
        "FACEMESH_LIPS": mp_face_mesh_connections.FACEMESH_LIPS,
        "FACEMESH_LEFT_EYE": mp_face_mesh_connections.FACEMESH_LEFT_EYE,
        "FACEMESH_LEFT_IRIS": mp_face_mesh_connections.FACEMESH_LEFT_IRIS,
        "FACEMESH_LEFT_EYEBROW": mp_face_mesh_connections.FACEMESH_LEFT_EYEBROW,
        "FACEMESH_RIGHT_EYE": mp_face_mesh_connections.FACEMESH_RIGHT_EYE,
        "FACEMESH_RIGHT_EYEBROW": mp_face_mesh_connections.FACEMESH_RIGHT_EYEBROW,
        "FACEMESH_RIGHT_IRIS": mp_face_mesh_connections.FACEMESH_RIGHT_IRIS}

    # Walk the connection sets once so column names and values share one order.
    distance_train_columns = list()
    connections = list()
    for name, vertices_set in vertices_sets.items():
        for idx, vertices in enumerate(vertices_set):
            distance_train_columns.append(f"{name}_distance_{idx}")
            connections.append((vertices[0], vertices[1]))

    # One output row per input row (writing every row to label 0 would keep
    # only the last one). Cells are read by position, not by column label.
    rows = []
    for _, row in input_df.iterrows():
        values = row.values
        rows.append(
            [np.linalg.norm(values[start] - values[end]) for start, end in connections]
        )

    return DataFrame(
        rows, columns=distance_train_columns, index=input_df.index, dtype=np.float64
    )


#### Cartesian product default training set

In [380]:
def cartesian_product_default_set(input_df: DataFrame) -> DataFrame:
    """Repeat the default training set once per known blendshape index.

    Every copy has its "blendshape_i" column overwritten with one value from
    the module-level ``blendshape_idx_lst``; the copies are stacked in that
    order with a fresh 0..n-1 index.

    Args:
        input_df (DataFrame): the original dataframe from the csv file.

    Returns:
        DataFrame: the stacked copies, or an empty DataFrame when there are no
        blendshape indices.
    """
    if not blendshape_idx_lst:
        return DataFrame()

    # The flattened set does not depend on the loop variable; build it once.
    base_dataframe = default_set(input_df)
    frames = []
    for blendshape_i in blendshape_idx_lst:
        dataframe = base_dataframe.copy()
        dataframe.loc[:, 'blendshape_i'] = blendshape_i
        frames.append(dataframe)
    # A single concat avoids re-copying the accumulated rows on every step.
    return pd.concat(frames, ignore_index=True)
training_set_transformers["cartesian_product_default"] = cartesian_product_default_set

## Feature selection

### PCA

In [381]:
def selection_pca(X, Y) -> PCA:
    """Return an unfitted PCA sized to explain more than 95% of the variance.

    A probe PCA with the maximum number of components is fitted on ``X``; the
    smallest component count (among the first ``n - 1``) whose cumulative
    explained-variance ratio exceeds 0.95 is used, otherwise all ``n``.

    Args:
        X: 2-D feature data exposing ``.shape``.
        Y: targets, forwarded to ``PCA.fit``.

    Returns:
        PCA: a new, unfitted PCA configured with the chosen component count.
    """
    n: int = min(X.shape)
    probe = PCA(n_components=n)
    probe.fit(X=X, y=Y)
    # cumulative[k] is the variance ratio explained by the first k + 1 components.
    cumulative = np.cumsum(probe.explained_variance_ratio_)  # type: ignore
    # Only counts 1 .. n-1 are candidates; the fallback is the full n.
    enough = np.flatnonzero(cumulative[: max(n - 1, 0)] > 0.95)
    if enough.size:
        n = int(enough[0]) + 1
    return PCA(n_components=n)

selection_methods["PCA"] = selection_pca


### TSNE

In [382]:
def selection_tsne(X, Y) -> TSNE:
    """Return an unfitted TSNE with a data-driven number of components.

    t-SNE exposes no explained-variance information, so the dimensionality is
    estimated with a probe PCA (number of components whose cumulative
    explained-variance ratio exceeds 0.95) and then capped at 3, the largest
    value the default Barnes-Hut solver of scikit-learn's TSNE accepts.

    NOTE: scikit-learn's TSNE provides ``fit_transform`` but no ``transform``,
    so the returned object can embed the training rows but cannot project new
    rows at prediction time.

    Args:
        X: 2-D feature data exposing ``.shape``.
        Y: targets, forwarded to the probe ``PCA.fit``.

    Returns:
        TSNE: a new, unfitted TSNE configured with the chosen component count.
    """
    max_components = min(X.shape)
    probe = PCA(n_components=max_components)
    probe.fit(X=X, y=Y)
    cumulative = np.cumsum(probe.explained_variance_ratio_)  # type: ignore
    # Index of the first cumulative ratio strictly above 0.95, as a count.
    n = int(np.searchsorted(cumulative, 0.95, side="right")) + 1
    # Keep n within [1, 3] and never above what the data supports.
    n = max(1, min(n, max_components, 3))
    print("TSNE: n_components=", n)
    return TSNE(n_components=n)

selection_methods["TSNE"] = selection_tsne

## Model Training and Evaluation

### Ensemble Models

#### Random Forest Regressor

In [383]:
# Register a RandomForestRegressor with default hyper-parameters.
models["random_forest_regressor"] = RandomForestRegressor()

#### Ada Boost Regressor

In [384]:
# Register an AdaBoostRegressor with default hyper-parameters.
models["ada_boost_regressor"] = AdaBoostRegressor()

### Training

In [385]:
# pipelines["default_random_forest_regressor"] = Pipeline(
#     pipeline_name="default_random_forest_regressor",
#     dataset_transformer=training_set_transformers["default"],
#     selection_method=None,
#     model=models["random_forest_regressor"],
#     split=default_split)
# pipelines["distance_random_forest_regressor"] = Pipeline(
#     pipeline_name="distance_random_forest_regressor",
#     dataset_transformer=training_set_transformers["distance"],
#     selection_method=None,
#     model=models["random_forest_regressor"],
#     split=default_split)
# pipelines["default_ada_boost_regressor"] = Pipeline(
#     pipeline_name="default_ada_boost_regressor",
#     dataset_transformer=training_set_transformers["default"],
#     selection_method=None,
#     model=models["ada_boost_regressor"],
#     split=default_split)
# pipelines["distance_ada_boost_regressor"] = Pipeline(
#     pipeline_name="distance_ada_boost_regressor",
#     dataset_transformer=training_set_transformers["distance"],
#     selection_method=None,
#     model=models["ada_boost_regressor"],
#     split=default_split)
# Active pipelines: PCA feature selection feeding the registered AdaBoost
# regressor, once on the flattened landmark coordinates ("default") and once
# on the landmark distances ("distance"). Both entries reference the same
# model object from the `models` registry.
pipelines.update({
    "default_pca_ada_boost_regressor": Pipeline(
        pipeline_name="default_pca_ada_boost_regressor",
        dataset_transformer=training_set_transformers["default"],
        selection_method=selection_methods["PCA"],
        model=models["ada_boost_regressor"],
        split=default_split,
    ),
    "distance_pca_ada_boost_regressor": Pipeline(
        pipeline_name="distance_pca_ada_boost_regressor",
        dataset_transformer=training_set_transformers["distance"],
        selection_method=selection_methods["PCA"],
        model=models["ada_boost_regressor"],
        split=default_split,
    ),
})
# pipelines["distance_tsne_ada_boost_regressor"] = Pipeline(
#     pipeline_name="distance_tsne_ada_boost_regressor",
#     dataset_transformer=training_set_transformers["distance"],
#     selection_method=selection_methods["TSNE"],
#     model=models["ada_boost_regressor"],
#     split=default_split)
# pipelines["distance_tsne_ada_boost_regressor"] = Pipeline(
#     pipeline_name="distance_tsne_ada_boost_regressor",
#     dataset_transformer=training_set_transformers["distance"],
#     selection_method=selection_methods["TSNE"],
#     model=models["ada_boost_regressor"],
#     split=default_split)

In [386]:
# Fit every registered pipeline and keep its per-blendshape selectors and
# predictors, grouped under the pipeline's name.
selectors_groups: Dict[str, Dict[str, Any]] = {}
predictors_groups: Dict[str, Dict[str, Any]] = {}

for pipeline in pipelines.values():
    selectors, predictors = pipeline.fit(train_df)
    selectors_groups.update({pipeline.pipeline_name: selectors})
    predictors_groups.update({pipeline.pipeline_name: predictors})

default_pca_ada_boost_regressor: mean=0.9813951761843915, min=0.8682190943231096, max=0.9941995125205076
distance_pca_ada_boost_regressor: mean=0.9829311562654667, min=0.9270581622501624, max=0.9957184001658889


In [None]:
# Train every (training set, model) combination - a Cartesian product.
# Each combination is called a "group", named "<training_set_name>_<model_name>",
# and holds one fitted predictor per blendshape.
# NOTE(review): `prepare_data` and the `unity_blendshape_i` attribute are not
# defined in the visible part of this notebook (BlendshapeData has no such
# attribute) - confirm they exist before re-running this cell.
# NOTE: re-creating `predictors_groups` here discards any groups stored in it
# by earlier cells.
predictors_groups: Dict[str, Dict[str, Any]] = dict()
# Per-group list of test scores returned by model.score.
results: Dict[str, List[float]] = dict()
for training_set_name, training_set in training_set_transformers.items():
    for model_name, model in models.items():
        predictors = dict()
        data = prepare_data(training_set(train_df), blendshape_idx_lst)
        result = []
        for blendshape_data in data:
            model.fit(blendshape_data.train_X, blendshape_data.train_Y)
            score = model.score(blendshape_data.test_X, blendshape_data.test_Y)
            # print(
            #     f"{model.__class__.__name__}_blendshape_{blendshape_data.unity_blendshape_i} score: {score}"
            # )
            result.append(score)
            # The same model object is refitted on the next iteration, so store a copy.
            predictors[f"{blendshape_data.unity_blendshape_i}"] = copy.deepcopy(model)
        results[training_set_name + "_" + model_name] = result
        print("=" * 50)
        predictors_groups[training_set_name + "_" + model_name] = predictors
        # Debug dump: lists every predictor of every group trained so far, so
        # earlier groups are printed again after each new group.
        for predictors_group_name, predictors_group in predictors_groups.items():
            for predictor_name, predictor in predictors_group.items():
                print(  "predictors_group_name", predictors_group_name, 
                        "predictor_name", predictor_name, 
                        "n_features_in", predictor.n_features_in_)
# Score summary per group.
for res in results:
    print(f"{res}: mean={np.mean(results[res])}, min={np.min(results[res])}, max={np.max(results[res])}")

predictors_group_name default_random_forest_regressor predictor_name 67 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 68 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 69 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 70 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 71 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 72 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 73 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 74 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 75 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 76 n_features_in 1434
predictors_group_name default_random_forest_regressor predictor_name 77 n_features_in 1434

KeyboardInterrupt: 

In [None]:
# Train every (training set, model) combination and add each group to the
# already existing `predictors_groups` dictionary.
# NOTE(review): `prepare_data` and the `unity_blendshape_i` attribute are not
# defined in the visible part of this notebook - confirm they exist before
# re-running this cell.
# Per-group list of test scores returned by model.score.
results: Dict[str, List[float]] = dict()
for training_set_name, training_set in training_set_transformers.items():
    for model_name, model in models.items():
        predictors = dict()
        data = prepare_data(training_set(train_df), blendshape_idx_lst)
        result = []
        for blendshape_data in data:
            model.fit(blendshape_data.train_X, blendshape_data.train_Y)
            score = model.score(blendshape_data.test_X, blendshape_data.test_Y)
            # print(
            #     f"{model.__class__.__name__}_blendshape_{blendshape_data.unity_blendshape_i} score: {score}"
            # )
            result.append(score)
            # The same model object is refitted on the next iteration, so store a copy.
            predictors[f"{blendshape_data.unity_blendshape_i}"] = copy.deepcopy(model)
        results[training_set_name + "_" + model_name] = result
        print("=" * 50)
        predictors_groups[training_set_name + "_" + model_name] = predictors
        # Debug dump: lists every predictor of every group stored so far, so
        # earlier groups are printed again after each new group.
        for predictors_group_name, predictors_group in predictors_groups.items():
            for predictor_name, predictor in predictors_group.items():
                print(  "predictors_group_name", predictors_group_name, 
                        "predictor_name", predictor_name, 
                        "n_features_in", predictor.n_features_in_)
# Score summary per group.
for res in results:
    print(f"{res}: mean={np.mean(results[res])}, min={np.min(results[res])}, max={np.max(results[res])}")

In [404]:
# Run the exported pipeline on a sample image and print one predicted weight
# (truncated to int) per blendshape, separated by ", ".
IMAGE_FILES = ["/Users/lokeyli/Documents/Unity/Unity-Web-socket/index67-weight$100.png"]

selectors = selectors_groups["distance_pca_ada_boost_regressor"]
predictors = predictors_groups["distance_pca_ada_boost_regressor"]

drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
) as face_mesh:
    for idx, file in enumerate(IMAGE_FILES):
        image = cv2.imread(file)
        # cv2.imread returns None (it does not raise) for unreadable files.
        if image is None:
            print(file)
            print("Image could not be read")
            continue
        # Convert the BGR image to RGB before processing.
        results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Skip images in which no face was detected.
        if not results.multi_face_landmarks:
            continue
        # Copy of the frame; not used further in this cell.
        annotated_image = image.copy()

        # max_num_faces=1, so the first entry is the only tracked face.
        face_landmarks = results.multi_face_landmarks[0]
        arr = [np.array([a.x, a.y, a.z]) for a in face_landmarks.landmark]
        predict_df = pd.DataFrame([arr], columns=HEADERS[2:])

        # The distance features are identical for every blendshape model, so
        # compute them once per image instead of once per predictor.
        features = distance_for_prediction(predict_df)
        for blendshape_key, model in predictors.items():
            selector = selectors[blendshape_key]
            # Pipelines without a selection method store None as selector.
            model_input = features if selector is None else selector.transform(features)
            print(int(model.predict(model_input)[0]), end=", ")

97, 30, 71, 50, 7, 5, 78, 12, 36, 8, 4, 3, 10, 5, 6, 2, 22, 20, 4, 14, 4, 38, 4, 17, 13, 2, 19, 3, 2, 11, 3, 29, 2, 10, 5, 26, 21, 21, 19, 14, 70, 23, 17, 10, 20, 9, 72, 8, 

In [402]:
# Print the number of components each fitted selector of the
# "distance_pca_ada_boost_regressor" group ended up with (one per blendshape).
for selector in selectors_groups["distance_pca_ada_boost_regressor"].values():
    print(selector.n_components_)

2
2
2
2
4
5
4
4
6
5
5
4
4
4
3
1
2
2
5
5
4
4
2
2
4
1
5
3
1
3
3
2
1
6
5
4
3
4
4
6
4
3
5
5
5
6
5
5


## Export the ideal models

In [410]:
# Pickle every selector of the chosen pipeline into the working directory,
# one file per blendshape id.
selector_group = selectors_groups["distance_pca_ada_boost_regressor"]
print("selector_group", selector_group)
for blendshape_i, selector in selector_group.items():
    with Path(f"fm2bs_selector_{blendshape_i}.pkl").open("wb") as f:
        dump(selector, f)

# Same for the fitted predictors of that pipeline.
predictors_group = predictors_groups["distance_pca_ada_boost_regressor"]
print("predictors_group", predictors_group)
for blendshape_i, model in predictors_group.items():
    with Path(f"fm2bs_model_{blendshape_i}.pkl").open("wb") as f:
        dump(model, f)

selector_group {'67': PCA(n_components=2), '68': PCA(n_components=2), '69': PCA(n_components=2), '70': PCA(n_components=2), '71': PCA(n_components=4), '72': PCA(n_components=5), '73': PCA(n_components=4), '74': PCA(n_components=4), '75': PCA(n_components=6), '76': PCA(n_components=5), '77': PCA(n_components=5), '78': PCA(n_components=4), '79': PCA(n_components=4), '80': PCA(n_components=4), '81': PCA(n_components=3), '82': PCA(n_components=1), '83': PCA(n_components=2), '84': PCA(n_components=2), '85': PCA(n_components=5), '86': PCA(n_components=5), '87': PCA(n_components=4), '88': PCA(n_components=4), '89': PCA(n_components=2), '90': PCA(n_components=2), '91': PCA(n_components=4), '92': PCA(n_components=1), '93': PCA(n_components=5), '94': PCA(n_components=3), '95': PCA(n_components=1), '96': PCA(n_components=3), '97': PCA(n_components=3), '98': PCA(n_components=2), '99': PCA(n_components=1), '100': PCA(n_components=6), '101': PCA(n_components=5), '102': PCA(n_components=4), '103': PC