# Preprocessing Data
---

>**How to Read `landmark_id`**
>
>landmark_id structure: {`frame`}-{`landmark_type`}-{`landmark index`}
>
>
>**Example:** `42-left_hand-20`
>
>Frame → 42<br>
>Landmark Type → Left Hand<br>
>Landmark Index → 20

> **Index Range of Landmark Type**
>
> * Face: `0-467`
> * Pose: `468-500`
> * Right Hand: `501-521`
> * Left Hand: `522-542`
>
> Landmark order each frame: **|** [`IDX:0`] Face → Pose → Right Hand → Left Hand [`IDX:542`] **|** [`IDX:543`]

## Import Dependencies

In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import h5py
import tqdm

from matplotlib import pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn.model_selection import train_test_split

# Dummy call to trigger tqdm: later cells call `tqdm.notebook.tnrange`, while
# only the top-level `import tqdm` appears above.
# NOTE(review): presumably this call is here because it imports the
# `tqdm.notebook` submodule as a side effect -- confirm; an explicit
# `import tqdm.notebook` would state that intent directly.
tqdm.tnrange(2)

  tqdm.tnrange(2)


  0%|          | 0/2 [00:00<?, ?it/s]

<tqdm.notebook.tqdm_notebook at 0x2185684a920>

## Helper Function

In [2]:
# Number of landmark rows per frame (face + pose + right hand + left hand,
# i.e. row indices 0-542 as listed in the index-range table above).
ROWS_PER_FRAME = 543
# Frame count passed to frame_equalization() by the dataset cells below.
FIXED_FRAMES = 34

# First row index of each landmark group inside a single frame.
RH_IDX = 501
LH_IDX = 522
POSE_IDX = 468
FACE_IDX = 0

# Face-landmark indices of the lip contours (9 + 11 + 11 + 9 = 40 points).
# The concatenation order below fixes the feature order of every dataset
# built with LIPS_IDX, so do not reorder these lists.
lips_UpperOuter = [185, 40, 39, 37, 0, 267, 269, 270, 409]
lips_LowerOuter = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
lips_UpperInner = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308]
lips_LowerInner = [191, 80, 81, 82, 13, 312, 311, 310, 415]
LIPS_IDX = np.concatenate(
    [lips_UpperOuter, lips_LowerOuter, lips_UpperInner, lips_LowerInner]
)

# Rows 468-492: the first 25 of the 33 pose rows (468-500).
# NOTE(review): presumably these are the upper-body joints, per the name -- confirm.
UPPER_BODY_IDX = np.arange(468, 493)

The lip landmark indices above come from the MediaPipe Face Landmark module: https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_landmark/tensors_to_face_landmarks_with_attention.pbtxt

In [None]:
def landmark_reduction(pq_path,
                       faceIDX = None, poseIDX = None,
                       rhIDX = None, lhIDX = None):
    """Extract the selected landmark rows of every frame in a parquet file.

    The file is read with ``pd.read_parquet`` and its ``landmark_id`` column
    is dropped. Rows are assumed to be laid out in consecutive groups of
    ``ROWS_PER_FRAME`` rows, one group per frame; trailing rows that do not
    fill a whole frame are ignored.

    NOTE: a later cell of this notebook defines another ``landmark_reduction``
    (with a ``component`` argument); whichever definition runs last is the one
    in effect.

    Args:
        pq_path: Path of the parquet file to read.
        faceIDX: Within-frame row indices of the face landmarks to keep, or
            ``None`` to leave the face out.
        poseIDX: Within-frame row indices of the pose landmarks to keep, or
            ``None`` to leave the pose out.
        rhIDX: Within-frame row indices of the right-hand landmarks to keep,
            or ``None`` to leave the right hand out.
        lhIDX: Within-frame row indices of the left-hand landmarks to keep,
            or ``None`` to leave the left hand out.

    Returns:
        A list with one 1-D float array per frame: the flattened values of
        the selected rows in face, pose, right-hand, left-hand order. Frames
        are empty arrays when every group is ``None``.
    """
    df = pd.read_parquet(pq_path).drop(columns=['landmark_id'])
    total_frame = len(df) // ROWS_PER_FRAME

    # (row indices, keep_every_column). Face and hand rows drop the last
    # column while pose rows keep all of them.
    # NOTE(review): presumably the last column is a pose-only visibility
    # score -- confirm against the parquet schema.
    groups = (
        (faceIDX, False),
        (poseIDX, True),
        (rhIDX, False),
        (lhIDX, False),
    )
    # Identity test against None: truth-testing a NumPy index array with
    # more than one element raises ValueError, so ``if faceIDX:`` cannot be
    # used with the ``np.arange`` arguments the callers pass.
    selected = [
        (np.atleast_1d(np.asarray(rows, dtype=int)), keep_all)
        for rows, keep_all in groups
        if rows is not None
    ]

    sequence_landmark = []
    for frame in range(total_frame):
        boundary = ROWS_PER_FRAME * frame

        parts = []
        for rows, keep_all in selected:
            if keep_all:
                block = df.iloc[rows + boundary]
            else:
                block = df.iloc[rows + boundary, :-1]
            parts.append(block.to_numpy().flatten())

        if parts:
            frame_values = np.concatenate(parts).astype('float')
        else:
            # np.concatenate rejects an empty list; nothing was selected.
            frame_values = np.empty(0, dtype='float')
        sequence_landmark.append(frame_values)

    return sequence_landmark

In [None]:
def landmark_reduction(pq_path,
                       faceIDX = np.arange(0,468), poseIDX = np.arange(468,501),
                       rhIDX = np.arange(501,522), lhIDX = np.arange(522,543),
                       component = ('face', 'pose', 'right_hand', 'left_hand')):
    """Extract the selected landmark rows of every frame in a parquet file.

    The file is read with ``pd.read_parquet`` and its ``landmark_id`` column
    is dropped. Rows are assumed to be laid out in consecutive groups of
    ``ROWS_PER_FRAME`` rows, one group per frame; trailing rows that do not
    fill a whole frame are ignored.

    Args:
        pq_path: Path of the parquet file to read.
        faceIDX: Within-frame row indices of the face landmarks.
        poseIDX: Within-frame row indices of the pose landmarks.
        rhIDX: Within-frame row indices of the right-hand landmarks.
        lhIDX: Within-frame row indices of the left-hand landmarks.
        component: Names of the groups to include, any of ``'face'``,
            ``'pose'``, ``'right_hand'`` and ``'left_hand'``. A group is
            skipped when its name is absent here or when its index argument
            is ``None``. Omitting an index argument does NOT exclude the
            group: the defaults select every landmark of that group.

    Returns:
        A list with one 1-D float array per frame: the flattened values of
        the selected rows in face, pose, right-hand, left-hand order. Frames
        are empty arrays when nothing is selected.
    """
    df = pd.read_parquet(pq_path).drop(columns=['landmark_id'])
    total_frame = len(df) // ROWS_PER_FRAME

    # (name, row indices, keep_every_column). Face and hand rows drop the
    # last column while pose rows keep all of them.
    # NOTE(review): presumably the last column is a pose-only visibility
    # score -- confirm against the parquet schema.
    groups = (
        ('face', faceIDX, False),
        ('pose', poseIDX, True),
        ('right_hand', rhIDX, False),
        ('left_hand', lhIDX, False),
    )
    # Resolve the selection once instead of padding every frame with None
    # placeholders and filtering them out of an object-dtype array.
    selected = [
        (np.atleast_1d(np.asarray(rows, dtype=int)), keep_all)
        for name, rows, keep_all in groups
        if name in component and rows is not None
    ]

    sequence_landmark = []
    for frame in range(total_frame):
        boundary = ROWS_PER_FRAME * frame

        parts = []
        for rows, keep_all in selected:
            if keep_all:
                block = df.iloc[rows + boundary]
            else:
                block = df.iloc[rows + boundary, :-1]
            parts.append(block.to_numpy().flatten())

        if parts:
            frame_values = np.concatenate(parts).astype('float')
        else:
            # np.concatenate rejects an empty list; nothing was selected.
            frame_values = np.empty(0, dtype='float')
        sequence_landmark.append(frame_values)

    return sequence_landmark

In [None]:
def frame_equalization(seq_data, FIXED_FRAMES):
    """Resample a sequence of per-frame vectors to ``FIXED_FRAMES`` frames.

    Args:
        seq_data: Sequence of equally sized 1-D arrays, one per frame.
        FIXED_FRAMES: Number of frames the result must have.

    Returns:
        Array whose first axis has length ``FIXED_FRAMES``. Longer inputs are
        subsampled at evenly spaced frame positions, shorter inputs are
        stretched by repeating frames (nearest-exact interpolation), and
        inputs that already match are returned as an array unchanged.
    """
    frames = np.array(seq_data)
    n_frames = len(seq_data)

    if n_frames == FIXED_FRAMES:
        return frames

    if n_frames > FIXED_FRAMES:
        # Evenly spaced positions, truncated to integer frame indices.
        keep = np.linspace(0, n_frames - 1, FIXED_FRAMES, dtype=int)
        return frames[keep, :]

    # F.interpolate resizes the last axis, so move the frame axis there,
    # resize, and move it back.
    batch = torch.from_numpy(frames).unsqueeze(0)
    stretched = F.interpolate(
        batch.permute(0, 2, 1),
        size=(FIXED_FRAMES),
        mode='nearest-exact',
    )
    return stretched.permute(0, 2, 1).numpy()[0]

## Preprocess Dataset

In [69]:
# Sample table; the cells below read its `path` column (parquet file of each
# sample) and its `sign` column (label of each sample).
df_expanded = pd.read_csv('balanced_expanded_data_map.csv')

In [None]:
# Hold out 20% of the samples for validation. The split is made on row
# positions (not on the features) so the same index arrays can slice every
# feature set built in the cells below.
Xtrain_IDX, Xval_IDX, Ytrain, Yval = train_test_split(
    np.arange(df_expanded.shape[0]),
    df_expanded.sign,
    test_size=0.2,
    random_state=42,
)

#### 1. Base Component Dataset
(References from Previous Research)

In [8]:
# Dataset 1: all face, pose, right-hand and left-hand landmarks, with every
# sample resampled to FIXED_FRAMES frames.
x_data = [
    frame_equalization(
        landmark_reduction(
            df_expanded.path[i],
            faceIDX=np.arange(0, 468),
            poseIDX=np.arange(468, 501),
            rhIDX=np.arange(501, 522),
            lhIDX=np.arange(522, 543),
        ),
        FIXED_FRAMES,
    )
    for i in tqdm.notebook.tnrange(
        len(df_expanded.path), desc="Landmark Reduce and Frame Equalization"
    )
]

with h5py.File('data/datasets1.h5', 'w') as hf:
    # Features are sliced with the index arrays from the train/validation split.
    hf.create_dataset('x_train', data=np.take(x_data, Xtrain_IDX, axis=0))
    hf.create_dataset('y_train', data=np.array(Ytrain))
    hf.create_dataset('x_val', data=np.take(x_data, Xval_IDX, axis=0))
    hf.create_dataset('y_val', data=np.array(Yval))

    # Echo every stored dataset as a sanity check.
    for key in ('x_train', 'y_train', 'x_val', 'y_val'):
        print(hf.get(key))

# Release the feature list before the next dataset is built.
del x_data

Preprocessing Data:   0%|          | 0/1021 [00:00<?, ?it/s]

<HDF5 dataset "x_train": shape (816, 34, 1662), type "<f8">
<HDF5 dataset "y_train": shape (816,), type "|O">
<HDF5 dataset "x_val": shape (205, 34, 1662), type "<f8">
<HDF5 dataset "y_val": shape (205,), type "|O">


#### 2. Hand, Body, Mouth Dataset Component

In [9]:
# Dataset 2: lip landmarks (LIPS_IDX) instead of the full face, plus all
# pose, right-hand and left-hand landmarks.
x_data = [
    frame_equalization(
        landmark_reduction(
            df_expanded.path[i],
            faceIDX=LIPS_IDX,
            poseIDX=np.arange(468, 501),
            rhIDX=np.arange(501, 522),
            lhIDX=np.arange(522, 543),
        ),
        FIXED_FRAMES,
    )
    for i in tqdm.notebook.tnrange(
        len(df_expanded.path), desc="Landmark Reduce and Frame Equalization"
    )
]

with h5py.File('data/datasets2.h5', 'w') as hf:
    # Features are sliced with the index arrays from the train/validation split.
    hf.create_dataset('x_train', data=np.take(x_data, Xtrain_IDX, axis=0))
    hf.create_dataset('y_train', data=np.array(Ytrain))
    hf.create_dataset('x_val', data=np.take(x_data, Xval_IDX, axis=0))
    hf.create_dataset('y_val', data=np.array(Yval))

    # Echo every stored dataset as a sanity check.
    for key in ('x_train', 'y_train', 'x_val', 'y_val'):
        print(hf.get(key))

# Release the feature list before the next dataset is built.
del x_data

Preprocessing Data:   0%|          | 0/1021 [00:00<?, ?it/s]

<HDF5 dataset "x_train": shape (816, 34, 378), type "<f8">
<HDF5 dataset "y_train": shape (816,), type "|O">
<HDF5 dataset "x_val": shape (205, 34, 378), type "<f8">
<HDF5 dataset "y_val": shape (205,), type "|O">


#### 3. Hand, Upper Body, Face Component Dataset

In [10]:
# Dataset 3: full face, the UPPER_BODY_IDX subset of the pose, and both hands.
x_data = [
    frame_equalization(
        landmark_reduction(
            df_expanded.path[i],
            faceIDX=np.arange(0, 468),
            poseIDX=UPPER_BODY_IDX,
            rhIDX=np.arange(501, 522),
            lhIDX=np.arange(522, 543),
        ),
        FIXED_FRAMES,
    )
    for i in tqdm.notebook.tnrange(
        len(df_expanded.path), desc="Landmark Reduce and Frame Equalization"
    )
]

with h5py.File('data/datasets3.h5', 'w') as hf:
    # Features are sliced with the index arrays from the train/validation split.
    hf.create_dataset('x_train', data=np.take(x_data, Xtrain_IDX, axis=0))
    hf.create_dataset('y_train', data=np.array(Ytrain))
    hf.create_dataset('x_val', data=np.take(x_data, Xval_IDX, axis=0))
    hf.create_dataset('y_val', data=np.array(Yval))

    # Echo every stored dataset as a sanity check.
    for key in ('x_train', 'y_train', 'x_val', 'y_val'):
        print(hf.get(key))

# Release the feature list before the next dataset is built.
del x_data

Preprocessing Data:   0%|          | 0/1021 [00:00<?, ?it/s]

<HDF5 dataset "x_train": shape (816, 34, 1630), type "<f8">
<HDF5 dataset "y_train": shape (816,), type "|O">
<HDF5 dataset "x_val": shape (205, 34, 1630), type "<f8">
<HDF5 dataset "y_val": shape (205,), type "|O">


#### 4. Hand, Upper Body, Mouth Component

In [11]:
# Dataset 4: lip landmarks (LIPS_IDX), the UPPER_BODY_IDX subset of the pose,
# and both hands.
x_data = [
    frame_equalization(
        landmark_reduction(
            df_expanded.path[i],
            faceIDX=LIPS_IDX,
            poseIDX=UPPER_BODY_IDX,
            rhIDX=np.arange(501, 522),
            lhIDX=np.arange(522, 543),
        ),
        FIXED_FRAMES,
    )
    for i in tqdm.notebook.tnrange(
        len(df_expanded.path), desc="Landmark Reduce and Frame Equalization"
    )
]

with h5py.File('data/datasets4.h5', 'w') as hf:
    # Features are sliced with the index arrays from the train/validation split.
    hf.create_dataset('x_train', data=np.take(x_data, Xtrain_IDX, axis=0))
    hf.create_dataset('y_train', data=np.array(Ytrain))
    hf.create_dataset('x_val', data=np.take(x_data, Xval_IDX, axis=0))
    hf.create_dataset('y_val', data=np.array(Yval))

    # Echo every stored dataset as a sanity check.
    for key in ('x_train', 'y_train', 'x_val', 'y_val'):
        print(hf.get(key))

# Release the feature list before the next dataset is built.
del x_data

Preprocessing Data:   0%|          | 0/1021 [00:00<?, ?it/s]

<HDF5 dataset "x_train": shape (816, 34, 346), type "<f8">
<HDF5 dataset "y_train": shape (816,), type "|O">
<HDF5 dataset "x_val": shape (205, 34, 346), type "<f8">
<HDF5 dataset "y_val": shape (205,), type "|O">


#### 5. Hand Component

In [12]:
# Dataset 5: right-hand and left-hand landmarks only.
x_data = []

for i in tqdm.notebook.tnrange(len(df_expanded.path), desc="Landmark Reduce and Frame Equalization"):
    # The `landmark_reduction` definition with the `component` argument (the
    # last one defined above) defaults to ALL face/pose/hand landmarks, so
    # leaving out `faceIDX`/`poseIDX` does not exclude them. The hands-only
    # selection has to be requested through `component`; without it this cell
    # would store every landmark instead of the two hands.
    prep_data = landmark_reduction(df_expanded.path[i],
                                   rhIDX = np.arange(501,522),
                                   lhIDX = np.arange(522,543),
                                   component = ['right_hand', 'left_hand'])
    prep_data = frame_equalization(prep_data, FIXED_FRAMES)
    x_data.append(prep_data)


with h5py.File('data/datasets5.h5','w') as hf:

    # Features are sliced with the index arrays from the train/validation split.
    hf.create_dataset('x_train',data=np.take(x_data,Xtrain_IDX,axis=0))
    hf.create_dataset('y_train',data=np.array(Ytrain))
    hf.create_dataset('x_val',data=np.take(x_data,Xval_IDX,axis=0))
    hf.create_dataset('y_val',data=np.array(Yval))
    # Echo every stored dataset as a sanity check.
    print(hf.get('x_train'))
    print(hf.get('y_train'))
    print(hf.get('x_val'))
    print(hf.get('y_val'))

# Release the feature list once it has been written.
del x_data

Preprocessing Data:   0%|          | 0/1021 [00:00<?, ?it/s]

<HDF5 dataset "x_train": shape (816, 34, 126), type "<f8">
<HDF5 dataset "y_train": shape (816,), type "|O">
<HDF5 dataset "x_val": shape (205, 34, 126), type "<f8">
<HDF5 dataset "y_val": shape (205,), type "|O">
