In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv("gesture_label.csv")

print("Shape:", df.shape)
df

Shape: (86730, 9)


Unnamed: 0,frame,time,hand,landmark_id,x,y,z,hand_detected,Gesture
0,0,0.000000,Left,0,0.334776,0.658494,-6.840000e-08,1,other
1,0,0.000000,Left,1,0.285230,0.643635,2.382442e-03,1,other
2,0,0.000000,Left,2,0.256761,0.602198,5.615561e-03,1,other
3,0,0.000000,Left,3,0.243974,0.564563,1.095505e-02,1,other
4,0,0.000000,Left,4,0.236150,0.537544,1.926332e-02,1,other
...,...,...,...,...,...,...,...,...,...
86725,3232,107.733333,Right,16,,,,0,other
86726,3232,107.733333,Right,17,,,,0,other
86727,3232,107.733333,Right,18,,,,0,other
86728,3232,107.733333,Right,19,,,,0,other


In [3]:
missing = df.isnull().sum()
print("Missing values per column:\n", missing)
df.dtypes

Missing values per column:
 frame                0
time                 0
hand                 0
landmark_id          0
x                22344
y                22344
z                22344
hand_detected        0
Gesture              0
dtype: int64


frame              int64
time             float64
hand              object
landmark_id        int64
x                float64
y                float64
z                float64
hand_detected      int64
Gesture           object
dtype: object

In [4]:
df_clean = df[(df['hand_detected'] == 1) & df[['x', 'y', 'z']].notna().all(axis=1)]

In [5]:
df_clean 

Unnamed: 0,frame,time,hand,landmark_id,x,y,z,hand_detected,Gesture
0,0,0.000000,Left,0,0.334776,0.658494,-6.840000e-08,1,other
1,0,0.000000,Left,1,0.285230,0.643635,2.382442e-03,1,other
2,0,0.000000,Left,2,0.256761,0.602198,5.615561e-03,1,other
3,0,0.000000,Left,3,0.243974,0.564563,1.095505e-02,1,other
4,0,0.000000,Left,4,0.236150,0.537544,1.926332e-02,1,other
...,...,...,...,...,...,...,...,...,...
77695,3017,100.566667,Left,16,0.511442,0.438187,-1.391166e-01,1,other
77696,3017,100.566667,Left,17,0.548040,0.564964,-5.107581e-02,1,other
77697,3017,100.566667,Left,18,0.545136,0.512766,-8.193469e-02,1,other
77698,3017,100.566667,Left,19,0.523174,0.491191,-9.636851e-02,1,other


In [6]:
df_clean['Gesture'].value_counts()

Gesture
h        18482
r        13020
p        12955
other     9744
s         9387
g          798
Name: count, dtype: int64

In [7]:
frame_per_gesture = df_clean.groupby('Gesture')['frame'].nunique().sort_values(ascending=False)

print("frame per Gesture:")
print(frame_per_gesture)

frame per Gesture:
Gesture
h        805
p        526
r        472
other    463
s        399
g         38
Name: frame, dtype: int64


In [8]:
df_clean.describe()

Unnamed: 0,frame,time,landmark_id,x,y,z,hand_detected
count,64386.0,64386.0,64386.0,64386.0,64386.0,64386.0,64386.0
mean,1501.708741,50.056958,10.0,0.45391,0.390808,0.004074,1.0
std,874.002371,29.133412,6.055348,0.119338,0.128377,0.034752,0.0
min,0.0,0.0,0.0,0.020685,-0.069587,-0.212387,1.0
25%,756.0,25.2,5.0,0.370691,0.314075,-0.018549,1.0
50%,1445.5,48.183333,10.0,0.460004,0.39169,0.001388,1.0
75%,2302.0,76.733333,15.0,0.542844,0.46556,0.024265,1.0
max,3017.0,100.566667,20.0,0.822488,0.943631,0.177004,1.0


In [9]:
df_wide = df_clean.pivot_table(index=["frame", "Gesture"], 
                                columns="landmark_id", 
                                values=["x", "y", "z"])
df_wide.columns = [f"{coord}_{lmk}" for coord, lmk in df_wide.columns]
df_wide = df_wide.reset_index()


In [10]:
df_wide

Unnamed: 0,frame,Gesture,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,z_11,z_12,z_13,z_14,z_15,z_16,z_17,z_18,z_19,z_20
0,0,other,0.334776,0.285230,0.256761,0.243974,0.236150,0.274453,0.257871,0.246749,...,0.004127,0.016283,-0.009506,-0.007259,0.003640,0.013392,-0.006324,-0.005883,0.000068,0.007493
1,1,other,0.333568,0.286301,0.258806,0.244918,0.234262,0.272687,0.256561,0.246050,...,0.013623,0.031095,-0.004651,-0.002314,0.012014,0.026336,0.000154,0.000407,0.007652,0.017779
2,2,other,0.333439,0.285277,0.258234,0.243815,0.231658,0.272503,0.255895,0.245980,...,0.016355,0.034239,-0.003098,0.000325,0.015963,0.031094,0.002404,0.003686,0.011988,0.022922
3,3,other,0.333571,0.285669,0.258791,0.244590,0.232594,0.272561,0.255767,0.245902,...,0.015671,0.033168,-0.003151,0.000104,0.015307,0.030043,0.002436,0.003547,0.011552,0.022189
4,4,other,0.333600,0.285918,0.259218,0.246102,0.236276,0.271986,0.255883,0.245840,...,0.015520,0.033263,-0.002897,-0.000311,0.015079,0.030393,0.002482,0.003276,0.011608,0.022789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2698,3013,other,0.395888,0.358894,0.333479,0.314093,0.297802,0.385549,0.371069,0.351853,...,-0.045848,-0.035769,-0.027164,-0.032981,-0.022423,-0.012188,-0.011186,-0.014968,-0.010836,-0.004923
2699,3014,other,0.406378,0.367929,0.345697,0.333675,0.318458,0.409847,0.392670,0.367901,...,-0.066024,-0.062162,-0.032890,-0.043874,-0.041723,-0.038970,-0.013855,-0.023703,-0.026186,-0.026672
2700,3015,other,0.423236,0.387279,0.368105,0.362933,0.353950,0.445148,0.440082,0.412516,...,-0.094170,-0.093475,-0.039322,-0.059908,-0.063745,-0.063149,-0.015400,-0.031058,-0.035359,-0.035888
2701,3016,other,0.435217,0.409907,0.405242,0.408530,0.410300,0.488315,0.499606,0.472815,...,-0.109065,-0.117260,-0.037740,-0.063485,-0.077922,-0.084394,-0.016269,-0.034210,-0.043090,-0.047416


In [106]:
df_wide.to_csv("LSTM_6.csv", index=False)

In [11]:
def compute_rich_features(df):
    base_features = {}
    
    # 1. Raw coordinates (x0-x20, y0-y20, z0-z20)
    for axis in 'xyz':
        for i in range(21):
            col = f'{axis}_{i}'
            base_features[col] = df[col]

    # 2. Temporal difference (1st order velocity features)
    diff_features = {}
    for axis in 'xyz':
        for i in range(21):
            col = f'{axis}_{i}'
            diff_col = f'd_{axis}_{i}'
            diff_features[diff_col] = df[col].diff().fillna(0)

    # 3. Pairwise distances
    def dist(a, b):
        return np.linalg.norm(df[[f'{axis}_{a}' for axis in 'xyz']].values -
                              df[[f'{axis}_{b}' for axis in 'xyz']].values, axis=1)
    
    pairwise = {
        'thumb_index_dist': dist(4, 8),
        'wrist_index_dist': dist(0, 5),
        'pinky_index_dist': dist(20, 8),
        'palm_width': dist(5, 17)
    }

    # 4. Spread (std dev of finger tips)
    tip_ids = [4, 8, 12, 16, 20]
    tip_x = df[[f'x_{i}' for i in tip_ids]]
    tip_y = df[[f'y_{i}' for i in tip_ids]]
    tip_z = df[[f'z_{i}' for i in tip_ids]]
    spread = {
        'spread_x': tip_x.std(axis=1),
        'spread_y': tip_y.std(axis=1),
        'spread_z': tip_z.std(axis=1),
    }

    # 5. Normalized distances
    norm = {
        'norm_thumb_index': pairwise['thumb_index_dist'] / (pairwise['palm_width'] + 1e-6),
        'norm_wrist_index': pairwise['wrist_index_dist'] / (pairwise['palm_width'] + 1e-6),
    }

    # 6. Z mean/std tip + normalize
    z_mean_tip = tip_z.mean(axis=1)
    z_std_tip = tip_z.std(axis=1)
    z_features = {
        'z_mean_tip': z_mean_tip,
        'z_std_tip': z_std_tip,
        'z_mean_tip_norm': z_mean_tip / (pairwise['palm_width'] + 1e-6)
    }

    # 7. Thumb-index angle in 2D XY
    v1 = df[['x_4', 'y_4']].values - df[['x_0', 'y_0']].values
    v2 = df[['x_8', 'y_8']].values - df[['x_0', 'y_0']].values
    dot = np.einsum('ij,ij->i', v1, v2)
    norm_v1 = np.linalg.norm(v1, axis=1)
    norm_v2 = np.linalg.norm(v2, axis=1)
    cos_angle = dot / (norm_v1 * norm_v2 + 1e-6)
    angle_features = {
        'thumb_index_angle_xy': np.arccos(np.clip(cos_angle, -1.0, 1.0))
    }

    # 8. Velocity magnitude
    vel_mag = np.sqrt(
        sum((diff_features[f'd_{a}_{i}'] ** 2 for a in 'xyz' for i in range(21)))
    )
    velocity = {'velocity_magnitude': vel_mag}

    # รวมทั้งหมด
    features = pd.concat([
        pd.DataFrame(base_features),
        pd.DataFrame(diff_features),
        pd.DataFrame(pairwise, index=df.index),
        pd.DataFrame(spread, index=df.index),
        pd.DataFrame(norm, index=df.index),
        pd.DataFrame(z_features, index=df.index),
        pd.DataFrame(angle_features, index=df.index),
        pd.DataFrame(velocity, index=df.index),
        df[['Gesture']]  # Label
    ], axis=1)

    return features

In [12]:
df_features = compute_rich_features(df_wide)
df_features

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,...,spread_y,spread_z,norm_thumb_index,norm_wrist_index,z_mean_tip,z_std_tip,z_mean_tip_norm,thumb_index_angle_xy,velocity_magnitude,Gesture
0,0.334776,0.285230,0.256761,0.243974,0.236150,0.274453,0.257871,0.246749,0.241195,0.304236,...,0.056765,0.004634,1.350696,2.224122,0.014826,0.004634,0.198304,0.284904,0.000000,other
1,0.333568,0.286301,0.258806,0.244918,0.234262,0.272687,0.256561,0.246050,0.242038,0.303537,...,0.058406,0.007161,1.380330,2.121336,0.028760,0.007161,0.367113,0.326982,0.044206,other
2,0.333439,0.285277,0.258234,0.243815,0.231658,0.272503,0.255895,0.245980,0.241896,0.303717,...,0.056601,0.005535,1.360860,2.107888,0.032085,0.005535,0.407399,0.328353,0.014940,other
3,0.333571,0.285669,0.258791,0.244590,0.232594,0.272561,0.255767,0.245902,0.241951,0.303809,...,0.056581,0.005400,1.352812,2.101922,0.031077,0.005400,0.395060,0.321603,0.004173,other
4,0.333600,0.285918,0.259218,0.246102,0.236276,0.271986,0.255883,0.245840,0.241680,0.303348,...,0.057179,0.005780,1.356272,2.106376,0.031786,0.005780,0.402295,0.307252,0.006523,other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2698,0.395888,0.358894,0.333479,0.314093,0.297802,0.385549,0.371069,0.351853,0.335425,0.412609,...,0.035169,0.019377,1.066431,2.319429,-0.028957,0.019377,-0.426768,0.218137,0.097122,other
2699,0.406378,0.367929,0.345697,0.333675,0.318458,0.409847,0.392670,0.367901,0.346437,0.435196,...,0.037310,0.020354,0.873171,2.021085,-0.054096,0.020354,-0.735484,0.165105,0.128129,other
2700,0.423236,0.387279,0.368105,0.362933,0.353950,0.445148,0.440082,0.412516,0.382546,0.466400,...,0.044246,0.030833,0.840046,1.690482,-0.081070,0.030833,-0.889526,0.158159,0.186270,other
2701,0.435217,0.409907,0.405242,0.408530,0.410300,0.488315,0.499606,0.472815,0.439170,0.495902,...,0.059269,0.036762,1.179512,1.683380,-0.102613,0.036762,-1.103864,0.130940,0.251226,other


In [13]:
for col in df_features.columns:
    print(col)

x_0
x_1
x_2
x_3
x_4
x_5
x_6
x_7
x_8
x_9
x_10
x_11
x_12
x_13
x_14
x_15
x_16
x_17
x_18
x_19
x_20
y_0
y_1
y_2
y_3
y_4
y_5
y_6
y_7
y_8
y_9
y_10
y_11
y_12
y_13
y_14
y_15
y_16
y_17
y_18
y_19
y_20
z_0
z_1
z_2
z_3
z_4
z_5
z_6
z_7
z_8
z_9
z_10
z_11
z_12
z_13
z_14
z_15
z_16
z_17
z_18
z_19
z_20
d_x_0
d_x_1
d_x_2
d_x_3
d_x_4
d_x_5
d_x_6
d_x_7
d_x_8
d_x_9
d_x_10
d_x_11
d_x_12
d_x_13
d_x_14
d_x_15
d_x_16
d_x_17
d_x_18
d_x_19
d_x_20
d_y_0
d_y_1
d_y_2
d_y_3
d_y_4
d_y_5
d_y_6
d_y_7
d_y_8
d_y_9
d_y_10
d_y_11
d_y_12
d_y_13
d_y_14
d_y_15
d_y_16
d_y_17
d_y_18
d_y_19
d_y_20
d_z_0
d_z_1
d_z_2
d_z_3
d_z_4
d_z_5
d_z_6
d_z_7
d_z_8
d_z_9
d_z_10
d_z_11
d_z_12
d_z_13
d_z_14
d_z_15
d_z_16
d_z_17
d_z_18
d_z_19
d_z_20
thumb_index_dist
wrist_index_dist
pinky_index_dist
palm_width
spread_x
spread_y
spread_z
norm_thumb_index
norm_wrist_index
z_mean_tip
z_std_tip
z_mean_tip_norm
thumb_index_angle_xy
velocity_magnitude
Gesture


In [14]:
df_features.isnull().sum()

x_0                     2
x_1                     2
x_2                     2
x_3                     2
x_4                     2
                       ..
z_std_tip               2
z_mean_tip_norm         2
thumb_index_angle_xy    3
velocity_magnitude      0
Gesture                 0
Length: 141, dtype: int64

In [15]:
df_features[df_features.isnull().any(axis=1)]

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,...,spread_y,spread_z,norm_thumb_index,norm_wrist_index,z_mean_tip,z_std_tip,z_mean_tip_norm,thumb_index_angle_xy,velocity_magnitude,Gesture
1471,,,,,,,,,,,...,,,,,0.057774,,,,0.00608,h
2296,0.510611,,,,,,,,,,...,,,,,,,,,0.004536,h
2297,,0.521561,0.521414,0.509326,0.503386,0.514171,0.497316,0.488248,0.486832,0.498393,...,0.06655,0.013265,0.782094,,0.015843,0.013265,0.16868,,0.0,p


In [16]:
df_features = df_features.dropna()
df_features

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,...,spread_y,spread_z,norm_thumb_index,norm_wrist_index,z_mean_tip,z_std_tip,z_mean_tip_norm,thumb_index_angle_xy,velocity_magnitude,Gesture
0,0.334776,0.285230,0.256761,0.243974,0.236150,0.274453,0.257871,0.246749,0.241195,0.304236,...,0.056765,0.004634,1.350696,2.224122,0.014826,0.004634,0.198304,0.284904,0.000000,other
1,0.333568,0.286301,0.258806,0.244918,0.234262,0.272687,0.256561,0.246050,0.242038,0.303537,...,0.058406,0.007161,1.380330,2.121336,0.028760,0.007161,0.367113,0.326982,0.044206,other
2,0.333439,0.285277,0.258234,0.243815,0.231658,0.272503,0.255895,0.245980,0.241896,0.303717,...,0.056601,0.005535,1.360860,2.107888,0.032085,0.005535,0.407399,0.328353,0.014940,other
3,0.333571,0.285669,0.258791,0.244590,0.232594,0.272561,0.255767,0.245902,0.241951,0.303809,...,0.056581,0.005400,1.352812,2.101922,0.031077,0.005400,0.395060,0.321603,0.004173,other
4,0.333600,0.285918,0.259218,0.246102,0.236276,0.271986,0.255883,0.245840,0.241680,0.303348,...,0.057179,0.005780,1.356272,2.106376,0.031786,0.005780,0.402295,0.307252,0.006523,other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2698,0.395888,0.358894,0.333479,0.314093,0.297802,0.385549,0.371069,0.351853,0.335425,0.412609,...,0.035169,0.019377,1.066431,2.319429,-0.028957,0.019377,-0.426768,0.218137,0.097122,other
2699,0.406378,0.367929,0.345697,0.333675,0.318458,0.409847,0.392670,0.367901,0.346437,0.435196,...,0.037310,0.020354,0.873171,2.021085,-0.054096,0.020354,-0.735484,0.165105,0.128129,other
2700,0.423236,0.387279,0.368105,0.362933,0.353950,0.445148,0.440082,0.412516,0.382546,0.466400,...,0.044246,0.030833,0.840046,1.690482,-0.081070,0.030833,-0.889526,0.158159,0.186270,other
2701,0.435217,0.409907,0.405242,0.408530,0.410300,0.488315,0.499606,0.472815,0.439170,0.495902,...,0.059269,0.036762,1.179512,1.683380,-0.102613,0.036762,-1.103864,0.130940,0.251226,other


In [87]:
cols_to_drop = [f"{axis}_{i}" for axis in ['x', 'y', 'z'] for i in range(21)] + \
               [f"d_{axis}_{i}" for axis in ['x', 'y', 'z'] for i in range(21)]

df_features_clean = df_features.drop(columns=cols_to_drop)


In [89]:
df_features_clean

Unnamed: 0,thumb_index_dist,wrist_index_dist,pinky_index_dist,palm_width,spread_x,spread_y,spread_z,norm_thumb_index,norm_wrist_index,z_mean_tip,z_std_tip,z_mean_tip_norm,thumb_index_angle_xy,velocity_magnitude,Gesture
0,0.100982,0.166283,0.091066,0.074762,0.039528,0.056765,0.004634,1.350696,2.224122,0.014826,0.004634,0.198304,0.284904,0.000000,other
1,0.108138,0.166190,0.090168,0.078341,0.039642,0.058406,0.007161,1.380330,2.121336,0.028760,0.007161,0.367113,0.326982,0.044206,other
2,0.107175,0.166007,0.091285,0.078754,0.040867,0.056601,0.005535,1.360860,2.107888,0.032085,0.005535,0.407399,0.328353,0.014940,other
3,0.106418,0.165346,0.091030,0.078663,0.040539,0.056581,0.005400,1.352812,2.101922,0.031077,0.005400,0.395060,0.321603,0.004173,other
4,0.107162,0.166429,0.091558,0.079011,0.039878,0.057179,0.005780,1.356272,2.106376,0.031786,0.005780,0.402295,0.307252,0.006523,other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2698,0.072358,0.157376,0.085686,0.067850,0.043021,0.035169,0.019377,1.066431,2.319429,-0.028957,0.019377,-0.426768,0.218137,0.097122,other
2699,0.064223,0.148654,0.089834,0.073550,0.041476,0.037310,0.020354,0.873171,2.021085,-0.054096,0.020354,-0.735484,0.165105,0.128129,other
2700,0.076561,0.154069,0.077075,0.091138,0.023263,0.044246,0.030833,0.840046,1.690482,-0.081070,0.030833,-0.889526,0.158159,0.186270,other
2701,0.109645,0.156484,0.091428,0.092957,0.017344,0.059269,0.036762,1.179512,1.683380,-0.102613,0.036762,-1.103864,0.130940,0.251226,other


In [92]:
df_features_clean.to_csv("features_clean_6.csv", index=False)