In [1]:
import sys
sys.path.append("../../")

In [2]:
import numpy as np
import open3d as o3d
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import joblib
import time
import math
import glob
import os
from datetime import datetime

from hand_landmarks.neural_networks.mlp import MLP_v3

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
def visualize_landmarks_through_frame(landmarks, time_sleep=0.01):
    assert landmarks.shape[1:] == (21, 3)
    
    x = np.array([[500, 0, 0],
                  [0, 0, 0]])
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(x)

    lines = [[0, 0]]
    colors = [[1, 0, 0] for i in range(len(lines))]
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(x),
        lines=o3d.utility.Vector2iVector(lines)
    )
    line_set.colors = o3d.utility.Vector3dVector(colors)
    
    vis = o3d.visualization.Visualizer()
    vis.create_window()
    vis.add_geometry(pcd)
    vis.add_geometry(line_set)

    for i in range(landmarks.shape[0]):
        hand_lmks = landmarks[i]
        pcd.points = o3d.utility.Vector3dVector(hand_lmks)

        lines = [[0,1],[1,2],[2,3],[3,4], 
                 [0,5],[5,6],[6,7],[7,8],
                 [5,9],[9,10],[10,11],[11,12],
                 [9,13],[13,14],[14,15],[15,16],
                 [13,17],[17,18],[18,19],[19,20],[0,17]]
        colors = [[1, 0, 0] for i in range(len(lines))]
        line_set.points = o3d.utility.Vector3dVector(hand_lmks)  # Update the points
        line_set.lines = o3d.utility.Vector2iVector(lines)  # Update the lines
        line_set.colors = o3d.utility.Vector3dVector(colors)

        vis.update_geometry(pcd)
        vis.update_geometry(line_set)
        vis.poll_events()
        vis.update_renderer()
        
        time.sleep(time_sleep)

    vis.destroy_window()

In [4]:
def write_data_to_csv(file_name, data, num_cam=2):
    num_points_each_joint = 3
    num_joints_each_hand = 21
    num_input_cols = num_cam * num_points_each_joint * num_joints_each_hand

    input_header = input_cam1_header
    for i in range(2, num_cam+1):
        input_cam_i_header = input_cam1_header.replace("cam1", "cam{}".format(i))
        input_header += ',' + input_cam_i_header

    output_header = input_cam1_header.replace("cam1_", "").replace("in", "out")
    csv_header = input_header + ',' + output_header

    assert len(csv_header.split(",")) == data.shape[1]

    np.savetxt(file_name, data, delimiter=',', fmt='%f', header=csv_header, comments='')

In [5]:
finger_joints_names = [
    "WRIST",
    "THUMB_CMC", "THUMB_MCP", "THUMB_IP", "THUMB_TIP",
    "INDEX_FINGER_MCP", "INDEX_FINGER_PIP", "INDEX_FINGER_DIP", "INDEX_FINGER_TIP",
    "MIDDLE_FINGER_MCP", "MIDDLE_FINGER_PIP", "MIDDLE_FINGER_DIP", "MIDDLE_FINGER_TIP",
    "RING_FINGER_MCP", "RING_FINGER_PIP", "RING_FINGER_DIP", "RING_FINGER_TIP",
    "PINKY_MCP", "PINKY_PIP", "PINKY_DIP", "PINKY_TIP"
]

# Visualize GTs

In [5]:
hand_lmks_file = np.load('/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_16_12.npz')
hand_lmks_file = np.load('/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_1.npz')
hand_lmks_file = np.load('/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_3.npz')
hand_lmks_file = np.load('/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_6.npz')

hand_lmks_gt = hand_lmks_file["landmarks_output"]

In [6]:
visualize_landmarks_through_frame(hand_lmks_gt, time_sleep=0.01)



# Visualize raw landmarks

In [11]:
hand_lmks_file = np.load('/home/giakhang/dev/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_16_12.npz')
hand_lmks_input = hand_lmks_file["raw_xyZ_of_opposite_cam"]

In [12]:
visualize_landmarks_through_frame(hand_lmks_input, time_sleep=0.01)

In [13]:
hand_lmks_input = hand_lmks_file["raw_xyZ_of_rightside_cam"]

In [14]:
visualize_landmarks_through_frame(hand_lmks_input, time_sleep=0.01)

# Prepare data

## Visualize for verifing that we save the correct landmarks

In [5]:
# Load the data from a CSV file
train_data = pd.read_csv("/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv")

num_output_nodes = 21 * 3
X_train = train_data.iloc[:, :-(num_output_nodes)]
Y_train = train_data.iloc[:, -(num_output_nodes):]

FileNotFoundError: [Errno 2] No such file or directory: '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv'

In [None]:
num_landmarks_each_cam = 21 * 3
X_train_cam_1 = X_train.values[:, :num_landmarks_each_cam]

In [36]:
visualize_landmarks_through_frame(X_train_cam_1.reshape(-1, 21, 3), time_sleep=0.1)

NameError: name 'X_train_cam_1' is not defined

In [24]:
visualize_landmarks_through_frame(Y_train.values.reshape(-1, 21, 3), time_sleep=0.1)

In [35]:
X_train_cam_2 = X_train.values[:, num_landmarks_each_cam:]
visualize_landmarks_through_frame(X_train_cam_2.reshape(-1, 21, 3), time_sleep=0.1)

In [28]:
# Load the data from a CSV file
test_data = pd.read_csv('/home/giakhang/dev/Hand_pose_estimation_3D/hand_landmarks/data/test_hand_landmarks_2024_6_14_16_12.csv')

num_output_nodes = 21 * 3
X_test = test_data.iloc[:, :-(num_output_nodes)]
Y_test = test_data.iloc[:, -(num_output_nodes):]

In [31]:
num_landmarks_each_cam = 21 * 3
X_test_cam_1 = X_test.values[:, :num_landmarks_each_cam]
visualize_landmarks_through_frame(X_test_cam_1.reshape(-1, 21, 3), time_sleep=0.1)

In [32]:
X_test_cam_2 = X_test.values[:, num_landmarks_each_cam:]
visualize_landmarks_through_frame(X_test_cam_2.reshape(-1, 21, 3), time_sleep=0.1)

In [33]:
visualize_landmarks_through_frame(Y_test.values.reshape(-1, 21, 3), time_sleep=0.1)

In [6]:
num_landmarks = 21 * 3
landmarks_opposite_cam = X.values[:, :num_landmarks]
landmarks_rightside_cam = X.values[:, num_landmarks:]

In [7]:
landmarks_opposite_cam = landmarks_opposite_cam.reshape(landmarks_opposite_cam.shape[0], 21, -1)
landmarks_rightside_cam = landmarks_rightside_cam.reshape(landmarks_rightside_cam.shape[0], 21, -1)
landmarks_gt = Y.values
landmarks_gt = landmarks_gt.reshape(landmarks_gt.shape[0], 21, -1)

In [9]:
visualize_landmarks_through_frame(landmarks_rightside_cam)

In [5]:
train_data_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv"
test_data_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/test_hand_landmarks_2024_6_14_16_12.csv"

# Load the data from a CSV file
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

num_output_nodes = 21 * 3
X_train = train_data.iloc[:, :-(num_output_nodes)]
Y_train = train_data.iloc[:, -(num_output_nodes):]
X_test = test_data.iloc[:, :-(num_output_nodes)]
Y_test = test_data.iloc[:, -(num_output_nodes):]

## Prepare data

In [6]:
data_folder = "/home/giakhang/dev/Hand_pose_estimation_3D/hand_landmarks/data"

In [7]:
train_data_files = glob.glob(os.path.join(data_folder, "*/train*.csv"))

In [8]:
train_data_frames = []

for file in train_data_files:
    train_data_frame = pd.read_csv(file)
    train_data_frames.append(train_data_frame)

# Concatenate all data frames into a single data frame
merged_train_data = pd.concat(train_data_frames, ignore_index=True)

In [9]:
test_data_files = [file.replace("train", "test") for file in train_data_files]

test_data_frames = []

for file in test_data_files:
    test_df = pd.read_csv(file)
    test_data_frames.append(test_df)

merged_test_data = pd.concat(test_data_frames, ignore_index=True)

In [10]:
num_output_nodes = 21 * 3
X_train = merged_train_data.iloc[:, :-(num_output_nodes)]
Y_train = merged_train_data.iloc[:, -(num_output_nodes):]
X_test = merged_test_data.iloc[:, :-(num_output_nodes)]
Y_test = merged_test_data.iloc[:, -(num_output_nodes):]

In [11]:
# Uncomment these lines to plot input data

#num_landmarks_each_cam = 21 * 3
#X_train_cam_1 = X_train.values[:, :num_landmarks_each_cam]
#visualize_landmarks_through_frame(X_train_cam_1.reshape(-1, 21, 3), time_sleep=0.1)

In [12]:
# Uncomment these lines to plot ground truth

#visualize_landmarks_through_frame(Y_train.reshape(-1, 21, 3), time_sleep=0.1)

In [13]:
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)

X_train shape:  (10400, 126)
Y_train shape:  (10400, 63)
X_test shape:  (2600, 126)
Y_test shape:  (2600, 63)


In [15]:
Y_train.describe()

Unnamed: 0,X00_out,Y00_out,Z00_out,X01_out,Y01_out,Z01_out,X02_out,Y02_out,Z02_out,X03_out,...,Z17_out,X18_out,Y18_out,Z18_out,X19_out,Y19_out,Z19_out,X20_out,Y20_out,Z20_out
count,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,...,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0,10400.0
mean,0.0,0.0,0.0,14.384262,30.433786,26.121779,27.208326,69.310006,40.952818,42.165017,...,-34.929597,33.810691,107.851086,-38.386174,42.899233,111.029183,-37.531904,47.50309,113.788273,-36.67738
std,0.0,0.0,0.0,8.964156,11.815951,8.123622,14.647924,21.163211,14.433597,22.556395,...,9.040365,18.978196,26.375667,17.172874,23.74939,32.916211,23.686099,29.093467,39.883516,28.624345
min,-0.0,0.0,0.0,-23.670697,-64.748673,-15.538766,-45.011038,-100.946847,-30.27322,-71.061024,...,-68.384243,-53.93457,-6.679083,-111.734739,-74.552063,-15.94611,-136.512313,-88.271189,-16.220842,-147.103595
25%,-0.0,0.0,0.0,8.630123,24.054257,20.903517,19.131683,57.561561,31.228009,30.82997,...,-41.450894,20.756016,89.756673,-50.464206,30.694122,88.443774,-55.101766,30.856623,83.616217,-58.167145
50%,0.0,0.0,-0.0,13.318106,30.62811,25.881334,28.649104,70.18211,40.132983,46.463591,...,-34.887115,37.019401,105.615521,-40.068691,46.092255,109.463048,-38.212302,46.951125,112.757259,-34.739145
75%,0.0,0.0,0.0,19.575928,37.069663,31.563465,37.214256,81.890697,50.657666,58.257368,...,-29.073917,48.063218,124.227692,-26.535415,57.998158,131.366622,-20.395713,68.336226,138.910401,-16.72554
max,-0.0,0.0,0.0,48.86591,107.029399,58.152949,69.589181,175.967389,98.774789,96.745347,...,19.174742,87.083757,217.016422,49.079177,109.968209,250.35586,65.392882,124.22271,276.665438,80.980299


In [28]:
Y_train.describe().iloc[2, :].values

array([ 0.        ,  0.        ,  0.        ,  8.96415625, 11.81595119,
        8.12362204, 14.64792397, 21.16321122, 14.43359667, 22.55639506,
       28.43375508, 24.4261165 , 29.47037701, 33.42436142, 35.9234739 ,
        0.        , 21.02795644,  4.50783508, 19.10116162, 29.1421774 ,
        8.96058168, 24.16547652, 39.68030486, 12.57208705, 29.38690075,
       49.62840668, 15.7534795 ,  0.        , 20.8394099 ,  0.        ,
       22.53136462, 31.1066665 ,  8.80200547, 27.79026472, 43.81268544,
       13.03137639, 33.2542285 , 54.06295757, 16.24335107,  2.6022623 ,
       20.53037672,  4.39577203, 21.11668135, 29.63626699, 11.92226274,
       25.63784654, 41.03067118, 16.73015903, 31.09900514, 50.97113753,
       19.95717599,  6.56992481, 20.31298114,  9.0403653 , 18.97819575,
       26.37566735, 17.17287423, 23.74938969, 32.91621116, 23.68609937,
       29.09346749, 39.88351613, 28.62434487])

In [29]:
import scipy

In [34]:
from scipy import stats
import numpy as np

z_scores = np.abs(stats.zscore(Y_train))
threshold = 3
outliers = z_scores < threshold

In [35]:
outliers

Unnamed: 0,X00_out,Y00_out,Z00_out,X01_out,Y01_out,Z01_out,X02_out,Y02_out,Z02_out,X03_out,...,Z17_out,X18_out,Y18_out,Z18_out,X19_out,Y19_out,Z19_out,X20_out,Y20_out,Z20_out
0,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
1,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
3,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
4,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10395,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
10396,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
10397,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
10398,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True


In [36]:
mask = outliers.iloc[:, 3:]

In [42]:
np.sum(np.all(mask.values, axis=1))

0

In [44]:
mask.iloc[0, :]

X01_out     True
Y01_out     True
Z01_out     True
X02_out     True
Y02_out     True
Z02_out     True
X03_out     True
Y03_out     True
Z03_out     True
X04_out     True
Y04_out     True
Z04_out     True
X05_out    False
Y05_out     True
Z05_out     True
X06_out     True
Y06_out     True
Z06_out     True
X07_out     True
Y07_out     True
Z07_out     True
X08_out     True
Y08_out     True
Z08_out     True
X09_out    False
Y09_out     True
Z09_out    False
X10_out     True
Y10_out     True
Z10_out     True
X11_out     True
Y11_out     True
Z11_out     True
X12_out     True
Y12_out     True
Z12_out     True
X13_out     True
Y13_out     True
Z13_out     True
X14_out     True
Y14_out     True
Z14_out     True
X15_out     True
Y15_out     True
Z15_out     True
X16_out     True
Y16_out     True
Z16_out     True
X17_out     True
Y17_out     True
Z17_out     True
X18_out     True
Y18_out     True
Z18_out     True
X19_out     True
Y19_out     True
Z19_out     True
X20_out     True
Y20_out     Tr