In [1]:
import os
import sys
import glob

sys.path.append("/home/giakhang/dev/pose_sandbox/Hand_pose_estimation_3D/arm_and_hand")
sys.path.append("/home/giakhang/dev/pose_sandbox/Hand_pose_estimation_3D")

from dataloader_ann import HandArmLandmarksDataset_ANN

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
from datetime import datetime

from utilities import fuse_landmarks_from_two_cameras
from functools import partial


In [2]:
fusing_model = partial(fuse_landmarks_from_two_cameras,
    tolerance=1e-15,
    method_name="L-BFGS-B")

In [3]:
DATA_DIR = "/home/giakhang/dev/pose_sandbox/data"  
SELECTED_DATE = "2024-08-29*"

train_paths = glob.glob(os.path.join(DATA_DIR, "{}/{}/fine_landmarks_{}_*.csv".format(SELECTED_DATE, SELECTED_DATE, "train")))
val_paths = glob.glob(os.path.join(DATA_DIR, "{}/{}/fine_landmarks_{}_*.csv".format(SELECTED_DATE, SELECTED_DATE, "val")))

body_lines = [[0,2], [0, 3], [2, 4], [3, 4]]
lefthand_lines = [[0, 1], [1, 5], [5, 6], [5, 10], [5, 22], [10, 14], [14, 18], [18, 22], 
    [6, 7], [7, 8], [8, 9], 
    [10, 11], [11, 12], [12, 13], 
    [14, 15], [15, 16], [16, 17], 
    [18, 19], [19, 20], [20, 21], 
    [22, 23], [23, 24], [24, 25]]
body_distance_thres=500
leftarm_distance_thres=500
lefthand_distance_thres=200

train_dataset = HandArmLandmarksDataset_ANN(train_paths, 
    body_lines, 
    lefthand_lines, 
    body_distance_thres, 
    leftarm_distance_thres, 
    lefthand_distance_thres,
    filter_outlier=False,
    only_keep_frames_contain_lefthand=True)

In [4]:
input_data = train_dataset._inputs

In [5]:
input_data.shape

(107, 322)

# Slicing data

In [6]:
left_camera_first_intrinsic_value_idx = 144
right_camera_first_lmk_value_idx = left_camera_first_intrinsic_value_idx + 9
right_camera_first_intrinsic_value_idx = right_camera_first_lmk_value_idx + 144
first_right_2_left_matrix_value_idx = right_camera_first_intrinsic_value_idx + 9

left_camera_lmks = input_data[:, :left_camera_first_intrinsic_value_idx]  # shape: (N, 144), N = #rows
left_camera_intrinsic = input_data[:, left_camera_first_intrinsic_value_idx:right_camera_first_lmk_value_idx]  # shape: (N, 9), N = #rows
right_camera_lmks = input_data[:, right_camera_first_lmk_value_idx:right_camera_first_intrinsic_value_idx]  # shape: (N, 144), N = #rows
right_camera_intrinsic = input_data[:, right_camera_first_intrinsic_value_idx:first_right_2_left_matrix_value_idx]  # shape: (N, 9), N = #rows
right_2_left_mat = input_data[:, first_right_2_left_matrix_value_idx:]  # shape: (N, 16), N = #rows

In [7]:
right_camera_lmks.shape

(107, 144)

Get records which have depth (z != 0)

In [8]:
left_camera_lmks = left_camera_lmks.reshape(-1, 3, 48)  # shape: (N, 3, 48)
left_camera_lmks_z_values = left_camera_lmks[:, -1, :]
left_camera_rows_contain_z_idx = np.where(np.sum(left_camera_lmks_z_values, axis=1) != 0)[0]
left_camera_rows_contain_z_idx.shape

(107,)

In [9]:
right_camera_lmks = right_camera_lmks.reshape(-1, 3, 48)
right_camera_lmks_z_values = right_camera_lmks[:, -1, :]
right_camera_rows_contain_z_idx = np.where(np.sum(right_camera_lmks_z_values, axis=1) != 0)[0]
right_camera_rows_contain_z_idx.shape

(107,)

In [10]:
common_id_mask = left_camera_rows_contain_z_idx == right_camera_rows_contain_z_idx
fake_rows_selected_id = left_camera_rows_contain_z_idx[common_id_mask]

Set depth = 0

In [11]:
fake_left_camera_lmks = left_camera_lmks[fake_rows_selected_id]
fake_left_camera_lmks.shape

(107, 3, 48)

In [12]:
np.count_nonzero(fake_left_camera_lmks[:, -1, :])

2629

In [13]:
mask = np.random.choice([0, 1], size=(fake_left_camera_lmks[:, -1, :].shape), p=[0.75, 0.25])

In [14]:
fake_left_camera_lmks[:, -1, :] = fake_left_camera_lmks[:, -1, :] * np.random.choice([0, 1], size=(fake_left_camera_lmks[:, -1, :].shape), p=[0.75, 0.25])
#fake_left_camera_lmks[:, -1, :] = np.ones_like(fake_left_camera_lmks[:, -1, :]) 
#fake_left_camera_lmks[:, -1, :] = fake_left_camera_lmks[:, -1, :] * mask

In [15]:
np.count_nonzero(fake_left_camera_lmks[:, -1, :])

651

In [16]:
fake_right_camera_lmks = right_camera_lmks[fake_rows_selected_id]
fake_right_camera_lmks.shape

(107, 3, 48)

In [17]:
np.count_nonzero(fake_right_camera_lmks[:, -1, :])

2776

In [18]:
fake_right_camera_lmks[:, -1, :] = fake_right_camera_lmks[:, -1, :] * np.random.choice([0, 1], size=(fake_left_camera_lmks[:, -1, :].shape), p=[0.75, 0.25])
#fake_right_camera_lmks[:, -1, :] = np.ones_like(fake_left_camera_lmks[:, -1, :])
#fake_right_camera_lmks[:, -1, :] = fake_right_camera_lmks[:, -1, :] * mask
np.count_nonzero(fake_right_camera_lmks[:, -1, :])

651

In [19]:
fake_left_camera_lmks.shape

(107, 3, 48)

In [20]:
selected_left_camera_intrinsic = left_camera_intrinsic[fake_rows_selected_id]  # shape: (N*, 9), N* = #selected_rows
selected_right_camera_intrinsic = right_camera_intrinsic[fake_rows_selected_id]  # shape: (N*, 9), N* = #selected_rows
selected_right_2_left_mat = right_2_left_mat[fake_rows_selected_id]  # shape: (N*, 9), N* = #selected_rows

In [21]:
img_w = 1920 
img_h = 1080

In [22]:
fake_left_camera_lmks[:, 0, :] = fake_left_camera_lmks[:, 0, :]  * img_w  # shape: (N*, 3, 48), N* = #selected_rows
fake_left_camera_lmks[:, 1, :] = fake_left_camera_lmks[:, 1, :] * img_h  # shape: (N*, 3, 48), N* = #selected_rows
fake_right_camera_lmks[:, 0, :] = fake_right_camera_lmks[:, 0, :] * img_w  # shape: (N*, 3, 48), N* = #selected_rows
fake_right_camera_lmks[:, 1, :] = fake_right_camera_lmks[:, 1, :] * img_h  # shape: (N*, 3, 48), N* = #selected_rows

In [23]:
fake_left_camera_lmks = np.transpose(fake_left_camera_lmks, (0, 2, 1))  # shape: (N*, 48, 3), N* = #selected_rows
fake_right_camera_lmks = np.transpose(fake_right_camera_lmks, (0, 2, 1))  # shape: (N*, 48, 3)

In [24]:
fake_fusing_data = []
for i in range(fake_left_camera_lmks.shape[0]):
    left_xyZ = fake_left_camera_lmks[i]
    right_xyZ = fake_right_camera_lmks[i]
    left_intr = selected_left_camera_intrinsic[i].reshape(3, 3)
    right_intr = selected_right_camera_intrinsic[i].reshape(3, 3)
    right_2_left_mat = selected_right_2_left_mat[i].reshape(4, 4)
    fused_XYZ = fusing_model(left_xyZ,
        right_xyZ,
        right_intr,
        left_intr,
        right_2_left_mat)
    fake_fusing_data.append(fused_XYZ.flatten())

In [25]:
fake_fusing_data = np.array(fake_fusing_data)

In [26]:
arm_hand_fused_names = ["left shoulder", "left elbow", "left hip", "right shoulder", "right hip", 
 "WRIST", "THUMB_CMC", "THUMB_MCP", "THUMB_IP", "THUMB_TIP", "INDEX_FINGER_MCP", 
 "INDEX_FINGER_PIP", "INDEX_FINGER_DIP", "INDEX_FINGER_TIP", "MIDDLE_FINGER_MCP", 
 "MIDDLE_FINGER_PIP", "MIDDLE_FINGER_DIP", "MIDDLE_FINGER_TIP", "RING_FINGER_MCP", 
 "RING_FINGER_PIP", "RING_FINGER_DIP", "RING_FINGER_TIP", "PINKY_MCP", "PINKY_PIP", 
 "PINKY_DIP", "PINKY_TIP", "right elbow"]

body_lines = [[0,2], [0, 3], [2, 4], [3, 4]]
lefthand_lines = [[0, 1], [1, 5], [5, 6], [5, 10], [5, 22], [10, 14], [14, 18], [18, 22], 
    [6, 7], [7, 8], [8, 9], 
    [10, 11], [11, 12], [12, 13], 
    [14, 15], [15, 16], [16, 17], 
    [18, 19], [19, 20], [20, 21], 
    [22, 23], [23, 24], [24, 25]]

In [27]:
gt_lmks = train_dataset._outputs
gt_lmks = gt_lmks.reshape(-1, 3, 48)
gt_lmks = np.transpose(gt_lmks, (0, 2, 1))

merged_fake_and_gt = np.concatenate([gt_lmks, fake_fusing_data.reshape(-1, 48, 3)], axis=0)

Plot ground-truth and fake data

In [32]:
import open3d as o3d
from utilities import convert_to_shoulder_coord
import time

time_sleep = 0.1
x = np.array([[500, 0, 0],
    [0, 0, 0]])
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(x)
lines = [[0, 0]]
colors = [[1, 0, 0] for i in range(len(lines))]
line_set = o3d.geometry.LineSet(
    points=o3d.utility.Vector3dVector(x),
    lines=o3d.utility.Vector2iVector(lines)
)
line_set.colors = o3d.utility.Vector3dVector(colors)
    
vis = o3d.visualization.Visualizer()
vis.create_window()
vis.add_geometry(pcd)
vis.add_geometry(line_set)

for i in range(merged_fake_and_gt.shape[0]):
    output = merged_fake_and_gt[i, ...]  # shape: (48, 3)

    lines = body_lines.copy()
    lines.extend(lefthand_lines)

    points, _ = convert_to_shoulder_coord(output,
        arm_hand_fused_names)

    pcd.points = o3d.utility.Vector3dVector(points)

    if i < gt_lmks.shape[0]:
        colors = [[1, 0, 0] for i in range(len(lines))]
    else:
        colors = [[0, 0, 1] for i in range(len(lines))]
    line_set.points = o3d.utility.Vector3dVector(points)  
    line_set.lines = o3d.utility.Vector2iVector(lines) 
    line_set.colors = o3d.utility.Vector3dVector(colors)

    vis.update_geometry(pcd)
    vis.update_geometry(line_set)
    vis.poll_events()
    vis.update_renderer()
        
    time.sleep(time_sleep)

vis.destroy_window()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


Plot fake data

In [32]:
import open3d as o3d
from utilities import convert_to_shoulder_coord
import time

time_sleep = 0.1
x = np.array([[500, 0, 0],
    [0, 0, 0]])
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(x)
lines = [[0, 0]]
colors = [[1, 0, 0] for i in range(len(lines))]
line_set = o3d.geometry.LineSet(
    points=o3d.utility.Vector3dVector(x),
    lines=o3d.utility.Vector2iVector(lines)
)
line_set.colors = o3d.utility.Vector3dVector(colors)
    
vis = o3d.visualization.Visualizer()
vis.create_window()
vis.add_geometry(pcd)
vis.add_geometry(line_set)

for i in range(fake_fusing_data.shape[0]):
    output = fake_fusing_data[i, ...]  # shape: (144)
    output = output.reshape(48, 3)  # shape: (48, 3)

    lines = body_lines.copy()
    lines.extend(lefthand_lines)

    points, _ = convert_to_shoulder_coord(output,
        arm_hand_fused_names)

    pcd.points = o3d.utility.Vector3dVector(points)
    colors = [[1, 0, 0] for i in range(len(lines))]
    line_set.points = o3d.utility.Vector3dVector(points)  
    line_set.lines = o3d.utility.Vector2iVector(lines) 
    line_set.colors = o3d.utility.Vector3dVector(colors)

    vis.update_geometry(pcd)
    vis.update_geometry(line_set)
    vis.poll_events()
    vis.update_renderer()
        
    time.sleep(time_sleep)

vis.destroy_window()

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


Calculate abs-error between GT data and fake data (depth = 0)

In [28]:
train_dataset._outputs

array([[-4.25242941e+01, -2.10511963e+00, -1.05293310e+02, ...,
         2.57038318e+03,  2.62442164e+03,  2.65511055e+03],
       [ 3.83211296e+01,  7.68492058e+01, -9.61939719e+00, ...,
         2.69328283e+03,  2.71155484e+03,  2.71490973e+03],
       [ 1.80402893e+01,  5.28069755e+01, -3.68680789e+01, ...,
         2.66025776e+03,  2.68894465e+03,  2.70117981e+03],
       ...,
       [-1.83994547e+01,  7.44376419e+01, -7.76488667e+01, ...,
         2.64023472e+03,  2.67795683e+03,  2.70501859e+03],
       [-2.21631086e+01,  7.99102842e+01, -7.78069431e+01, ...,
         2.62882537e+03,  2.66246453e+03,  2.68566126e+03],
       [-2.30118613e+01,  7.20885288e+01, -7.99117471e+01, ...,
         2.63425038e+03,  2.66615231e+03,  2.68851256e+03]])

In [29]:
fake_fusing_data

array([[ -42.52442052, -185.2095743 , 2218.97938764, ..., -410.07379255,
         309.3090836 , 2655.15800635],
       [  38.32093922, -181.8097672 , 2263.43605478, ..., -291.02371353,
         307.81803029, 2714.91374759],
       [  18.0403482 , -181.87053466, 2258.31249428, ..., -328.01398007,
         310.03188802, 2701.20714513],
       ...,
       [ -18.39979591, -171.56696273, 2233.55714804, ..., -419.02639827,
         358.35669457, 2705.02530216],
       [ -22.16242369, -170.45824039, 2234.11511476, ..., -412.10049938,
         346.62717294, 2685.74548267],
       [ -23.01153525, -172.24973317, 2232.76519487, ..., -412.93104161,
         349.62096188, 2688.4842939 ]])

In [30]:
true_outputs = train_dataset._outputs
error = true_outputs - fake_fusing_data
abs_error = np.absolute(error)
mean_abs_error = np.mean(abs_error)
print(mean_abs_error)

1140.8771597441114


In [31]:
1140

1140

Write fake date to file

In [105]:
from csv_writer import create_csv, append_to_csv, fusion_csv_columns_name

In [106]:
fake_date_path = "/home/giakhang/dev/pose_sandbox/Hand_pose_estimation_3D/arm_and_hand/fake_data.csv"
create_csv(fake_date_path, fusion_csv_columns_name)

In [107]:
for i in range(fake_fusing_data.shape[0]):
    left_xyZ = fake_left_camera_lmks[i]  # shape: (48, 3)
    right_xyZ = fake_right_camera_lmks[i]  # shape: (48, 3)
    left_intr = selected_left_camera_intrinsic[i]  # shape: (9)
    right_intr = selected_right_camera_intrinsic[i]  # shape: (9)
    right_2_left_mat = selected_right_2_left_mat[i]  # shape: (16)

    fused_lmks = fake_fusing_data[i, ...]  # shape: (144)

    left_xyZ = left_xyZ.T  # shape: (3, 48)
    left_xyZ[0, :] = left_xyZ[0, :] / img_w
    left_xyZ[1, :] = left_xyZ[1, :] / img_h
    right_xyZ = right_xyZ.T  # shape: (3, 48)
    right_xyZ[0, :] = right_xyZ[0, :] / img_w
    right_xyZ[1, :] = right_xyZ[1, :] / img_h
    fused_lmks = fused_lmks.reshape(48, 3)  # shape: (48, 3)
    fused_lmks = fused_lmks.T  # shape: (3, 48)

    input_row = np.concatenate([[i],
        left_xyZ.flatten(),
        left_intr.flatten(),
        right_xyZ.flatten(),
        right_intr.flatten(),
        right_2_left_mat.flatten(),
        fused_lmks.flatten()])
    append_to_csv(fake_date_path, input_row)