In [2]:
import sys
sys.path.append("../../")

In [3]:
import numpy as np
import open3d as o3d
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import joblib
import time
import math

from hand_landmarks.neural_networks.mlp import MLP

In [25]:
def visualize_landmarks_through_frame(landmarks, time_sleep=0.01):
    """Animate a sequence of 21-joint hand skeletons in an Open3D window.

    Each frame is drawn as a point cloud plus a red line set connecting the
    joints, then the function sleeps for ``time_sleep`` seconds before drawing
    the next frame. Playback stops early if the window is closed.

    Args:
        landmarks: Array of shape (num_frames, 21, 3), one xyz triple per joint.
        time_sleep: Pause in seconds after each rendered frame.

    Raises:
        AssertionError: If ``landmarks`` is not shaped (num_frames, 21, 3).
    """
    assert landmarks.shape[1:] == (21, 3), (
        "landmarks must have shape (num_frames, 21, 3), got {}".format(landmarks.shape)
    )

    # Placeholder geometry registered before the first real frame.
    # NOTE(review): presumably this fixes the initial camera framing - confirm.
    placeholder_points = np.array([[500, 0, 0],
                                   [0, 0, 0]])
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(placeholder_points)

    placeholder_lines = [[0, 0]]
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(placeholder_points),
        lines=o3d.utility.Vector2iVector(placeholder_lines)
    )
    line_set.colors = o3d.utility.Vector3dVector(
        [[1, 0, 0] for _ in placeholder_lines]
    )

    # The joint connectivity and colours are identical for every frame, so
    # they are built once instead of inside the playback loop.
    bone_lines = [[0, 1], [1, 2], [2, 3], [3, 4],
                  [0, 5], [5, 6], [6, 7], [7, 8],
                  [5, 9], [9, 10], [10, 11], [11, 12],
                  [9, 13], [13, 14], [14, 15], [15, 16],
                  [13, 17], [17, 18], [18, 19], [19, 20], [0, 17]]
    bone_colors = [[1, 0, 0] for _ in bone_lines]

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(pcd)
        vis.add_geometry(line_set)

        for hand_lmks in landmarks:
            frame_points = o3d.utility.Vector3dVector(hand_lmks)
            pcd.points = frame_points

            line_set.points = frame_points
            line_set.lines = o3d.utility.Vector2iVector(bone_lines)
            line_set.colors = o3d.utility.Vector3dVector(bone_colors)

            vis.update_geometry(pcd)
            vis.update_geometry(line_set)
            # poll_events() reports False once the window has been closed;
            # stop instead of rendering into a dead window.
            if not vis.poll_events():
                break
            vis.update_renderer()

            time.sleep(time_sleep)
    finally:
        # Always release the window, even if rendering fails or the kernel is
        # interrupted mid-playback.
        vis.destroy_window()

In [26]:
def write_data_to_csv(file_name, data, num_cam=2):
    """Write ``data`` to ``file_name`` as CSV with a generated header row.

    The header is built from the notebook-level string ``input_cam1_header``
    (comma-separated column names for camera 1), which must be defined before
    this function is called. Columns are ordered: camera 1 inputs, camera 2
    inputs, ..., camera ``num_cam`` inputs, then the output columns.

    Args:
        file_name: Destination path passed to ``np.savetxt``.
        data: 2-D array; its column count must equal the number of header names.
        num_cam: Number of cameras whose input columns appear in ``data``.

    Raises:
        AssertionError: If the header length does not match ``data.shape[1]``.
    """
    # Camera i reuses camera 1's column names with "cam1" swapped for "cam<i>".
    per_cam_headers = [input_cam1_header] + [
        input_cam1_header.replace("cam1", "cam{}".format(cam_idx))
        for cam_idx in range(2, num_cam + 1)
    ]
    input_header = ','.join(per_cam_headers)

    # Output names drop the camera prefix and turn "in" into "out".
    # NOTE(review): replace("in", "out") rewrites every "in" substring in the
    # header, not just a suffix - confirm the column names contain no other "in".
    output_header = input_cam1_header.replace("cam1_", "").replace("in", "out")
    csv_header = input_header + ',' + output_header

    num_header_cols = len(csv_header.split(","))
    assert num_header_cols == data.shape[1], (
        "header has {} columns but data has {}".format(num_header_cols, data.shape[1])
    )

    # comments='' stops np.savetxt from prefixing the header line with '# '.
    np.savetxt(file_name, data, delimiter=',', fmt='%f', header=csv_header, comments='')

# Visualize GTs

In [5]:
# Ground-truth recordings available for inspection. Only the selected file is
# read from disk; change the index below to look at a different recording.
gt_recording_files = [
    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_16_12.npz',
    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_1.npz',
    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_3.npz',
    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_18_6.npz',
]
hand_lmks_file = np.load(gt_recording_files[-1])

# Array stored under the "landmarks_output" key of the selected archive.
hand_lmks_gt = hand_lmks_file["landmarks_output"]

In [6]:
# Play back the ground-truth landmarks frame by frame, pausing 0.01 s per frame.
visualize_landmarks_through_frame(hand_lmks_gt, time_sleep=0.01)



# Visualize raw landmarks

In [11]:
# Re-open the 16_12 recording and take the array stored under the
# "raw_xyZ_of_opposite_cam" key.
# NOTE(review): this path is rooted at /home/giakhang while most other cells use
# /Users/giakhang - confirm which machine this cell is meant to run on.
hand_lmks_file = np.load('/home/giakhang/dev/Hand_pose_estimation_3D/hand_landmarks/data/hand_landmarks_2024_6_14_16_12.npz')
hand_lmks_input = hand_lmks_file["raw_xyZ_of_opposite_cam"]

In [12]:
# Play back the "raw_xyZ_of_opposite_cam" landmarks selected in the previous cell.
visualize_landmarks_through_frame(hand_lmks_input, time_sleep=0.01)

In [13]:
# Switch to the array stored under "raw_xyZ_of_rightside_cam" in the same archive.
hand_lmks_input = hand_lmks_file["raw_xyZ_of_rightside_cam"]

In [14]:
# Play back whichever raw landmark array `hand_lmks_input` currently refers to.
visualize_landmarks_through_frame(hand_lmks_input, time_sleep=0.01)

# Prepare data

## Visualize to verify that we saved the correct landmarks

In [7]:
# Load the data from a CSV file
train_data = pd.read_csv("/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv")

# The last 21 * 3 columns are treated as targets; every column before them is input.
num_output_nodes = 21 * 3
X_train = train_data.iloc[:, :-(num_output_nodes)]
Y_train = train_data.iloc[:, -(num_output_nodes):]

In [20]:
# The first 21 * 3 input columns are taken as the first camera's landmarks.
num_landmarks_each_cam = 21 * 3
X_train_cam_1 = X_train.values[:, :num_landmarks_each_cam]

In [36]:
# Reshape each flat 63-value row to (21, 3) and play the frames back at 0.1 s per frame.
visualize_landmarks_through_frame(X_train_cam_1.reshape(-1, 21, 3), time_sleep=0.1)

In [24]:
# Same playback for the training targets (each row reshaped to 21 joints x 3 coordinates).
visualize_landmarks_through_frame(Y_train.values.reshape(-1, 21, 3), time_sleep=0.1)

In [35]:
# Everything after the first camera's columns is taken as the second camera's landmarks.
# The reshape to (-1, 21, 3) only holds when exactly one more camera follows.
X_train_cam_2 = X_train.values[:, num_landmarks_each_cam:]
visualize_landmarks_through_frame(X_train_cam_2.reshape(-1, 21, 3), time_sleep=0.1)

In [28]:
# Load the data from a CSV file
# NOTE(review): this path is rooted at /home/giakhang while the matching training
# CSV is read from /Users/giakhang - confirm the intended location.
test_data = pd.read_csv('/home/giakhang/dev/Hand_pose_estimation_3D/hand_landmarks/data/test_hand_landmarks_2024_6_14_16_12.csv')

# The last 21 * 3 columns are treated as targets; every column before them is input.
num_output_nodes = 21 * 3
X_test = test_data.iloc[:, :-(num_output_nodes)]
Y_test = test_data.iloc[:, -(num_output_nodes):]

In [31]:
# Take the first camera's 21 * 3 input columns of the test split and play them back.
num_landmarks_each_cam = 21 * 3
X_test_cam_1 = X_test.values[:, :num_landmarks_each_cam]
visualize_landmarks_through_frame(X_test_cam_1.reshape(-1, 21, 3), time_sleep=0.1)

In [32]:
# Remaining input columns of the test split (second camera), played back the same way.
X_test_cam_2 = X_test.values[:, num_landmarks_each_cam:]
visualize_landmarks_through_frame(X_test_cam_2.reshape(-1, 21, 3), time_sleep=0.1)

In [33]:
# Play back the test targets, each row reshaped to 21 joints x 3 coordinates.
visualize_landmarks_through_frame(Y_test.values.reshape(-1, 21, 3), time_sleep=0.1)

In [6]:
# Split the input columns into the two camera views.
# No variable named `X` is defined anywhere in this notebook, so the training
# inputs loaded above are used here.
# NOTE(review): confirm X_train (rather than X_test) is the intended source.
num_landmarks = 21 * 3
landmarks_opposite_cam = X_train.values[:, :num_landmarks]
landmarks_rightside_cam = X_train.values[:, num_landmarks:]

In [7]:
# Reshape each flat row into 21 joints; the last axis is inferred from the row length.
landmarks_opposite_cam = landmarks_opposite_cam.reshape(landmarks_opposite_cam.shape[0], 21, -1)
landmarks_rightside_cam = landmarks_rightside_cam.reshape(landmarks_rightside_cam.shape[0], 21, -1)
# No variable named `Y` is defined anywhere in this notebook, so the training
# targets loaded above are used here.
# NOTE(review): confirm Y_train (rather than Y_test) is the intended source.
landmarks_gt = Y_train.values
landmarks_gt = landmarks_gt.reshape(landmarks_gt.shape[0], 21, -1)

In [9]:
# Play back the right-side camera landmarks with the default per-frame pause.
visualize_landmarks_through_frame(landmarks_rightside_cam)

In [5]:
# Train / test CSVs for the 16_12 recording.
train_data_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv"
test_data_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/test_hand_landmarks_2024_6_14_16_12.csv"

# Load the data from a CSV file
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

# The last 21 * 3 columns are treated as targets; every column before them is input.
num_output_nodes = 21 * 3
X_train = train_data.iloc[:, :-(num_output_nodes)]
Y_train = train_data.iloc[:, -(num_output_nodes):]
X_test = test_data.iloc[:, :-(num_output_nodes)]
Y_test = test_data.iloc[:, -(num_output_nodes):]

## Prepare data

In [6]:
# Training CSVs, one per recording; later cells merge them into one training set
# and derive the matching test CSV paths from these names.
train_data_files = ['/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_16_12.csv',
                    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_18_1.csv',
                    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_18_3.csv',
                    '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/train_hand_landmarks_2024_6_14_18_6.csv']

In [7]:
# Read every training CSV, then stack all rows into one frame with a fresh 0..N-1 index.
train_data_frames = [pd.read_csv(csv_path) for csv_path in train_data_files]
merged_train_data = pd.concat(train_data_frames, ignore_index=True)

In [8]:
# Each test CSV path is the training path with "train" replaced by "test".
test_data_files = [file.replace("train", "test") for file in train_data_files]

# If a path contains no "train", the replacement is a no-op and the "test" set
# would silently be the training file. Fail loudly instead.
unchanged_files = [path for path in test_data_files if path in train_data_files]
if unchanged_files:
    raise ValueError(
        "test file path(s) identical to training path(s): {}".format(unchanged_files)
    )

# Read every test CSV, then stack all rows into one frame with a fresh index.
test_data_frames = [pd.read_csv(csv_path) for csv_path in test_data_files]
merged_test_data = pd.concat(test_data_frames, ignore_index=True)

In [9]:
# Targets are the trailing 21 * 3 columns; inputs are all columns before them.
num_output_nodes = 21 * 3
X_train, Y_train = (merged_train_data.iloc[:, :-num_output_nodes],
                    merged_train_data.iloc[:, -num_output_nodes:])
X_test, Y_test = (merged_test_data.iloc[:, :-num_output_nodes],
                  merged_test_data.iloc[:, -num_output_nodes:])

In [None]:
# Sanity check on the merged training set: play back the first camera's
# 21 * 3 input columns, reshaped to (frames, 21, 3).
num_landmarks_each_cam = 21 * 3
X_train_cam_1 = X_train.values[:, :num_landmarks_each_cam]
visualize_landmarks_through_frame(X_train_cam_1.reshape(-1, 21, 3), time_sleep=0.01)



In [None]:
# Same sanity check for the training targets.
visualize_landmarks_through_frame(Y_train.values.reshape(-1, 21, 3), time_sleep=0.1)



In [10]:
# Report (rows, columns) of each split to confirm the input/target column counts.
print("X_train shape: ", X_train.shape)
print("Y_train shape: ", Y_train.shape)
print("X_test shape: ", X_test.shape)
print("Y_test shape: ", Y_test.shape)

X_train shape:  (1600, 126)
Y_train shape:  (1600, 63)
X_test shape:  (400, 126)
Y_test shape:  (400, 63)


In [11]:
# Initialize the MinMaxScaler for scaling between 0 and 1
# Inputs and targets get separate scalers so predictions can later be mapped
# back to the original target units with scaler_Y alone.
scaler_X = MinMaxScaler()
scaler_Y = MinMaxScaler()

In [12]:
# Learn the scaling ranges from the training split only, then apply the same
# fitted scalers to both splits (the test split is never used for fitting).
scaler_X.fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_Y.fit(Y_train)
Y_train_scaled = scaler_Y.transform(Y_train)
Y_test_scaled = scaler_Y.transform(Y_test)

In [13]:
# Mini-batch size shared by both loaders
batch_size = 32  # Adjust according to your needs

# Training split: scaled arrays -> float32 tensors -> dataset -> shuffled loader
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train_scaled, dtype=torch.float32)
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: same conversion, but iterated in a fixed order
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test_scaled, dtype=torch.float32)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
# Persist the fitted input scaler; the evaluation section reloads it from this path.
joblib.dump(scaler_X, "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_X.pkl")

['/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_X.pkl']

In [14]:
# Persist the fitted target scaler; the evaluation section reloads it from this path.
joblib.dump(scaler_Y, "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_Y.pkl")

['/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_Y.pkl']

In [14]:
# Shape of the scaled training-input tensor (samples, input features).
print('X_train shape: ', X_train_tensor.shape)

X_train shape:  torch.Size([1600, 126])


In [15]:
# Shape of the scaled test-input tensor (samples, input features).
print('X_test shape: ', X_test_tensor.shape)

X_test shape:  torch.Size([400, 126])


# Train model

In [17]:
# MLP is imported from hand_landmarks.neural_networks.mlp and built with its defaults.
model = MLP()

# Define your criterion and optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Multiply the learning rate by sqrt(0.1) (about 0.316) every 1000 scheduler
# steps, i.e. an overall factor of 0.1 every 2000 steps. The training loop
# steps the scheduler once per epoch.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=math.sqrt(0.1))

In [None]:
# Initialize variables to track best model
best_loss = float('inf')
best_epoch = 0
num_epochs = 10000

# Use the best accelerator that is actually available instead of assuming
# Apple "mps", so the cell also runs on CUDA or CPU-only machines.
mps_backend = getattr(torch.backends, "mps", None)
if mps_backend is not None and mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

# NOTE(review): the test split doubles as the validation set for checkpoint
# selection below, so metrics later reported on that split are optimistic.
for epoch in range(num_epochs):
    # Training
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so a
        # short final batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)

    # Per-sample average training loss
    avg_train_loss = running_loss / len(train_loader.dataset)

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs_val, labels_val in test_loader:
            inputs_val, labels_val = inputs_val.to(device), labels_val.to(device)

            outputs_val = model(inputs_val)
            loss_val = criterion(outputs_val, labels_val)
            val_loss += loss_val.item() * inputs_val.size(0)

    # Per-sample average validation loss
    avg_val_loss = val_loss / len(test_loader.dataset)

    # Save best model based on validation loss
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), '/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/weights/best_model.pth')

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
    # Step the scheduler at the end of each epoch
    scheduler.step()

print(f'Best model found at epoch {best_epoch+1}, with validation loss: {best_loss:.4f}')

Epoch [1/10000], Train Loss: 0.0634, Val Loss: 0.0155
Epoch [2/10000], Train Loss: 0.0155, Val Loss: 0.0155
Epoch [3/10000], Train Loss: 0.0155, Val Loss: 0.0155
Epoch [4/10000], Train Loss: 0.0155, Val Loss: 0.0154
Epoch [5/10000], Train Loss: 0.0155, Val Loss: 0.0155
Epoch [6/10000], Train Loss: 0.0155, Val Loss: 0.0155
Epoch [7/10000], Train Loss: 0.0155, Val Loss: 0.0154
Epoch [8/10000], Train Loss: 0.0154, Val Loss: 0.0154
Epoch [9/10000], Train Loss: 0.0155, Val Loss: 0.0156
Epoch [10/10000], Train Loss: 0.0155, Val Loss: 0.0152
Epoch [11/10000], Train Loss: 0.0156, Val Loss: 0.0153
Epoch [12/10000], Train Loss: 0.0154, Val Loss: 0.0155
Epoch [13/10000], Train Loss: 0.0156, Val Loss: 0.0152
Epoch [14/10000], Train Loss: 0.0153, Val Loss: 0.0153
Epoch [15/10000], Train Loss: 0.0154, Val Loss: 0.0157
Epoch [16/10000], Train Loss: 0.0157, Val Loss: 0.0154
Epoch [17/10000], Train Loss: 0.0156, Val Loss: 0.0153
Epoch [18/10000], Train Loss: 0.0153, Val Loss: 0.0151
Epoch [19/10000], T

# Evaluate

Evaluate with raw model outputs (predictions and targets both in the MinMax-scaled space)

In [16]:
model_weight = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/weights/best_model.pth"

# Use the best accelerator that is actually available instead of assuming
# Apple "mps", so the cell also runs on CUDA or CPU-only machines.
mps_backend = getattr(torch.backends, "mps", None)
if mps_backend is not None and mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the model state dictionary
model = MLP()

# map_location remaps the stored tensors onto the selected device, so the
# checkpoint loads regardless of which device it was saved from.
model.load_state_dict(torch.load(model_weight,
                                 map_location=torch.device(device)))

model.eval()  # Set the model to evaluation mode
model.to(device)
# Report the file that was actually loaded.
print(f'Model loaded from {model_weight}')

Model loaded from mlp_model.pth


In [17]:
# Paths of the scalers written by the joblib.dump cells in the data-preparation section.
scaler_input_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_X.pkl"
scaler_output_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/neural_networks/scaler/scaler_Y.pkl"

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler files produced by this project.
scaler_input = joblib.load(scaler_input_path)
scaler_output = joblib.load(scaler_output_path)

# Define the evaluation criterion (no optimizer is created in this cell)
criterion = nn.MSELoss()  # Mean Squared Error Loss

In [5]:
# This cell needs `scaler_input` / `scaler_output`, so the scaler-loading cell
# must be executed first.
# NOTE(review): only the 16_12 test CSV is evaluated here, whereas training
# merged four recordings - confirm that this is intended.
test_data_path = "/Users/giakhang/dev/research/Hand_pose_estimation_3D/hand_landmarks/data/test_hand_landmarks_2024_6_14_16_12.csv"

# Load the data from a CSV file
test_data = pd.read_csv(test_data_path)

# The last 21 * 3 columns are treated as targets; every column before them is input.
num_output_nodes = 21 * 3
X_test = test_data.iloc[:, :-(num_output_nodes)]
Y_test = test_data.iloc[:, -(num_output_nodes):]

# Apply the already-fitted scalers (transform only, no refitting).
X_test_scaled = scaler_input.transform(X_test)
Y_test_scaled = scaler_output.transform(Y_test)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test_scaled, dtype=torch.float32)

# Create datasets
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

# Define batch size
batch_size = 32  # Adjust according to your needs

# Create DataLoader objects
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

NameError: name 'scaler_input' is not defined

In [18]:
# Set evaluation mode here rather than relying on an earlier cell having done it.
model.eval()
with torch.no_grad():
    total_loss = 0.0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # criterion returns the batch mean; weight it by the batch size so a
        # short final batch does not skew the overall average.
        total_loss += loss.item() * inputs.size(0)

    # Per-sample average loss over the whole test set (scaled space).
    avg_loss = total_loss / len(test_loader.dataset)
    print(f'Average Loss of the model on the test set: {avg_loss:.4f}')

Average Loss of the model on the test set: 0.0135


Evaluate with predictions mapped back to the original scale (MSE unit: mm²)

In [19]:
# Inference only: skip autograd bookkeeping for the full-test-set forward pass.
X_test_tensor = X_test_tensor.to(device)
with torch.no_grad():
    predictions = model(X_test_tensor)

In [20]:
# Move the predictions to CPU as a NumPy array, then undo the target scaling.
# NOTE(review): the "_in_mm" name assumes the original targets are in millimetres - confirm.
predictions = predictions.detach().to("cpu").numpy()
predictions_in_mm = scaler_output.inverse_transform(predictions)

In [21]:
# Map the scaled targets back to their original units. The source is
# Y_test_scaled, which this cell never reassigns, so re-running the cell gives
# the same result instead of applying inverse_transform to its own output.
# After this cell `Y_test_tensor` holds a NumPy array, not a torch tensor.
Y_test_tensor = scaler_output.inverse_transform(Y_test_scaled)

In [22]:
# Mean squared error in the original target scale. This is a squared quantity:
# with targets in mm the value is in mm^2 (take the square root for an RMSE in mm).
mean_squared_error(Y_test_tensor, predictions_in_mm)

323.3852872724482

# Visualize predictions

In [23]:
# Reshape each flat 63-value prediction into 21 joints x 3 coordinates for the visualizer.
# Re-running is harmless: the array is already (N, 21, 3) after the first run.
predictions_in_mm = predictions_in_mm.reshape(predictions_in_mm.shape[0], 21, 3)

In [27]:
# Play back the predicted skeletons, pausing 0.1 s per frame.
visualize_landmarks_through_frame(predictions_in_mm, 0.1)

