In [1]:
import sys
sys.path.append("../../")

In [2]:
import numpy as np
import open3d as o3d
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import joblib
import time

from hand_landmarks.neural_networks import MLP

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
def visualize_landmarks_through_frame(landmarks, time_sleep=0.01):
    """Animate a sequence of 21-point hand skeletons in an Open3D window.

    Every frame is rendered as a point cloud plus a red line set that joins
    the landmarks along a fixed list of index pairs. The window is destroyed
    when the function exits, including when rendering a frame raises.

    Args:
        landmarks: Array of shape ``(num_frames, 21, 3)``; one set of 21
            three-dimensional points per frame.
        time_sleep: Seconds to pause after each rendered frame.

    Raises:
        AssertionError: If ``landmarks.shape[1:]`` is not ``(21, 3)``.
    """
    assert landmarks.shape[1:] == (21, 3)

    # Two-point placeholder so both geometries exist before the first frame.
    placeholder_points = np.array([[500, 0, 0],
                                   [0, 0, 0]])
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(placeholder_points)

    placeholder_lines = [[0, 0]]
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(placeholder_points),
        lines=o3d.utility.Vector2iVector(placeholder_lines)
    )
    line_set.colors = o3d.utility.Vector3dVector(
        [[1, 0, 0] for _ in placeholder_lines]
    )

    # Landmark index pairs to connect. These never change between frames, so
    # they are built (and converted to Open3D vectors) once, outside the loop.
    hand_edges = [[0, 1], [1, 2], [2, 3], [3, 4],
                  [0, 5], [5, 6], [6, 7], [7, 8],
                  [5, 9], [9, 10], [10, 11], [11, 12],
                  [9, 13], [13, 14], [14, 15], [15, 16],
                  [13, 17], [17, 18], [18, 19], [19, 20], [0, 17]]
    edge_indices = o3d.utility.Vector2iVector(hand_edges)
    edge_colors = o3d.utility.Vector3dVector([[1, 0, 0] for _ in hand_edges])

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(pcd)
        vis.add_geometry(line_set)

        for frame_idx in range(landmarks.shape[0]):
            hand_lmks = landmarks[frame_idx]

            pcd.points = o3d.utility.Vector3dVector(hand_lmks)
            # Points are assigned before lines so the edge indices always
            # refer to an existing 21-point set.
            line_set.points = o3d.utility.Vector3dVector(hand_lmks)
            line_set.lines = edge_indices
            line_set.colors = edge_colors

            vis.update_geometry(pcd)
            vis.update_geometry(line_set)
            vis.poll_events()
            vis.update_renderer()

            time.sleep(time_sleep)
    finally:
        # Always release the window, even if a frame failed to render.
        vis.destroy_window()

In [4]:
"""
class MLP(nn.Module):
    def __init__(self, input_size=21*3*2, output_size=21*3):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 100)
        self.fc2 = nn.Linear(100, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x
"""

'\nclass MLP(nn.Module):\n    def __init__(self, input_size=21*3*2, output_size=21*3):\n        super(MLP, self).__init__()\n        self.fc1 = nn.Linear(input_size, 100)\n        self.fc2 = nn.Linear(100, 64)\n        self.fc3 = nn.Linear(64, 64)\n        self.fc4 = nn.Linear(64, output_size)\n        self.relu = nn.ReLU()\n        \n    def forward(self, x):\n        x = self.relu(self.fc1(x))\n        x = self.relu(self.fc2(x))\n        x = self.relu(self.fc3(x))\n        x = self.fc4(x)\n        return x\n'

In [5]:
def write_data_to_csv(file_name, data, num_cam=2):
    """Write landmark rows to a CSV file with a generated column header.

    The header lists, for every camera ``camK`` with K = 1..num_cam, the
    columns ``camK_X00_in, camK_Y00_in, camK_Z00_in, ..., camK_Z20_in``,
    followed by the output columns ``X00_out, Y00_out, Z00_out, ...,
    Z20_out``. Column names are built inside the function, so it no longer
    relies on a module-level header string having been defined beforehand.

    Args:
        file_name: Destination path handed to ``np.savetxt``.
        data: 2-D array whose number of columns equals the header length,
            i.e. ``(num_cam + 1) * 21 * 3``.
        num_cam: Number of input cameras present in ``data``.

    Raises:
        AssertionError: If ``data.shape[1]`` does not match the number of
            header columns.
    """
    axis_names = ("X", "Y", "Z")
    num_joints_each_hand = 21

    # One "<axis><joint>" field per coordinate, joints in ascending order.
    joint_fields = [
        "{}{:02d}".format(axis, joint)
        for joint in range(num_joints_each_hand)
        for axis in axis_names
    ]

    input_cols = [
        "cam{}_{}_in".format(cam, field)
        for cam in range(1, num_cam + 1)
        for field in joint_fields
    ]
    output_cols = ["{}_out".format(field) for field in joint_fields]
    header_cols = input_cols + output_cols

    assert len(header_cols) == data.shape[1], (
        "expected {} columns, got {}".format(len(header_cols), data.shape[1])
    )

    # comments='' stops savetxt from prefixing the header line with '# '.
    np.savetxt(file_name, data, delimiter=',', fmt='%f',
               header=','.join(header_cols), comments='')

# Visualize ground-truth landmarks

In [4]:
# np.load on an .npz returns an NpzFile that keeps the archive open until it
# is closed. Read the array inside a `with` block so the handle is released.
with np.load('../hand_landmarks_2024_06_12.npz') as hand_lmks_file:
    hand_lmks_gt = hand_lmks_file["landmarks_output"]

In [5]:
visualize_landmarks_through_frame(hand_lmks_gt)

# Visualize raw landmarks

In [75]:
# np.load on an .npz returns an NpzFile that keeps the archive open until it
# is closed. Read the array inside a `with` block so the handle is released.
with np.load('../hand_landmarks_2024_06_12.npz') as hand_lmks_file:
    hand_lmks_input = hand_lmks_file["landmarks_input"]

In [64]:
# Entries 0..20 along axis 1, i.e. one block of 21 landmarks per frame.
# NOTE(review): presumably the "opposite" camera is stored first in the .npz - confirm.
hand_lmks_opposite_cam = hand_lmks_input[:, :21]

In [65]:
visualize_landmarks_through_frame(hand_lmks_opposite_cam)

In [76]:
# Everything from index 21 onward along axis 1.
# NOTE(review): presumably the second (right-side) camera's 21 landmarks - confirm.
hand_lmks_rightside_cam = hand_lmks_input[:, 21:]

In [77]:
visualize_landmarks_through_frame(hand_lmks_rightside_cam)

# Save to .csv

In [25]:
# Camera-1 input column names: an X/Y/Z triple for each landmark, with the
# landmark index zero-padded to two digits (00 through 20).
input_cam1_cols = [
    "cam1_{}{:02d}_in".format(axis, landmark_idx)
    for landmark_idx in range(21)
    for axis in ("X", "Y", "Z")
]

# Same names as a single comma-separated string, ready to use as a CSV header.
input_cam1_header = ','.join(input_cam1_cols)

In [34]:
# np.load on an .npz returns an NpzFile that keeps the archive open until it
# is closed. Pull both arrays out inside a `with` block so the handle is
# released.
with np.load('../hand_landmarks_2024_06_12.npz') as hand_lmks_file:
    hand_lmks_input = hand_lmks_file["landmarks_input"]
    hand_lmks_gt = hand_lmks_file["landmarks_output"]

In [35]:
# Collapse every axis after the first so each frame becomes one flat row.
# Re-running this cell is harmless: an already 2-D array keeps its shape.
hand_lmks_input = hand_lmks_input.reshape(hand_lmks_input.shape[0], -1)
hand_lmks_gt = hand_lmks_gt.reshape(hand_lmks_gt.shape[0], -1)

In [39]:
total_data = np.concatenate([hand_lmks_input, hand_lmks_gt], axis=1)

In [41]:
write_data_to_csv("hand_landmarks_2024_06_13.csv", total_data)

# Prepare data

In [6]:
# Load the data from a CSV file
data = pd.read_csv('hand_landmarks_2024_06_13.csv')

# The last 21 * 3 columns are the targets (Y); every column before them is an
# input feature (X).
num_output_nodes = 21 * 3
X = data.iloc[:, :-(num_output_nodes)]
Y = data.iloc[:, -(num_output_nodes):]

In [7]:
# Number of scalar columns per camera (21 landmarks x 3 coordinates); same
# value as `num_output_nodes` used when splitting X and Y.
num_landmarks = 21 * 3
# First 63 input columns vs. the remaining input columns.
# NOTE(review): presumably cam1 = opposite view and cam2 = right-side view - confirm.
landmarks_opposite_cam = X.values[:, :num_landmarks]
landmarks_rightside_cam = X.values[:, num_landmarks:]

In [8]:
# Restore the per-landmark layout: (num_rows, 21, remaining) for each array,
# which is the (N, 21, 3) shape the visualiser asserts when rows hold 63 values.
landmarks_opposite_cam = landmarks_opposite_cam.reshape(landmarks_opposite_cam.shape[0], 21, -1)
landmarks_rightside_cam = landmarks_rightside_cam.reshape(landmarks_rightside_cam.shape[0], 21, -1)
landmarks_gt = Y.values
landmarks_gt = landmarks_gt.reshape(landmarks_gt.shape[0], 21, -1)

In [9]:
visualize_landmarks_through_frame(landmarks_rightside_cam)

In [10]:
# Initialize the MinMaxScaler for scaling between 0 and 1
scaler_X = MinMaxScaler()
scaler_Y = MinMaxScaler()

In [11]:
# Fit the scaler on the data and transform
# NOTE(review): both scalers are fitted on the full dataset here, before the
# train/test split in the next cell, so the per-column min/max include test
# rows. Consider fitting on the training rows only - confirm whether this is
# intended before relying on the reported test error.
X_scaled = scaler_X.fit_transform(X)
Y_scaled = scaler_Y.fit_transform(Y)

In [12]:
# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_scaled, test_size=0.2, random_state=42)

In [13]:
# Wrap each split as a float32 tensor (order: X train/test, then Y train/test)
X_train_tensor, X_test_tensor, Y_train_tensor, Y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, X_test, Y_train, Y_test)
)

# Pair inputs with targets
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

# Mini-batch size shared by both loaders; tune as needed
batch_size = 32

# Only the training loader reshuffles; the test loader keeps a fixed order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
joblib.dump(scaler_X, "scaler_input.pkl")

['scaler_input.pkl']

In [15]:
joblib.dump(scaler_Y, "scaler_output.pkl")

['scaler_output.pkl']

# Train model

In [16]:
# MLP comes from hand_landmarks.neural_networks and is built with its default
# constructor arguments.
model = MLP()

# Loss and optimiser used by the training loop in the next cell
criterion = nn.MSELoss()  # Mean Squared Error Loss
# Adam over all model parameters with a fixed learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [17]:
# Bookkeeping for the best checkpoint seen so far
num_epochs = 10000
best_loss, best_epoch = float('inf'), 0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    # ---- one optimisation pass over the training batches ----
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    avg_train_loss = running_loss / len(train_loader)

    # Validation, logging and checkpointing only happen every 500th epoch
    # (counting from epoch 0) and on the final epoch; skip the rest.
    if epoch % 500 != 0 and epoch != num_epochs - 1:
        continue

    # ---- validation pass (the test loader doubles as the validation set) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs_val, labels_val in test_loader:
            inputs_val = inputs_val.to(device)
            labels_val = labels_val.to(device)

            outputs_val = model(inputs_val)
            loss_val = criterion(outputs_val, labels_val)
            val_loss += loss_val.item()

    # Mean of the per-batch validation losses
    avg_val_loss = val_loss / len(test_loader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

    # Keep the weights whenever the validation loss improves
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_model.pth')

print(f'Best model found at epoch {best_epoch+1}, with validation loss: {best_loss:.4f}')

Epoch [1/10000], Train Loss: 0.1146, Val Loss: 0.0449
Epoch [501/10000], Train Loss: 0.0019, Val Loss: 0.0035
Epoch [1001/10000], Train Loss: 0.0013, Val Loss: 0.0039
Epoch [1501/10000], Train Loss: 0.0017, Val Loss: 0.0044
Epoch [2001/10000], Train Loss: 0.0019, Val Loss: 0.0051
Epoch [2501/10000], Train Loss: 0.0022, Val Loss: 0.0046
Epoch [3001/10000], Train Loss: 0.0026, Val Loss: 0.0047
Epoch [3501/10000], Train Loss: 0.0018, Val Loss: 0.0035
Epoch [4001/10000], Train Loss: 0.0017, Val Loss: 0.0040
Epoch [4501/10000], Train Loss: 0.0066, Val Loss: 0.0085
Epoch [5001/10000], Train Loss: 0.0035, Val Loss: 0.0073
Epoch [5501/10000], Train Loss: 0.0035, Val Loss: 0.0050
Epoch [6001/10000], Train Loss: 0.0030, Val Loss: 0.0053
Epoch [6501/10000], Train Loss: 0.0030, Val Loss: 0.0051
Epoch [7001/10000], Train Loss: 0.0083, Val Loss: 0.0137
Epoch [7501/10000], Train Loss: 0.0037, Val Loss: 0.0062
Epoch [8001/10000], Train Loss: 0.0034, Val Loss: 0.0049
Epoch [8501/10000], Train Loss: 0.0

# Evaluate

Evaluate with raw predictions (network outputs in the min-max scaled space)

In [18]:
# Rebuild the network and restore the best checkpoint written during training
checkpoint_path = 'best_model.pth'
model = MLP()
# map_location lets the load succeed when the checkpoint was saved from a
# different device than the one in use now (e.g. saved on GPU, loaded on CPU).
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()  # Set the model to evaluation mode
model.to(device)
# Report the file that was actually loaded (the old message named a different file)
print(f'Model loaded from {checkpoint_path}')

Model loaded from mlp_model.pth


In [19]:
# Sum the per-batch losses over the test loader with gradient tracking off
with torch.no_grad():
    total_loss = sum(
        (
            criterion(model(inputs.to(device)), labels.to(device)).item()
            for inputs, labels in test_loader
        ),
        0.0,
    )

# Average over the number of batches
avg_loss = total_loss / len(test_loader)
print(f'Average Loss of the model on the test set: {avg_loss:.4f}')

Average Loss of the model on the test set: 0.0035


Evaluate with predictions rescaled to the original units (coordinates in mm, so the MSE below is in mm²)

In [20]:
# Inference only: run the forward pass under no_grad so no autograd graph is
# built for the whole test set.
X_test_tensor = X_test_tensor.to(device)
with torch.no_grad():
    predictions = model(X_test_tensor)

In [21]:
predictions = predictions.detach().to("cpu").numpy()
predictions_in_mm = scaler_Y.inverse_transform(predictions)

In [22]:
# Map the scaled test targets back to the original units. The input is the
# NumPy split `Y_test`, not `Y_test_tensor` itself, so re-running this cell
# cannot apply the inverse transform twice and sklearn receives an ndarray
# rather than a torch tensor. The existing name is kept because the next cell
# reads it; the value is a NumPy array.
Y_test_tensor = scaler_Y.inverse_transform(Y_test)

In [23]:
mean_squared_error(Y_test_tensor, predictions_in_mm)

44.63265795646973

# Visualize predictions

In [21]:
predictions_in_mm = predictions_in_mm.reshape(predictions_in_mm.shape[0], 21, 3)

In [22]:
visualize_landmarks_through_frame(predictions_in_mm, 1)

In [24]:
predictions_in_mm[:, :3]

AttributeError: 'numpy.ndarray' object has no attribute 'abs'

In [25]:
np.absolute(predictions_in_mm[:, :3])

array([[[1.85609650e-04, 4.66348865e-07, 4.28860294e-05],
        [1.10967541e+01, 3.52253647e+01, 3.33618660e+01],
        [2.71866302e+01, 9.21353912e+01, 4.91426544e+01]],

       [[2.56806845e-04, 5.61283014e-06, 3.14971621e-05],
        [1.23917141e+01, 2.57530136e+01, 2.52710342e+01],
        [7.36253214e+00, 5.77145081e+01, 4.64300499e+01]],

       [[3.37627367e-04, 4.94060077e-06, 3.90565983e-05],
        [3.95332742e+00, 3.52010002e+01, 3.75600395e+01],
        [8.32162666e+00, 8.57641144e+01, 6.38877602e+01]],

       [[1.80617688e-04, 2.37532731e-06, 8.88012437e-08],
        [1.11942749e+01, 3.06096916e+01, 1.83208580e+01],
        [2.17585373e+01, 5.97155571e+01, 3.09899120e+01]],

       [[3.31979943e-04, 6.50679794e-07, 2.42017322e-05],
        [2.52948713e+00, 1.62722015e+01, 3.77077446e+01],
        [1.35170424e+00, 5.00448151e+01, 6.76720505e+01]],

       [[2.52390397e-04, 4.14538590e-06, 4.94359410e-05],
        [1.54932795e+01, 3.11410351e+01, 2.76145763e+01],
    