In [1]:
import numpy as np
import open3d as o3d

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

import time

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, hidden_size)
        self.fc6 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.relu(self.fc5(x))
        x = self.fc6(x)
        return x

In [2]:
hand_lmks_file = np.load('../hand_landmarks.npz')

In [3]:
hand_lmks = hand_lmks_file["landmarks"]

In [4]:
hand_lmks.shape

(69, 21, 3)

In [5]:
input_cam1_cols = ["cam1_x0_in", "cam1_y0_in", "cam1_Z0_in",
               "cam1_x1_in", "cam1_y1_in", "cam1_Z1_in",
               "cam1_x2_in", "cam1_y2_in", "cam1_Z2_in",
               "cam1_x3_in", "cam1_y3_in", "cam1_Z3_in",
               "cam1_x4_in", "cam1_y4_in", "cam1_Z4_in",
               "cam1_x5_in", "cam1_y5_in", "cam1_Z5_in",
               "cam1_x6_in", "cam1_y6_in", "cam1_Z6_in",
               "cam1_x7_in", "cam1_y7_in", "cam1_Z7_in",
               "cam1_x8_in", "cam1_y8_in", "cam1_Z8_in",
               "cam1_x9_in", "cam1_y9_in", "cam1_Z9_in",
               "cam1_x10_in", "cam1_y10_in", "cam1_Z10_in",
               "cam1_x11_in", "cam1_y11_in", "cam1_Z11_in",
               "cam1_x12_in", "cam1_y12_in", "cam1_Z12_in",
               "cam1_x13_in", "cam1_y13_in", "cam1_Z13_in",
               "cam1_x14_in", "cam1_y14_in", "cam1_Z14_in",
               "cam1_x15_in", "cam1_y15_in", "cam1_Z15_in",
               "cam1_x16_in", "cam1_y16_in", "cam1_Z16_in",
               "cam1_x17_in", "cam1_y17_in", "cam1_Z17_in",
               "cam1_x18_in", "cam1_y18_in", "cam1_Z18_in",
               "cam1_x19_in", "cam1_y19_in", "cam1_Z19_in",
               "cam1_x20_in", "cam1_y20_in", "cam1_Z20_in"]
input_cam1_header = ','.join(input_cam1_cols)

In [6]:
#np.savetxt('hand_landmark_data.csv', data, delimiter=',', fmt='%f', header=output_cols_header, comments='')

# Create a pseudo code for storing data to .csv file

In [7]:
def write_data_to_csv(file_name, data, num_cam=2):
    num_points_each_joint = 3
    num_joints_each_hand = 21
    num_input_cols = num_cam * num_points_each_joint * num_joints_each_hand

    input_header = input_cam1_header
    for i in range(2, num_cam+1):
        input_cam_i_header = input_cam1_header.replace("cam1", "cam{}".format(i))
        input_header += ',' + input_cam_i_header

    output_header = input_cam1_header.replace("cam1_", "").replace("in", "out")
    csv_header = input_header + ',' + output_header

    assert len(csv_header.split(",")) == data.shape[1]

    np.savetxt(file_name, data, delimiter=',', fmt='%f', header=csv_header, comments='')

In [8]:
num_frames = hand_lmks.shape[0]
num_input_cols = 2 * 3 * 21  # num_cameras * num_points_each_landmark * num_landmarks_each_hand

input_fake_data = np.random.rand(num_frames, num_input_cols)

In [9]:
output_data = hand_lmks.reshape(hand_lmks.shape[0], -1)

In [10]:
total_data = np.concatenate([input_fake_data, output_data], axis=1)

In [11]:
write_data_to_csv("hand_landmark_data.csv", total_data)

# Prepare data and train model

In [36]:
num_output_nodes = output_data.shape[1]

In [37]:
# Load the data from a CSV file
data = pd.read_csv('hand_landmark_data.csv')

X = data.iloc[:, :-(num_output_nodes)].values
Y = data.iloc[:, -(num_output_nodes):].values

# Convert the data to PyTorch tensors
X_train = torch.tensor(X, dtype=torch.float32)
Y_train = torch.tensor(Y, dtype=torch.float32)

# Create DataLoader
train_dataset = TensorDataset(X_train, Y_train)
test_dataset = TensorDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [46]:
input_size = X.shape[1]
hidden_size = 64
output_size = num_output_nodes  # Regression output

model = MLP(input_size, hidden_size, output_size)
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [48]:
num_epochs = 10000

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    if epoch % 500 == 0 or epoch == num_epochs - 1:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

Epoch [1/10000], Loss: 18.1124
Epoch [501/10000], Loss: 16.3010
Epoch [1001/10000], Loss: 18.5695
Epoch [1501/10000], Loss: 17.9249
Epoch [2001/10000], Loss: 17.1661
Epoch [2501/10000], Loss: 18.1607
Epoch [3001/10000], Loss: 14.4619
Epoch [3501/10000], Loss: 14.8445
Epoch [4001/10000], Loss: 22.8916
Epoch [4501/10000], Loss: 18.9038
Epoch [5001/10000], Loss: 18.1075
Epoch [5501/10000], Loss: 16.6824
Epoch [6001/10000], Loss: 5.9086
Epoch [6501/10000], Loss: 3.6089
Epoch [7001/10000], Loss: 2.5379
Epoch [7501/10000], Loss: 3.0338
Epoch [8001/10000], Loss: 1.9298
Epoch [8501/10000], Loss: 1.8529
Epoch [9001/10000], Loss: 1.8207
Epoch [9501/10000], Loss: 1.9277
Epoch [10000/10000], Loss: 1.2841


In [49]:
model.eval()
with torch.no_grad():
    total_loss = 0.0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        
    avg_loss = total_loss / len(test_loader)
    print(f'Average Loss of the model on the test set: {avg_loss:.4f}')

Average Loss of the model on the test set: 2.0985


In [50]:
# Save the model state dictionary
torch.save(model.state_dict(), 'mlp_model.pth')
print('Model saved to mlp_model.pth')

Model saved to mlp_model.pth


# Inference again

In [3]:
# Load the data from a CSV file
data = pd.read_csv('hand_landmark_data.csv')

X = data.iloc[:, :-63].values

In [4]:
# Load the model state dictionary
model = MLP(X.shape[1], 64, 63)
model.load_state_dict(torch.load('mlp_model.pth'))
model.eval()  # Set the model to evaluation mode
print('Model loaded from mlp_model.pth')

Model loaded from mlp_model.pth


In [5]:
X = torch.tensor(X, dtype=torch.float32)

In [6]:
Y = model(X)

In [7]:
hand_lnmks = Y[-1]

In [8]:
hand_lnmks = hand_lnmks.reshape(21, 3)

In [9]:
hand_lnmks = hand_lnmks.detach().numpy()

In [10]:
# Create the initial point cloud
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(hand_lnmks)

lines = [[0,1],[1,2],[2,3],[3,4], 
         [0,5],[5,6],[6,7],[7,8],
         [5,9],[9,10],[10,11],[11,12],
         [9,13],[13,14],[14,15],[15,16],
         [13,17],[17,18],[18,19],[19,20],[0,17]]
colors = [[1, 0, 0] for i in range(len(lines))]
line_set = o3d.geometry.LineSet(
    points=o3d.utility.Vector3dVector(hand_lnmks),
    lines=o3d.utility.Vector2iVector(lines),
)
line_set.colors = o3d.utility.Vector3dVector(colors)

In [11]:
o3d.visualization.draw_geometries([pcd, line_set])



# Visualize GTs

In [3]:
hand_lmks_file = np.load('../hand_landmarks.npz')
hand_lmks_gt = hand_lmks_file["landmarks"]

In [None]:
x = np.array([[500, 0, 0],
              [0, 0, 0]])
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(x)

lines = [[0, 0]]
colors = [[1, 0, 0] for i in range(len(lines))]
line_set = o3d.geometry.LineSet(
    points=o3d.utility.Vector3dVector(x),
    lines=o3d.utility.Vector2iVector(lines)
)
line_set.colors = o3d.utility.Vector3dVector(colors)
    
vis = o3d.visualization.Visualizer()
vis.create_window()
vis.add_geometry(pcd)
vis.add_geometry(line_set)

i = 0
while True:
    hand_lmks = hand_lmks_gt[i]
    pcd.points = o3d.utility.Vector3dVector(hand_lmks)

    lines = [[0,1],[1,2],[2,3],[3,4], 
             [0,5],[5,6],[6,7],[7,8],
             [5,9],[9,10],[10,11],[11,12],
             [9,13],[13,14],[14,15],[15,16],
             [13,17],[17,18],[18,19],[19,20],[0,17]]
    colors = [[1, 0, 0] for i in range(len(lines))]
    line_set.points = o3d.utility.Vector3dVector(hand_lmks)  # Update the points
    line_set.lines = o3d.utility.Vector2iVector(lines)  # Update the lines
    line_set.colors = o3d.utility.Vector3dVector(colors)

    vis.update_geometry(pcd)
    vis.update_geometry(line_set)
    vis.poll_events()
    vis.update_renderer()

    i += 1
    if i == hand_lmks_gt.shape[0]:
        i = 0
        
    time.sleep(0.1)

vis.destroy_window()

# Visualize predictions

In [3]:
# Load the data from a CSV file
data = pd.read_csv('hand_landmark_data.csv')

X = data.iloc[:, :-63].values

In [4]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

cpu


In [5]:
# Load the model state dictionary
model = MLP(X.shape[1], 64, 63).to(device)
model.load_state_dict(torch.load('mlp_model.pth'))
model.eval()  # Set the model to evaluation mode
print('Model loaded from mlp_model.pth')

Model loaded from mlp_model.pth


In [6]:
X = torch.tensor(X, dtype=torch.float32)

In [None]:
x = np.array([[500, 0, 0],
              [0, 0, 0]])
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(x)

lines = [[0, 0]]
colors = [[1, 0, 0] for i in range(len(lines))]
line_set = o3d.geometry.LineSet(
    points=o3d.utility.Vector3dVector(x),
    lines=o3d.utility.Vector2iVector(lines)
)
line_set.colors = o3d.utility.Vector3dVector(colors)
    
vis = o3d.visualization.Visualizer()
vis.create_window()
vis.add_geometry(pcd)
vis.add_geometry(line_set)

i = 0
while True:
    lmks_input = X[i].to(device)
    lmks_output = model(lmks_input)
    lmks_output = lmks_output.detach().to("cpu").numpy()
    lmks_output = lmks_output.reshape(-1, 3)
    pcd.points = o3d.utility.Vector3dVector(lmks_output)

    lines = [[0,1],[1,2],[2,3],[3,4], 
             [0,5],[5,6],[6,7],[7,8],
             [5,9],[9,10],[10,11],[11,12],
             [9,13],[13,14],[14,15],[15,16],
             [13,17],[17,18],[18,19],[19,20],[0,17]]
    colors = [[1, 0, 0] for i in range(len(lines))]
    line_set.points = o3d.utility.Vector3dVector(lmks_output)  # Update the points
    line_set.lines = o3d.utility.Vector2iVector(lines)  # Update the lines
    line_set.colors = o3d.utility.Vector3dVector(colors)

    vis.update_geometry(pcd)
    vis.update_geometry(line_set)
    vis.poll_events()
    vis.update_renderer()

    i += 1
    if i == X.shape[0]:
        i = 0
        
    time.sleep(0.1)

vis.destroy_window()

In [None]:
21 * 3 * 2

In [None]:
21 * 3