In [1]:
!pwd

/home/giakhang/dev/pose_sandbox


In [2]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(os.curdir),
                "Hand_pose_estimation_3D/arm_and_hand"))
sys.path.append(os.path.join(os.path.abspath(os.curdir),
                "Hand_pose_estimation_3D"))

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F
import matplotlib.pyplot as plt
import os
from datetime import datetime
import glob
from transformer_encoder import TransformerEncoder
from torch.optim.lr_scheduler import ReduceLROnPlateau
from ann import ANN
from dataloader_ann import HandArmLandmarksDataset_ANN
import math
from sklearn.preprocessing import MinMaxScaler
from csv_writer import columns_to_normalize, fusion_csv_columns_name
import pandas as pd
import joblib
import numpy as np

from landmarks_scaler import LandmarksScaler
from train_ann_no_intrinsics import train_model

In [3]:
# Landmark names whose count sizes the extra slice of the model input
# (INPUT_DIM adds 3 values per entry). Despite the variable name, the list
# holds body anchors and every fingertip / finger base, not only the thumb.
thumb_landmarks = [
    # torso anchors
    "left shoulder",
    "left hip",
    "right shoulder",
    "right hip",
    # left arm
    "left elbow",
    "WRIST",
    # base joint of each finger
    "THUMB_CMC",
    "INDEX_FINGER_MCP",
    "MIDDLE_FINGER_MCP",
    "RING_FINGER_MCP",
    "PINKY_MCP",
    # tip of each finger
    "THUMB_TIP",
    "INDEX_FINGER_TIP",
    "MIDDLE_FINGER_TIP",
    "RING_FINGER_TIP",
    "PINKY_TIP",
]

In [4]:
# --- Model hyper-parameters -------------------------------------------------
# Width of one flattened landmark vector.
# NOTE(review): 144 presumably equals 48 fused landmarks x 3 coordinates — confirm.
NUM_LANDMARK_FEATURES = 144

# Two landmark vectors plus 3 values for every entry of `thumb_landmarks`.
INPUT_DIM = 2 * NUM_LANDMARK_FEATURES + (len(thumb_landmarks) * 3)
OUTPUT_DIM = NUM_LANDMARK_FEATURES
# 1.5x the landmark vector width (216); integer arithmetic avoids the float round-trip.
HIDDEN_DIM = NUM_LANDMARK_FEATURES + NUM_LANDMARK_FEATURES // 2
NUM_HIDDEN_LAYERS = 4
DROPOUT_RATE = 0.1

# Use the GPU when one is present instead of failing outright on CPU-only hosts.
# NOTE(review): train_model's own device handling is not visible here — confirm
# it follows the model's device before relying on the CPU fallback.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type != "cuda":
    print("CUDA is not available; the model will be placed on the CPU.")

model = ANN(input_dim=INPUT_DIM,
            output_dim=OUTPUT_DIM,
            hidden_dim=HIDDEN_DIM,
            num_hidden_layers=NUM_HIDDEN_LAYERS,
            dropout_rate=DROPOUT_RATE)
model = model.to(DEVICE)

In [5]:
# --- Run identification and output locations --------------------------------
MODEL_NAME = "ann"

# Take a single timestamp so DATE and DATETIME can never disagree (two separate
# datetime.now() calls could straddle midnight and yield mismatched folders).
_run_started_at = datetime.now()
DATETIME = _run_started_at.strftime("%Y%m%d-%H%M")
DATE = _run_started_at.strftime("%Y%m%d")

# Runs are grouped as <BASE_DIR>/<DATE>/<DATETIME>.
BASE_DIR = "/home/giakhang/dev/pose_sandbox/Hand_pose_estimation_3D/arm_and_hand/runs/{}".format(MODEL_NAME)
SAVE_DIR = os.path.join(BASE_DIR, DATE, DATETIME)
DATA_DIR = "/home/giakhang/dev/pose_sandbox/data"

# TensorBoard event files are written into the run directory.
writer = SummaryWriter(log_dir=SAVE_DIR)

In [6]:
# Glob pattern for the recording-date folders; the same pattern is applied to
# both directory levels below DATA_DIR.
SELECTED_DATE = "2024-*"


def _find_split_csvs(split):
    """Return the sorted list of landmark CSV files for one split.

    Args:
        split: Split tag embedded in the file name ("train" or "val").

    Returns:
        Sorted list of matching paths. glob.glob() yields files in arbitrary
        order, so sorting keeps the file order stable between runs and machines.

    Raises:
        FileNotFoundError: If no file matches, so a wrong DATA_DIR or date
            pattern is reported here instead of surfacing later as an empty
            dataset.
    """
    pattern = os.path.join(
        DATA_DIR,
        "{}/{}/fine_landmarks_{}_*.csv".format(SELECTED_DATE, SELECTED_DATE, split))
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError("No '{}' landmark CSV files match {}".format(split, pattern))
    return matches


train_paths = _find_split_csvs("train")
val_paths = _find_split_csvs("val")
# Skeleton edges given as [start_index, end_index] pairs.
# NOTE(review): the indices presumably refer to positions in
# `arm_hand_fused_names` — confirm against the dataset class.
body_lines = [
    [0, 2],
    [0, 3],
    [2, 4],
    [3, 4],
]

lefthand_lines = [
    [0, 1], [1, 5],                 # two-edge chain 0 -> 1 -> 5
    [5, 6], [5, 10], [5, 22],       # edges fanning out from index 5
    [10, 14], [14, 18], [18, 22],   # chain across 10 -> 14 -> 18 -> 22
]
# Five chains of three consecutive edges, starting at 6, 10, 14, 18 and 22.
for _chain_start in (6, 10, 14, 18, 22):
    lefthand_lines.extend(
        [_chain_start + _step, _chain_start + _step + 1] for _step in range(3)
    )

# Distance thresholds handed to the dataset constructors; the validation
# split uses smaller values than the training split.
train_body_distance_thres = train_leftarm_distance_thres = 550
train_lefthand_distance_thres = 200
val_body_distance_thres = val_leftarm_distance_thres = 450
val_lefthand_distance_thres = 150

# Ordered names of the 48 fused landmarks: five body/left-arm points, the 21
# left-hand points, the right elbow, then the 21 right-hand points (same names
# as the left hand with a "RIGHT_" prefix).
_hand_landmark_names = ["WRIST", "THUMB_CMC", "THUMB_MCP", "THUMB_IP", "THUMB_TIP"]
for _finger in ("INDEX_FINGER", "MIDDLE_FINGER", "RING_FINGER", "PINKY"):
    for _joint in ("MCP", "PIP", "DIP", "TIP"):
        _hand_landmark_names.append("{}_{}".format(_finger, _joint))

arm_hand_fused_names = (
    ["left shoulder", "left elbow", "left hip", "right shoulder", "right hip"]
    + _hand_landmark_names
    + ["right elbow"]
    + ["RIGHT_{}".format(_name) for _name in _hand_landmark_names]
)

In [7]:
# Build the training set once WITHOUT a scaler, fit a MinMaxScaler on its raw
# inputs, and save the fitted scaler into the run directory. The saved file is
# what the scaled train/val datasets are constructed from afterwards.
minmax_scaler = MinMaxScaler()
train_dataset = HandArmLandmarksDataset_ANN(train_paths, 
    arm_hand_fused_names,
    body_lines, 
    lefthand_lines, 
    train_body_distance_thres, 
    train_leftarm_distance_thres, 
    train_lefthand_distance_thres,
    filter_outlier=True,
    only_keep_frames_contain_lefthand=True,
    cvt_normalized_xy_to_XY=True,
    use_fused_thumb_as_input=True)

# fit() is enough: the transformed copy produced by fit_transform() was never
# used, so computing it only cost time and memory.
minmax_scaler.fit(train_dataset._inputs)

# Do not rely on another component having created the run directory already.
os.makedirs(SAVE_DIR, exist_ok=True)
scaler_save_path = os.path.join(SAVE_DIR, "input_scaler.pkl")
joblib.dump(minmax_scaler, scaler_save_path)

['/home/giakhang/dev/pose_sandbox/Hand_pose_estimation_3D/arm_and_hand/runs/ann/20241021/20241021-1343/input_scaler.pkl']

In [8]:
# Reload the fitted scaler from disk and build the scaled train / validation
# datasets and their loaders.
scaler = LandmarksScaler(scaler_path=scaler_save_path)

BATCH_SIZE = 256

# Options that must be identical for both splits; keeping them in one place
# prevents the two constructor calls from drifting apart.
_shared_dataset_options = dict(
    filter_outlier=True,
    only_keep_frames_contain_lefthand=True,
    scaler=scaler,
    cvt_normalized_xy_to_XY=True,
    use_fused_thumb_as_input=True,
)

train_dataset = HandArmLandmarksDataset_ANN(train_paths,
    arm_hand_fused_names,
    body_lines,
    lefthand_lines,
    train_body_distance_thres,
    train_leftarm_distance_thres,
    train_lefthand_distance_thres,
    **_shared_dataset_options)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

val_dataset = HandArmLandmarksDataset_ANN(val_paths,
    arm_hand_fused_names,
    body_lines,
    lefthand_lines,
    val_body_distance_thres,
    val_leftarm_distance_thres,
    val_lefthand_distance_thres,
    **_shared_dataset_options)
# Validation is not shuffled: a fixed batch composition keeps the reported
# validation loss (and therefore best-checkpoint selection) reproducible.
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
# --- Optimisation setup -----------------------------------------------------
num_epochs = 50000
current_time = datetime.now().strftime('%Y%m%d-%H%M')

# The best checkpoint is stored inside the run directory.
save_path = os.path.join(
    SAVE_DIR,
    "{}_{}_layers_best.pth".format(MODEL_NAME, NUM_HIDDEN_LAYERS),
)

optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Multiply the learning rate by sqrt(0.1) after 1000 epochs without
# improvement of the monitored value, never going below 1e-8.
scheduler = ReduceLROnPlateau(
    optimizer=optimizer,
    mode='min',
    factor=math.sqrt(0.1),
    patience=1000,
    verbose=True,
    min_lr=1e-8,
)

# Early stopping is disabled for this run.
early_stopping = None



In [10]:
left_wrist_idx = 5
num_left_arm_landmarks = 21

# Landmark indices passed to train_model as `weight_idx` together with
# `weight=2.`.
# NOTE(review): presumably these landmarks are up-weighted in the loss; the
# indices look like positions in `arm_hand_fused_names` — confirm in train_model.
# An earlier variant used every index after the wrist:
# range(left_wrist_idx + 1, left_wrist_idx + num_left_arm_landmarks).
left_arm_idx = [7, 8, 11, 12, 15, 16, 19, 20, 23, 24]

# Close the TensorBoard writer even when training is interrupted (for example
# with a manual KeyboardInterrupt) or raises, so it is not left open.
try:
    train_losses, val_losses = train_model(model, 
        train_dataloader, 
        val_dataloader, 
        optimizer, 
        num_epochs=num_epochs, 
        save_path=save_path,
        early_stopping=early_stopping,
        scheduler=scheduler,
        writer=writer,
        log_seq=50,
        train_left_arm_hand_only=True,
        weight_idx=left_arm_idx,
        weight=2.)
finally:
    writer.close()

Model saved with Validation Loss: 1566345.6500
Model saved with Validation Loss: 1555939.0125
Model saved with Validation Loss: 1554306.7500
Model saved with Validation Loss: 1550695.5375
Model saved with Validation Loss: 1546532.6375
Model saved with Validation Loss: 1541366.6125
Model saved with Validation Loss: 1541038.9875
Model saved with Validation Loss: 1535055.6000
Model saved with Validation Loss: 1523663.6375
Model saved with Validation Loss: 1522478.0125
Model saved with Validation Loss: 1521654.0750
Model saved with Validation Loss: 1501585.1500
Model saved with Validation Loss: 1492060.7875
Model saved with Validation Loss: 1480026.2000
Model saved with Validation Loss: 1458369.5875
Model saved with Validation Loss: 1455748.5625
Model saved with Validation Loss: 1445385.8000
Model saved with Validation Loss: 1442011.4750
Model saved with Validation Loss: 1436921.5125
Epoch 50/50000, Training Loss: 1431686.2972
Epoch 50/50000, Validation Loss: 1432480.6875
Model saved with 

KeyboardInterrupt: 