<a href="https://colab.research.google.com/github/Peeranatz/LiftingDetection/blob/main/DataPreprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [96]:
import glob
import os
import numpy as np
import csv

from tqdm import tqdm
from google.colab import files

In [67]:
skeleton_path = "/content/drive/MyDrive/AI/datasets_action/Skeletons_datasets"

label_map = {
    'standing': ['A001', 'A002', 'A011'],
    'moving': ['A009', 'A022', 'A099'],
    'carrying': ['A006', 'A007', 'A089', 'A090', 'A092']
}

selected_files = {'standing': [], 'moving': [], 'carrying': []}

for label, actions in label_map.items():
    print(f"\nProcessing label: {label}")
    for action_id in actions:
        action_dir = os.path.join(skeleton_path, action_id)
        matches = glob.glob(os.path.join(action_dir, "*.skeleton"))
        print(f"  Found {len(matches)} files in {action_dir}")
        # for f in matches[:3]:  # แสดงแค่ 3 ไฟล์แรก
        #     print(f"    → {os.path.basename(f)}")
        selected_files[label].extend(matches)

print("\nAll files loaded successfully.")


Processing label: standing
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A001
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A002
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A011

Processing label: moving
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A009
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A022
  Found 960 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A099

Processing label: carrying
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A006
  Found 948 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A007
  Found 960 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A089
  Found 960 files in /content/drive/MyDrive/AI/datasets_action/Skeletons_datasets/A090
  Found 960 files in /content/drive/MyDrive/AI/d

In [69]:
for label, files in selected_files.items():
    print(f"{label}: {len(files)} files")

standing: 2844 files
moving: 2856 files
carrying: 4776 files


In [50]:
def read_skeleton_file(filename): # อ่านไฟล์ .skeleton
    with open(filename, 'r') as f:
        lines = f.readlines()

    index = 0
    num_frames = int(lines[index].strip()); index += 1
    data = []

    for _ in range(num_frames):
        frame = {}
        num_bodies = int(lines[index].strip()); index += 1
        frame['bodies'] = []

        for _ in range(num_bodies):
            body_id_line = lines[index].strip(); index += 1
            while not lines[index].strip().isdigit():
                index += 1

            num_joints = int(lines[index].strip()); index += 1
            joints = []

            for _ in range(num_joints):
                parts = lines[index].strip().split()
                x, y, z = map(float, parts[:3])
                joints.append((x, y, z))
                index += 1

            frame['bodies'].append({'joints': joints})

        data.append(frame)

    return data

In [51]:
def extract_joint_sequence(skeleton_data, joint_id, body_index=0): # ดึงตำแหน่งของ joint ที่กำหนดตลอด sequence
    sequence = []
    for frame in skeleton_data:
        try:
            joints = frame['bodies'][body_index]['joints']
            if joint_id < len(joints):
                sequence.append(joints[joint_id])
        except:
            continue
    return sequence

In [76]:
def pad_sequence(joint_seq, target_len=30):
    if len(joint_seq) >= target_len:
        padded = joint_seq[:target_len]
    else:
        last = joint_seq[-1] if joint_seq else [(0, 0, 0)] * len(joint_seq[0])
        padded = joint_seq + [last] * (target_len - len(joint_seq))
    return np.array(padded)  # ← return เป็น np.array ตรงนี้

In [55]:
def normalize_sequence(seq):
    """
    normalize ตำแหน่ง joint (x, y, z) ด้วย mean-std
    - seq: shape (T, num_joints, 3)
    """
    seq_np = np.array(seq)
    mean = seq_np.mean(axis=(0, 1), keepdims=True)
    std = seq_np.std(axis=(0, 1), keepdims=True) + 1e-6  # ป้องกันหาร 0
    return (seq_np - mean) / std

In [78]:
selected_joints = [
    6, 7, 8, 22, 23,          # แขนขวา
    10, 11, 12, 24, 25,       # แขนซ้าย
    14, 15, 16, 18, 19, 20,   # ขา
    1, 2, 21                  # ลำตัวกลาง
]
seq_len = 30
label_to_index = {"standing": 0, "moving": 1, "carrying": 2}

X_data = []
y_labels = []
file_names = []  # เผื่อเก็บชื่อไฟล์ไว้ดูภายหลัง

for label, filepaths in selected_files.items():
    print(f"\nProcessing label: {label} ({len(filepaths)} files)")
    for fpath in tqdm(filepaths, desc=f"{label}", unit="file"):
        try:
            data = read_skeleton_file(fpath)
            label_index = label_to_index[label]
            file_name = os.path.basename(fpath)

            joint_seq = []
            for frame in data:
                if not frame['bodies'] or len(frame['bodies'][0]['joints']) == 0:
                    continue
                joints = frame['bodies'][0]['joints']
                selected = []
                for j in selected_joints:
                    if j < len(joints):
                        selected.append(joints[j])
                    else:
                        selected.append((0.0, 0.0, 0.0))
                joint_seq.append(selected)

            if len(joint_seq) == 0:
                continue  # skip this file if no usable frames

            padded = pad_sequence(joint_seq, target_len=seq_len)
            X_data.append(padded)
            y_labels.append(label_index)
            file_names.append(file_name)

        except Exception as e:
            print(f"⚠️ Error in {fpath}: {e}")


Processing label: standing (2844 files)


standing: 100%|██████████| 2844/2844 [00:40<00:00, 69.57file/s]



Processing label: moving (2856 files)


moving: 100%|██████████| 2856/2856 [00:32<00:00, 88.95file/s]



Processing label: carrying (4776 files)


carrying: 100%|██████████| 4776/4776 [00:53<00:00, 88.45file/s]


In [79]:
# แปลงเป็น numpy array
X_data = np.array(X_data)  # shape: (N, seq_len, num_joints, 3)
y_labels = np.array(y_labels)

# บันทึกเป็น .npz
np.savez_compressed("/content/skeleton_dataset_30frames.npz", X=X_data, y=y_labels, files=np.array(file_names))

print("\nExported to /content/skeleton_dataset_30frames.npz")
print(f"X_data shape: {X_data.shape}")
print(f"y_labels shape: {y_labels.shape}")


Exported to /content/skeleton_dataset_30frames.npz
X_data shape: (10421, 30, 19, 3)
y_labels shape: (10421,)


In [97]:
files.download("/content/skeleton_dataset_30frames.npz")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [87]:
# print(X_data[0].shape)
# print(X_data[0][0]) # ดูตำแหน่งของ joint ในเฟรมที่ 0 ของตัวอย่างที่ 0
# print(X_data[0][0][5]) # ดูตำแหน่งของ joint ที่ 5 (ของตัวอย่างที่ 0, เฟรมที่ 0)

np.set_printoptions(precision=3, suppress=True)
print(X_data[0][0])  # joint ทั้งหมดในเฟรมแรกของตัวอย่างแรก

[[-0.445 -0.111  3.723]
 [-0.436 -0.187  3.751]
 [-0.07   0.36   3.561]
 [-0.4   -0.162  3.741]
 [ 0.022 -0.264  3.715]
 [ 0.008 -0.12   3.74 ]
 [ 0.002 -0.191  3.737]
 [-0.285 -0.086  3.752]
 [-0.032 -0.189  3.725]
 [ 0.     0.     0.   ]
 [-0.321 -0.626  4.132]
 [-0.309 -0.71   4.031]
 [-0.142 -0.065  3.756]
 [-0.11  -0.632  4.133]
 [-0.1   -0.717  4.032]
 [-0.223  0.374  3.54 ]
 [-0.222  0.181  3.649]
 [-0.223  0.439  3.501]
 [-0.429 -0.266  3.77 ]]


In [92]:
def print_joint_info(X_data, sample_idx, frame_idx, joint_idx):
    joint_index_map = {
        0:  ("6",  "Right Elbow"),
        1:  ("7",  "Right Wrist"),
        2:  ("8",  "Right Hand"),
        3:  ("22", "Right Hand Tip"),
        4:  ("23", "Right Thumb"),
        5:  ("10", "Left Elbow"),
        6:  ("11", "Left Wrist"),
        7:  ("12", "Left Hand"),
        8:  ("24", "Left Hand Tip"),
        9:  ("25", "Left Thumb"),
        10: ("14", "Right Hip"),
        11: ("15", "Right Knee"),
        12: ("16", "Right Ankle"),
        13: ("18", "Left Hip"),
        14: ("19", "Left Knee"),
        15: ("20", "Left Ankle"),
        16: ("1",  "Spine Mid / Neck"),
        17: ("2",  "Spine Shoulder"),
        18: ("21", "Spine Base")
    }

    try:
        joint_id, joint_name = joint_index_map[joint_idx]
        x, y, z = X_data[sample_idx][frame_idx][joint_idx]
        print(f"Sample {sample_idx}, Frame {frame_idx}, Joint {joint_idx}")
        print(f"→ Joint in NTU Dataset {joint_id}: {joint_name}")
        print(f"→ Coordinates: x={x:.3f}, y={y:.3f}, z={z:.3f}")
    except IndexError:
        print("Index out of range — กรุณาตรวจสอบ index ที่ใช้")
    except KeyError:
        print("Joint index ไม่อยู่ใน selected_joints ที่กำหนดไว้")

In [93]:
print_joint_info(X_data, sample_idx=0, frame_idx=0, joint_idx=1)

Sample 0, Frame 0, Joint 1
→ Joint in NTU Dataset 7: Right Wrist
→ Coordinates: x=-0.436, y=-0.187, z=3.751


In [82]:
label_names = {0: 'standing', 1: 'moving', 2: 'carrying'}
unique, counts = np.unique(y_labels, return_counts=True)
for label, count in zip(unique, counts):
    print(f"{label_names[label]} ({label}): {count}")

standing (0): 2822
moving (1): 2840
carrying (2): 4759
