<div style="background-color:rgb(0, 55, 207); padding: 30px; border-radius: 20px; box-shadow: 0 4px 15px rgba(105, 195, 255, 0.3); color:rgb(187, 201, 248); font-family: 'Times New Roman', serif;">

<h1 style="text-align: center; font-size: 38px; color: white; font-weight: bold;">ST-GCN Data Preparation</h1>

<h3 style="font-size: 22px; color: white; font-weight: bold;">Libraries</h3>

In [None]:
import os
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

<h3 style="font-size: 22px; color: white; font-weight: bold;">Configuration</h3>

In [None]:
# Directory holding the NumPy sample files; every file name read from the
# label file is joined onto this directory.
DATA_DIR = r"DIRECTORY_WHEREYOUR_NUMPY_FILES"
# Label file with one "<file name> <label>" pair per line. It is opened via
# os.path.join(DATA_DIR, LABEL_FILE), so a relative name is looked up inside
# DATA_DIR.
LABEL_FILE = r"YOUR_LABELS_FILE.TXT"
# Destination directory for the saved arrays and pickles (created if absent).
OUTPUT_DIR = './data/mediapipe_asl'
# Fraction of samples passed to train_test_split as the held-out split.
TEST_SIZE = 0.2     # 80% train, 20% test

<h3 style="font-size: 22px; color: white; font-weight: bold;">Load Labels</h3>

In [None]:
# Parallel lists: samples[i] is the path of a sample file that exists on disk,
# labels[i] is its raw (string) label taken from the label file.
samples = []
labels = []

# LABEL_FILE is resolved relative to DATA_DIR (os.path.join leaves an absolute
# LABEL_FILE untouched).
label_path = os.path.join(DATA_DIR, LABEL_FILE)
with open(label_path, 'r') as f:
    for line_no, line in enumerate(f, start=1):
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at the end of the file):
            # nothing to parse, and unpacking it would raise.
            continue
        if len(fields) != 2:
            # Still a ValueError, as the bare unpacking raised before, but now
            # pointing at the offending line.
            raise ValueError(
                f"{label_path}:{line_no}: expected '<file name> <label>', "
                f"got {line.rstrip()!r}"
            )
        name, label = fields
        path = os.path.join(DATA_DIR, name)
        if os.path.exists(path):
            samples.append(path)
            labels.append(label)
        else:
            # Listed samples without a file are reported and left out.
            print(f"⚠️ Missing file: {path}")

<h3 style="font-size: 22px; color: white; font-weight: bold;">Map Labels</h3>

In [None]:
# Assign every distinct label a contiguous integer id, in sorted label order,
# so the mapping is the same on every run.
unique_labels = sorted(set(labels))
label_to_index = dict(zip(unique_labels, range(len(unique_labels))))
# Reverse lookup (id -> label) for decoding predictions later.
index_to_label = dict(enumerate(unique_labels))
# Per-sample integer labels, aligned with `samples`.
int_labels = list(map(label_to_index.__getitem__, labels))

<h3 style="font-size: 22px; color: white; font-weight: bold;">Split Data</h3>

In [None]:
# Split file paths and their integer labels together. `stratify` keeps the
# class proportions equal in both parts, and the fixed `random_state` makes
# the split repeatable.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    samples,
    int_labels,
    test_size=TEST_SIZE,
    random_state=42,
    stratify=int_labels,
)

def load_data(paths):
    """Load every NumPy file in ``paths`` and stack them into a single array.

    Args:
        paths: Iterable of paths to ``.npy`` files readable by ``np.load``.

    Returns:
        An array whose first axis indexes the samples in the order given by
        ``paths``. An empty ``paths`` yields an empty array.

    Raises:
        ValueError: If a sample's shape differs from the first sample's
            shape. The message names the offending file.
    """
    arrays = []
    for p in paths:
        arr = np.load(p)
        # Check each file as it is read so the error can name the bad file,
        # instead of failing (or building a ragged object array) afterwards.
        if arrays and arr.shape != arrays[0].shape:
            raise ValueError(
                f"Shape mismatch in {p}: got {arr.shape}, "
                f"expected {arrays[0].shape}"
            )
        arrays.append(arr)
    if not arrays:
        # np.stack rejects an empty list; keep the empty-array result.
        return np.array([])
    return np.stack(arrays)

<h3 style="font-size: 22px; color: white; font-weight: bold;">Save Data</h3>

In [None]:
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Sample arrays: load each split from disk and write it as one .npy file.
for npy_name, split_paths in (
    ('train_data.npy', train_paths),
    ('val_data.npy', test_paths),
):
    np.save(os.path.join(OUTPUT_DIR, npy_name), load_data(split_paths))

# Labels: each pickle holds a (sample file names, integer labels) tuple.
for pkl_name, split_paths, split_labels in (
    ('train_label.pkl', train_paths, train_labels),
    ('val_label.pkl', test_paths, test_labels),
):
    with open(os.path.join(OUTPUT_DIR, pkl_name), 'wb') as f:
        sample_names = [os.path.basename(p) for p in split_paths]
        pickle.dump((sample_names, split_labels), f)

# Optional: keep both label mappings so integer predictions can be decoded later.
label_mapping = {'label_to_index': label_to_index, 'index_to_label': index_to_label}
with open(os.path.join(OUTPUT_DIR, 'label_mapping.pkl'), 'wb') as f:
    pickle.dump(label_mapping, f)

print(f"✅ Done! Saved to {OUTPUT_DIR}")