In [1]:
import os
import numpy as np
import pandas as pd

# === [1] Path to landmarks folder ===
landmarks_dir = "captured_data/landmarks/"
output_csv = "landmarks_dataset.csv"

# === [2] Empty lists to store data ===
data = []
labels = []

# === [3] Traverse through each label folder ===
for label_name in os.listdir(landmarks_dir):
    
    label_path = os.path.join(landmarks_dir, label_name)
    
    if not os.path.isdir(label_path):
        continue  # Skip non-folders
    
    for file_name in os.listdir(label_path):
        if file_name.endswith('.npy'):
            file_path = os.path.join(label_path, file_name)
            
            # Load .npy landmark data
            landmarks = np.load(file_path)
            
            # Flatten 21x3 = 63 features
            flattened_landmarks = landmarks.flatten()
            
            data.append(flattened_landmarks)
            labels.append(label_name)

print(f"Total Samples Collected: {len(data)}")

# === [4] Create DataFrame ===
df = pd.DataFrame(data)
df['label'] = labels

print("Dataframe Shape:", df.shape)

# === [5] Export to CSV ===
df.to_csv(output_csv, index=False)
print(f"Dataset saved successfully to {output_csv}")

Total Samples Collected: 180
Dataframe Shape: (180, 64)
Dataset saved successfully to landmarks_dataset.csv


  from pandas.core import (


In [2]:
landmarks_data = pd.read_csv('landmarks_dataset.csv')

landmarks_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,label
0,0.517754,0.905560,1.473566e-07,0.516693,0.798606,0.019614,0.496096,0.718364,0.033674,0.457531,...,0.361810,0.815343,0.035725,0.342632,0.790637,0.044646,0.328020,0.768959,0.050737,please
1,0.584487,0.921963,2.834070e-07,0.582382,0.820848,0.008208,0.568748,0.744881,0.010880,0.539280,...,0.422817,0.880903,-0.015751,0.397436,0.864309,-0.016621,0.377107,0.845363,-0.016414,please
2,0.425893,0.868665,1.512977e-07,0.430534,0.772791,0.008915,0.454295,0.715321,0.012780,0.489662,...,0.566774,0.838963,-0.006008,0.586781,0.826357,-0.001663,0.601068,0.814274,0.002199,please
3,0.559062,0.944716,7.708704e-08,0.549858,0.845902,0.007825,0.564966,0.768869,0.013401,0.591337,...,0.703882,0.871311,0.008180,0.720365,0.844001,0.014023,0.730470,0.820380,0.019484,please
4,0.518579,0.941244,9.775813e-08,0.511786,0.845036,0.004552,0.525681,0.768467,0.006787,0.551321,...,0.662779,0.862471,-0.004726,0.678994,0.835751,-0.000824,0.688797,0.811928,0.003094,please
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,0.794734,0.714286,-3.788984e-07,0.734326,0.695419,-0.045344,0.678964,0.684280,-0.082537,0.642019,...,0.809672,0.535133,-0.092417,0.797308,0.597328,-0.089585,0.794325,0.602982,-0.083005,yes
176,0.772579,0.615129,-4.844613e-07,0.704143,0.592067,-0.048391,0.640289,0.576793,-0.086929,0.588816,...,0.777817,0.356066,-0.082775,0.760715,0.434422,-0.075908,0.756545,0.462812,-0.064806,yes
177,0.858982,0.614430,-3.327126e-07,0.811999,0.590693,-0.049987,0.764351,0.557989,-0.086945,0.734952,...,0.919935,0.380615,-0.089408,0.898804,0.452695,-0.084221,0.884233,0.473798,-0.074063,yes
178,0.828163,0.829119,-3.066418e-07,0.780218,0.806841,-0.052138,0.729621,0.798412,-0.095865,0.696111,...,0.885901,0.690319,-0.113698,0.864515,0.755897,-0.112308,0.852791,0.768768,-0.106460,yes


### Data Preparation and CSV Creation Explanation

In the data preparation step, the raw `.npy` files containing 21x3 hand landmark coordinates for each captured sign were processed and converted into a clean and structured CSV file suitable for machine learning. Each `.npy` file was loaded, flattened into a 1D array of 63 features (x, y, z for 21 landmarks), and labeled based on its parent folder name (hello, goodbye, please, thank_you, yes, no). All this data was combined into a single DataFrame and saved as `landmarks_dataset.csv`.

The final CSV contains 180 samples with 64 columns — 63 feature columns for landmark coordinates and 1 label column indicating the sign. The landmark values are normalized between 0 and 1, as provided by MediaPipe, and the labels are stored as text for clarity. This structured dataset is clean, lightweight, and directly ready for training machine learning models for sign language classification.