Dataset creation: extract hand landmarks from the images under ./data and save them to data.csv.

In [1]:
import os
import pickle
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

# Build a landmark dataset from a directory of hand images.
#
# Expected layout: DATA_DIR/<class_name>/<image files>. Every hand that
# MediaPipe detects in an image becomes one row of 42 features
# (x0, y0, ..., x20, y20) plus the class directory name as the label. The
# result is written to data.csv in the current working directory.
print("Starting data preprocessing...")

# Initialize MediaPipe Hands (static_image_mode=True: inputs are unrelated
# still images, not consecutive video frames).
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.5)

DATA_DIR = './data'
data = []
labels = []

try:
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the resulting CSV reproducible across runs and machines.
    for dir_ in sorted(os.listdir(DATA_DIR)):
        class_path = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(class_path):
            continue

        print(f"Processing class {dir_}...")
        for img_path in sorted(os.listdir(class_path)):
            img_full_path = os.path.join(class_path, img_path)
            if not os.path.isfile(img_full_path):
                continue

            # cv2.imread signals an unreadable file by returning None.
            img = cv2.imread(img_full_path)
            if img is None:
                print(f"Warning: Failed to load image {img_full_path}")
                continue

            # OpenCV loads BGR; the image is converted to RGB before being
            # handed to MediaPipe.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = hands.process(img_rgb)

            if not results.multi_hand_landmarks:
                print(f"No hand landmarks detected in {img_path}")
                continue

            # One sample per detected hand, all sharing the image's label.
            for hand_landmarks in results.multi_hand_landmarks:
                x_ = [lm.x for lm in hand_landmarks.landmark]
                y_ = [lm.y for lm in hand_landmarks.landmark]

                # Translate the hand so its bounding-box corner sits at the
                # origin. The minima are loop-invariant, so compute them once.
                min_x = min(x_)
                min_y = min(y_)

                # Iterate over the landmarks actually returned (rather than a
                # fixed range(21)) so the length check below is meaningful.
                data_aux = []
                for x, y in zip(x_, y_):
                    data_aux.append(x - min_x)
                    data_aux.append(y - min_y)

                if len(data_aux) == 42:
                    data.append(data_aux)
                    labels.append(dir_)
                else:
                    print(f"Skipped image {img_path} in class {dir_} due to incomplete landmarks")

    if not data or not labels:
        print("Error: No valid data collected.")
        # SystemExit is not an Exception subclass, so it bypasses the handler
        # below (the finally clause still runs). Raising it directly avoids
        # depending on the interactive exit() helper and reports a failing
        # exit status.
        raise SystemExit(1)

    # Create column names in format x0, y0, x1, y1, ..., x20, y20
    columns = [f'{coord}{i}' for i in range(21) for coord in ['x', 'y']]
    data_df = pd.DataFrame(data, columns=columns)
    data_df['label'] = labels

    # Save to CSV
    data_df.to_csv('data.csv', index=False)
    print(f"Data saved to data.csv with {len(data_df)} samples.")

except Exception as e:
    print(f"Error during preprocessing: {e}")
else:
    # Only report completion when no error occurred.
    print("Preprocessing complete.")
finally:
    # Always release the MediaPipe resources, on success or failure.
    hands.close()


Starting data preprocessing...
Processing class 0...
Processing class 1...
Processing class 10...
Processing class 11...
Processing class 12...
Processing class 13...
Processing class 14...
Processing class 15...
Processing class 16...
Processing class 17...
Processing class 18...
Processing class 19...
Processing class 2...
Processing class 20...
Processing class 21...
Processing class 22...
Processing class 23...
Processing class 3...
Processing class 4...
Processing class 5...
Processing class 6...
Processing class 7...
Processing class 8...
Processing class 9...
Data saved to data.csv with 7202 samples.
Preprocessing complete.
