In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
import csv




In [2]:
# --- Configuration ---
# Root folder of the dataset; each sub-directory is treated as one class.
DATA_DIR = "./data"
# Destination CSV. The "raw" in the name marks that the stored landmark
# coordinates are written without normalization.
OUTPUT_CSV_FILE = "landmark_data_raw_new.csv"
# Number of landmarks expected per detected hand (each contributes an x and a y).
NUM_LANDMARKS = 21

In [3]:
# --- MediaPipe Initialization ---
# Short aliases for the MediaPipe solution modules.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# One detector instance shared by the processing loop, which closes it when
# done. Inputs are individual image files read from disk (hence
# static_image_mode=True), and only the first detected hand is used downstream
# (hence max_num_hands=1).
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

In [4]:
# --- Data Collection ---
# Accumulators filled by the processing loop: one entry per successfully
# processed image, plus running totals across all classes.
all_rows = []
processed_files = skipped_files = 0

print(f"Starting data collection from: {DATA_DIR}")

Starting data collection from: ./data


In [5]:
# Build the CSV header: an x/y column pair per landmark (numbered from 1),
# followed by the class label column.
headers = [
    column
    for landmark_no in range(1, NUM_LANDMARKS + 1)
    for column in (f'x{landmark_no}', f'y{landmark_no}')
]
headers.append('label')

print(f"CSV Headers: {headers}")

CSV Headers: ['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4', 'x5', 'y5', 'x6', 'y6', 'x7', 'y7', 'x8', 'y8', 'x9', 'y9', 'x10', 'y10', 'x11', 'y11', 'x12', 'y12', 'x13', 'y13', 'x14', 'y14', 'x15', 'y15', 'x16', 'y16', 'x17', 'y17', 'x18', 'y18', 'x19', 'y19', 'x20', 'y20', 'x21', 'y21', 'label']


In [6]:
# Discover the classes: every sub-directory of DATA_DIR is one class, in
# sorted order. Directories named J or Z (case-insensitive) are excluded.
# NOTE(review): presumably J/Z are dropped because those signs involve motion
# and cannot be captured from a single still image - confirm.
class_names = sorted(
    entry
    for entry in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, entry))
    and entry.upper() not in ('J', 'Z')
)
print(f"Processing classes: {class_names}")

Processing classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']


In [7]:
# --- Landmark extraction ---
# For every class folder, run the MediaPipe hand detector on each image and
# append one row to `all_rows`: the raw (un-normalized) x/y coordinates of each
# landmark followed by the class label. Images that cannot be read, or in which
# no hand is detected, are counted as skipped.
#
# Re-running notes: `all_rows`, `processed_files` and `skipped_files` are
# appended to (not reset) here, so re-run the "Data Collection" cell first to
# avoid duplicate rows. `hands` is closed at the end of this cell, so the
# "MediaPipe Initialization" cell presumably has to be re-run as well - confirm.
try:
    for class_name in class_names:
        class_dir = os.path.join(DATA_DIR, class_name)
        print(f"\nProcessing class: {class_name}")

        # os.listdir() returns entries in arbitrary order; sort so the CSV row
        # order is reproducible across runs and machines (class_names is
        # already sorted for the same reason).
        image_files = sorted(
            f for f in os.listdir(class_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        print(f" Found {len(image_files)} images.")

        class_processed_count = 0
        class_skipped_count = 0

        for img_name in image_files:
            img_path = os.path.join(class_dir, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Image could not be loaded.
                class_skipped_count += 1
                continue

            # Convert OpenCV's BGR image to RGB before handing it to MediaPipe.
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_rgb.flags.writeable = False
            results = hands.process(img_rgb)
            img_rgb.flags.writeable = True

            if not results.multi_hand_landmarks:
                # No hand detected
                class_skipped_count += 1
                continue

            # Only the first detected hand is used.
            hand_landmarks = results.multi_hand_landmarks[0]

            # --- Raw Landmark Extraction (Normalization Removed) ---
            try:
                # Flatten to [x1, y1, x2, y2, ...] in landmark order.
                raw_landmark_coords = [
                    coord
                    for lm in hand_landmarks.landmark
                    for coord in (lm.x, lm.y)
                ]
            except Exception as e:
                print(f"  Warning: Error during landmark extraction for {img_path}. Skipping. Error: {e}")
                class_skipped_count += 1
                continue

            # Ensure we got the correct number of coordinates (x and y per landmark)
            if len(raw_landmark_coords) != NUM_LANDMARKS * 2:
                print(f"  Warning: Incorrect number of coordinates ({len(raw_landmark_coords)}) extracted for {img_path}. Skipping.")
                class_skipped_count += 1
                continue

            # One CSV row: raw coordinates followed by the class label.
            all_rows.append(raw_landmark_coords + [class_name])
            class_processed_count += 1

        print(f" Finished class {class_name}. Processed: {class_processed_count}, Skipped: {class_skipped_count}")
        processed_files += class_processed_count
        skipped_files += class_skipped_count
finally:
    # Always release the detector, even if the loop fails or is interrupted.
    hands.close()

print("\nData collection and feature extraction finished.")
print(f"Total images processed successfully: {processed_files}")
print(f"Total images skipped: {skipped_files}")
print(f"Number of data rows created: {len(all_rows)}")


Processing class: A
 Found 200 images.
 Finished class A. Processed: 200, Skipped: 0

Processing class: B
 Found 200 images.
 Finished class B. Processed: 200, Skipped: 0

Processing class: C
 Found 200 images.
 Finished class C. Processed: 200, Skipped: 0

Processing class: D
 Found 200 images.
 Finished class D. Processed: 200, Skipped: 0

Processing class: E
 Found 200 images.
 Finished class E. Processed: 200, Skipped: 0

Processing class: F
 Found 200 images.
 Finished class F. Processed: 200, Skipped: 0

Processing class: G
 Found 200 images.
 Finished class G. Processed: 200, Skipped: 0

Processing class: H
 Found 200 images.
 Finished class H. Processed: 200, Skipped: 0

Processing class: I
 Found 200 images.
 Finished class I. Processed: 200, Skipped: 0

Processing class: K
 Found 200 images.
 Finished class K. Processed: 200, Skipped: 0

Processing class: L
 Found 200 images.
 Finished class L. Processed: 200, Skipped: 0

Processing class: M
 Found 200 images.
 Finished clas

In [8]:
# --- Save the data to CSV ---
# Write the header row followed by every collected row. Nothing is written
# when no rows were collected; a failure to write is reported, not raised.
if not all_rows:
    print("\nError: No data was collected or processed successfully. CSV file not saved.")
else:
    print(f"\nSaving data to {OUTPUT_CSV_FILE}...")
    try:
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(headers)
            for data_row in all_rows:
                csv_writer.writerow(data_row)
        # Success message plus a reminder that the coordinates are un-normalized.
        for banner_line in (
            "Data saved successfully.",
            "\n******************************************************",
            "** WARNING: Data saved WITHOUT normalization.         **",
            "** Model performance will likely be significantly     **",
            "** worse compared to using normalized landmarks.      **",
            "******************************************************",
        ):
            print(banner_line)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error saving CSV file: {e}")


Saving data to landmark_data_raw_new.csv...
Data saved successfully.

******************************************************
** Model performance will likely be significantly     **
** worse compared to using normalized landmarks.      **
******************************************************
