In [1]:
!pip uninstall -y mediapipe
!pip install --upgrade --force-reinstall mediapipe

Found existing installation: mediapipe 0.10.21
Uninstalling mediapipe-0.10.21:
  Successfully uninstalled mediapipe-0.10.21
Collecting mediapipe
  Using cached mediapipe-0.10.21-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting absl-py (from mediapipe)
  Using cached absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting attrs>=19.1.0 (from mediapipe)
  Using cached attrs-25.4.0-py3-none-any.whl.metadata (10 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Using cached flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting jax (from mediapipe)
  Using cached jax-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe)
  Using cached jaxlib-0.8.0-cp312-cp312-manylinux_2_27_x86_64.whl.metadata (1.3 kB)
Collecting matplotlib (from mediapipe)
  Using cached matplotlib-3.10.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting numpy<2 (from mediapipe)
  Using cached numpy-1.26.4-cp312-cp312-manyli

In [2]:
# Step 0: Imports and Drive Mount

import os
import cv2
# !pip install mediapipe
import mediapipe as mp
import numpy as np
import pandas as pd
import json
from pathlib import Path
from tqdm.notebook import tqdm # This will give us a nice progress bar

# Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [3]:
# --- Step 1: Define Constants and Paths ---

# --- IMPORTANT: Change this to your project's folder on Drive! ---
BASE_PATH = Path('/content/drive/MyDrive/ASL-HuggingFace-Data/videos')

# 1. Our processing settings
SEQUENCE_LENGTH = 50 # 50 frames per video

# 2. Our input files (the CSVs we created in the last step)
PROTOTYPE_TRAIN_CSV = '/content/drive/MyDrive/dataset/train/prototype_train.csv'
PROTOTYPE_VAL_CSV = '/content/drive/MyDrive/dataset/val/prototype_val.csv'
PROTOTYPE_TEST_CSV = '/content/drive/MyDrive/dataset/test/prototype_test.csv'

# 3. Where we will save our new, processed data
# We'll create a new folder for our clean NumPy arrays
OUTPUT_DATA_PATH = Path('/content/drive/MyDrive/dataset/Process_Abdo/processed_prototype')
OUTPUT_LABEL_MAP = Path('/content/drive/MyDrive/dataset/Process_Abdo/label_map.json')

# Create the output directories if they don't exist
os.makedirs(OUTPUT_DATA_PATH, exist_ok=True)

In [4]:
# --- Step 2: Create and Save the Label Map ---

# Read the training CSV to find all unique words
train_df = pd.read_csv(PROTOTYPE_TRAIN_CSV)

# Get sorted unique labels (words)
unique_labels = sorted(train_df['word'].unique())

# Create the {word: id} mapping
label_map = {word: i for i, word in enumerate(unique_labels)}

# Save the map as a JSON file
with open(OUTPUT_LABEL_MAP, 'w') as f:
    json.dump(label_map, f, indent=4)

print(f"Label map created with {len(label_map)} classes and saved to {OUTPUT_LABEL_MAP}")

Label map created with 100 classes and saved to /content/drive/MyDrive/dataset/Process_Abdo/label_map.json


In [5]:
# --- Step 3: Define the Feature Extractor Class ---

class FeatureExtractor:
    def __init__(self, sequence_length=50):
        # 1. Initialize MediaPipe Holistic
        self.mp_holistic = mp.solutions.holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5)

        # 2. Set the sequence length (number of frames)
        self.sequence_length = sequence_length

        # 3. Calculate feature count (Pose + 2 Hands, XY only)
        # Pose: 33 landmarks * 2 coords = 66
        # Hands: 21 landmarks * 2 coords * 2 hands = 84
        # Total = 150
        self.pose_landmarks = 33
        self.hand_landmarks = 21
        self.feature_count = (self.pose_landmarks * 2) + (self.hand_landmarks * 2 * 2) # 150

    def get_feature_count(self):
        return self.feature_count

    def extract_features(self, video_path):
        """Processes a single video and returns its (seq_length, feature_count) array."""

        # Create an empty array to store features
        # Shape: (50, 150)
        keypoints = np.zeros((self.sequence_length, self.feature_count))

        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return None

        frame_num = 0
        while frame_num < self.sequence_length:
            ret, frame = cap.read()
            if not ret:
                # Video ended early, the rest of the `keypoints` array will be zeros (padding)
                break

            # Convert the BGR image to RGB
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process the image with MediaPipe
            results = self.mp_holistic.process(image_rgb)

            # Extract features for this frame
            frame_features = self._get_frame_features(results)
            keypoints[frame_num, :] = frame_features

            frame_num += 1

        cap.release()
        return keypoints

    def _get_frame_features(self, results):
        """Extracts and flattens features from a single MediaPipe result."""

        frame_features = np.zeros(self.feature_count)

        if results.pose_landmarks:
            pose_arr = np.array(
                [[res.x, res.y] for res in results.pose_landmarks.landmark]
            ).flatten()
            frame_features[0:66] = pose_arr

        if results.left_hand_landmarks:
            lh_arr = np.array(
                [[res.x, res.y] for res in results.left_hand_landmarks.landmark]
            ).flatten()
            frame_features[66:108] = lh_arr

        if results.right_hand_landmarks:
            rh_arr = np.array(
                [[res.x, res.y] for res in results.right_hand_landmarks.landmark]
            ).flatten()
            frame_features[108:150] = rh_arr

        return frame_features

print("FeatureExtractor class defined.")

FeatureExtractor class defined.


In [6]:
# --- Step 4: Run the Full Processing Pipeline (Resumable) ---

print("Starting the processing pipeline (Resumable Mode)...")
print("This will SKIP any files that are already processed.")

# 1. Initialize our extractor
# (Make sure you have run the Step 3 cell first to define FeatureExtractor)
extractor = FeatureExtractor(sequence_length=SEQUENCE_LENGTH)

# 2. Define the datasets we need to process
datasets_to_process = {
    'train': PROTOTYPE_TRAIN_CSV,
    'val': PROTOTYPE_VAL_CSV,
    'test': PROTOTYPE_TEST_CSV
}

# 3. Loop over each dataset (train, val, test)
for set_name, csv_path in datasets_to_process.items():
    print(f"\n--- Processing {set_name} set ---")

    # Read the CSV (e.g., prototype_train.csv)
    df = pd.read_csv(csv_path)

    # Create the output directories for this set
    output_path_X = OUTPUT_DATA_PATH / set_name / 'X'
    output_path_y = OUTPUT_DATA_PATH / set_name / 'y'
    os.makedirs(output_path_X, exist_ok=True)
    os.makedirs(output_path_y, exist_ok=True)

    # Use tqdm for a progress bar
    # We iterate using index for unique filenames
    for i, row in tqdm(df.iterrows(), total=len(df), desc=f"Processing {set_name}"):

        # --- RESUME LOGIC (The only change is here) ---
        # 1. Define the *potential* output filenames first
        base_filename = f"{set_name}_{i:06d}" # e.g., train_000001
        output_file_X = output_path_X / f"{base_filename}.npy"
        output_file_y = output_path_y / f"{base_filename}.npy"

        # 2. Check if *both* files already exist
        if os.path.exists(output_file_X) and os.path.exists(output_file_y):
            continue # Skip this file, it's already done

        # --- REGULAR PROCESSING (if files don't exist) ---

        # If we are here, it means the files are missing and we need to process them
        video_path = row['full_path']
        word = row['word']

        # 3. Get the label (e.g., 0, 1, 2...) from our map
        label = label_map[word]

        # 4. Extract features (the (50, 150) array)
        features = extractor.extract_features(video_path)

        if features is not None:
            # 5. Save the features and the label as .npy files
            # (Filenames are already defined above)
            np.save(output_file_X, features)
            np.save(output_file_y, label)

print("\n--- Pipeline Finished! ---")
print(f"All processed data saved in: {OUTPUT_DATA_PATH}")

Starting the processing pipeline (Resumable Mode)...
This will SKIP any files that are already processed.

--- Processing train set ---


Processing train:   0%|          | 0/8916 [00:00<?, ?it/s]


--- Processing val set ---


Processing val:   0%|          | 0/1115 [00:00<?, ?it/s]

Error opening video file: /content/drive/MyDrive/ASL-HuggingFace-Data/videos/part_9/kiss_20241119_172646_38.mp4
Error opening video file: /content/drive/MyDrive/ASL-HuggingFace-Data/videos/part_10/pop_20241119_172650_62.mp4

--- Processing test set ---


Processing test:   0%|          | 0/1115 [00:00<?, ?it/s]


--- Pipeline Finished! ---
All processed data saved in: /content/drive/MyDrive/dataset/Process_Abdo/processed_prototype
