In [8]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
import os, time, json
from pathlib import Path
!pip install "numpy<2" # for non confilcte with mediapipe
import cv2, numpy as np, pandas as pd
from tqdm.notebook import tqdm
!pip install mediapipe
import mediapipe as mp
from google.colab import drive



In [10]:
# ---------------- CONFIG (final paths) ----------------

# Root of the project on the mounted Google Drive.
PROJECT_ROOT = Path('/content/drive/MyDrive/ASL-Project')

# One CSV per split (train / val / test) of the first prototype.
PROTOTYPE_TRAIN_CSV = PROJECT_ROOT / 'Data/metadata/1st_Prototype(100_word)/train/prototype_train.csv'
PROTOTYPE_VAL_CSV   = PROJECT_ROOT / 'Data/metadata/1st_Prototype(100_word)/val/prototype_val.csv'
PROTOTYPE_TEST_CSV  = PROJECT_ROOT / 'Data/metadata/1st_Prototype(100_word)/test/prototype_test.csv'


# Output tree: feature arrays (X), label arrays (y), overlay videos and metadata.
OUTPUT_BASE = PROJECT_ROOT / 'Data/processed_prototype/1st_prototype_processed'
OUT_X       = OUTPUT_BASE / 'X'
OUT_y       = OUTPUT_BASE / 'y'
OUT_OVERLAY = OUTPUT_BASE / 'overlays'
OUT_META    = OUTPUT_BASE / 'meta'


# Create the output folders up front; existing folders are left as they are.
for p in (OUT_X, OUT_y, OUT_OVERLAY, OUT_META):
    p.mkdir(parents=True, exist_ok=True)

# Number of frames in every saved sequence.
SEQUENCE_LENGTH = 50
# Target sampling rate; the extractor derives a frame-skip factor from it.
SAMPLE_FPS = 15
# Detection / tracking confidence thresholds passed to MediaPipe Holistic.
MP_MIN_DET_CONF = 0.5
MP_MIN_TRK_CONF = 0.5
# Minimum handedness score for choose_best_hands to keep a hand.
CONF_THRESH = 0.5
# Overlay videos are written only until this many have been saved successfully.
VISUAL_OVERLAY_SAVE_FIRST_N = 2
# When True, samples whose X and y .npy files both exist are skipped.
RESUME = True

print("✅ Setup Done. Output Path:", OUTPUT_BASE)

✅ Setup Done. Output Path: /content/drive/MyDrive/ASL-Project/Data/processed_prototype/1st_prototype_processed


In [11]:
# Short alias for MediaPipe's Holistic solution module (used by the extractor and the overlay writer).
mp_holistic = mp.solutions.holistic

# ---------------- Helper Functions ----------------
def normalize_hand(pts):
    """Express hand landmarks relative to the wrist, scaled by the wrist-to-landmark-9 distance.

    Args:
        pts: array of hand landmarks; row 0 is used as the origin (wrist) and
            row 9 as the scale reference (middle-finger MCP per the original comment).

    Returns:
        Array of the same shape as ``pts``: ``(pts - wrist) / scale``. When the
        reference distance is below 1e-6 the points are only translated
        (scale falls back to 1.0) to avoid dividing by ~0.
    """
    wrist = pts[0].copy()
    span = np.linalg.norm(pts[9] - wrist)
    divisor = 1.0 if span < 1e-6 else span
    centered = pts - wrist
    return centered / divisor

def choose_best_hands(multi_hand_landmarks, multi_handedness, conf_thresh=None):
    """Pick, per handedness label, the most confident detected hand.

    Args:
        multi_hand_landmarks: iterable of hand landmark objects (each with a
            ``.landmark`` sequence of points exposing ``x``, ``y``, ``z``), or None.
        multi_handedness: iterable parallel to ``multi_hand_landmarks`` whose
            items expose ``classification[0].label`` and ``.score``, or None.
        conf_thresh: minimum handedness score to accept a hand. Defaults to the
            module-level ``CONF_THRESH`` when omitted.

    Returns:
        dict mapping the upper-cased label (e.g. ``'LEFT'``/``'RIGHT'``) to a
        float32 array of shape (num_landmarks, 3). Empty when either input is None
        or no hand reaches the threshold.
    """
    chosen = {}
    if multi_hand_landmarks is None or multi_handedness is None:
        return chosen
    if conf_thresh is None:
        conf_thresh = CONF_THRESH

    best_conf = {}
    for lm, hd in zip(multi_hand_landmarks, multi_handedness):
        top = hd.classification[0]
        label = top.label.upper()
        conf = float(top.score)
        if conf < conf_thresh:
            continue
        # Two detections can carry the same label; keep the more confident one
        # instead of letting the later detection silently win.
        if conf < best_conf.get(label, float('-inf')):
            continue
        best_conf[label] = conf
        chosen[label] = np.array([[p.x, p.y, p.z] for p in lm.landmark], dtype=np.float32)
    return chosen

def compute_torso_center_and_scale(pose_landmarks):
    """Compute the torso centre and size used to normalise coordinates.

    Args:
        pose_landmarks: object exposing ``.landmark``, an indexable sequence of
            points with ``x`` and ``y``. Indices 11/12 are read as the shoulders
            and 23/24 as the hips.

    Returns:
        ``(torso_center, torso_scale)`` where ``torso_center`` is a float32
        array ``[x, y]`` (midpoint between the shoulder centre and the hip
        centre) and ``torso_scale`` is a float: the larger of shoulder width and
        hip width, never below 1e-6. If the landmarks are missing or incomplete
        the defaults ``([0.5, 0.5], 1.0)`` are returned.
    """
    torso_center = np.array([0.5, 0.5], dtype=np.float32)
    torso_scale = 1.0
    try:
        points = pose_landmarks.landmark

        def get_xy(idx):
            lm = points[idx]
            return np.array([lm.x, lm.y], dtype=np.float32)

        left_sh, right_sh = get_xy(11), get_xy(12)
        left_hip, right_hip = get_xy(23), get_xy(24)

        # Center = average of shoulders and hips centers
        shoulder_center = (left_sh + right_sh) / 2.0
        hip_center = (left_hip + right_hip) / 2.0
        center = (shoulder_center + hip_center) / 2.0

        # Scale = max width (shoulders or hips)
        shoulder_dist = np.linalg.norm(left_sh - right_sh)
        hip_dist = np.linalg.norm(left_hip - right_hip)
        scale = max(shoulder_dist, hip_dist, 1e-6)

        torso_center, torso_scale = center, scale
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Best effort: missing/short/malformed landmarks fall back to the defaults.
        # Only lookup/conversion errors are caught so that KeyboardInterrupt and
        # other unrelated failures are no longer swallowed.
        pass
    return torso_center, float(torso_scale)

# ---------------- Feature Extractor Class ----------------
class FeatureExtractor:
    """Turn a video into a fixed-length sequence of pose + hand feature vectors.

    Per-frame layout (198 float32 values):
        [0:66]    33 pose landmarks (x, y), torso-normalised
        [66:129]  left hand, 21 landmarks (x, y, z), wrist-normalised
        [129:132] left wrist position relative to the torso
        [132:195] right hand, 21 landmarks (x, y, z), wrist-normalised
        [195:198] right wrist position relative to the torso

    Hand landmarks are read from the Holistic result fields
    ``left_hand_landmarks`` / ``right_hand_landmarks``. The previous code only
    looked at ``multi_hand_landmarks`` / ``multi_handedness`` (fields of the
    separate Hands solution), so hand features were never filled when running
    Holistic. Arrays saved before this fix therefore hold zeros in the hand
    slots and need re-extraction.

    NOTE(review): one Holistic instance is reused for every video, so tracker
    state may carry over from the end of one video to the start of the next —
    confirm whether that matters for this dataset.
    """

    # Slice offsets inside the per-frame feature vector.
    _POSE_END = 66
    _HAND_BLOCK = 66          # 63 shape values + 3 wrist-relative values
    _HAND_SHAPE = 63

    def __init__(self, seq_len=SEQUENCE_LENGTH, sample_fps=SAMPLE_FPS):
        """Create the Holistic model.

        Args:
            seq_len: number of frames in every returned sequence.
            sample_fps: target sampling rate used to derive the frame-skip factor.
        """
        self.seq_len = seq_len
        self.sample_fps = sample_fps
        self.hol = mp_holistic.Holistic(
            min_detection_confidence=MP_MIN_DET_CONF,
            min_tracking_confidence=MP_MIN_TRK_CONF
        )
        # 198 Features Total
        self.feature_count = 198

    def close(self):
        """Release the Holistic model; errors while closing are ignored (best effort)."""
        try:
            self.hol.close()
        except Exception:
            pass

    @staticmethod
    def _collect_hands(results):
        """Return ``{'LEFT': pts, 'RIGHT': pts}`` for the hands present in ``results``.

        Hands-style results (``multi_hand_landmarks`` + ``multi_handedness``)
        are still routed through ``choose_best_hands``; Holistic results are
        read from ``left_hand_landmarks`` / ``right_hand_landmarks``.
        """
        multi_landmarks = getattr(results, 'multi_hand_landmarks', None)
        multi_handedness = getattr(results, 'multi_handedness', None)
        if multi_landmarks is not None and multi_handedness is not None:
            return choose_best_hands(multi_landmarks, multi_handedness)

        hands = {}
        for label, attr in (('LEFT', 'left_hand_landmarks'), ('RIGHT', 'right_hand_landmarks')):
            hand = getattr(results, attr, None)
            if hand is not None:
                hands[label] = np.array([[p.x, p.y, p.z] for p in hand.landmark], dtype=np.float32)
        return hands

    def _frame_feature(self, results):
        """Build the feature vector for one processed frame.

        Slots whose landmarks were not detected stay zero, so an all-zero vector
        means nothing was detected in the frame.
        """
        feat = np.zeros(self.feature_count, dtype=np.float32)

        # 1. Pose (Torso Normalized); defaults are used when no pose is detected.
        torso_center = np.array([0.5, 0.5], dtype=np.float32)
        torso_scale = 1.0

        if results.pose_landmarks:
            torso_center, torso_scale = compute_torso_center_and_scale(results.pose_landmarks)
            # Pose XY (33 points)
            pose_xy = np.array([[lm.x, lm.y] for lm in results.pose_landmarks.landmark], dtype=np.float32)
            pose_norm = (pose_xy - torso_center[None, :]) / torso_scale
            feat[0:self._POSE_END] = pose_norm.flatten()

        # 2. Hands (Wrist Normalized + Relative Pos)
        hands = self._collect_hands(results)
        left_start = self._POSE_END
        right_start = left_start + self._HAND_BLOCK
        for label, start in (('LEFT', left_start), ('RIGHT', right_start)):
            pts = hands.get(label)
            if pts is None:
                continue
            shape_end = start + self._HAND_SHAPE
            # Shape
            feat[start:shape_end] = normalize_hand(pts)[:, :3].flatten()
            # Relative Wrist Position
            wrist = pts[0]
            feat[shape_end:start + self._HAND_BLOCK] = np.array(
                [(wrist[0] - torso_center[0]) / torso_scale,
                 (wrist[1] - torso_center[1]) / torso_scale,
                 wrist[2] / max(torso_scale, 1e-6)], dtype=np.float32)

        return feat

    @staticmethod
    def _fill_gaps(buffer, feature_count):
        """Replace ``None`` entries in ``buffer`` in place.

        A gap becomes the midpoint of its nearest filled neighbours, a copy of
        the only available neighbour, or zeros when the whole buffer is empty.
        Gaps are filled left to right, so an earlier filled gap serves as the
        "previous" neighbour of the next one.
        """
        for i in range(len(buffer)):
            if buffer[i] is not None:
                continue
            prev = next((buffer[j] for j in range(i - 1, -1, -1) if buffer[j] is not None), None)
            nxt = next((buffer[j] for j in range(i + 1, len(buffer)) if buffer[j] is not None), None)
            if prev is not None and nxt is not None:
                buffer[i] = (prev + nxt) / 2.0
            elif prev is not None:
                buffer[i] = prev.copy()
            elif nxt is not None:
                buffer[i] = nxt.copy()
            else:
                buffer[i] = np.zeros(feature_count, dtype=np.float32)

    def extract(self, video_path):
        """Extract a ``(seq_len, feature_count)`` array from a video file.

        Args:
            video_path: path of the video; converted with ``str()``.

        Returns:
            ``None`` if the video cannot be opened, otherwise ``(sequence, meta)``.
            ``meta`` keeps the original keys ``'success'`` and ``'fps'`` and also
            provides ``'success_frames'``, ``'video_fps'`` and ``'requested_len'``,
            which are the names the processing loop looks up.
        """
        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened():
            cap.release()
            return None

        buffer = []
        success_count = 0
        try:
            # A reported FPS of 0 falls back to 25.
            video_fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
            skip = max(1, int(round(video_fps / self.sample_fps)))

            frame_idx = 0
            while len(buffer) < self.seq_len:
                ret, frame = cap.read()
                if not ret:
                    break
                sampled = (frame_idx % skip == 0)
                frame_idx += 1
                if not sampled:
                    continue

                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                res = self.hol.process(img_rgb)
                feat = self._frame_feature(res)

                # any() rather than sum(): normalised coordinates are signed, so
                # a non-empty vector could in principle sum to zero.
                if feat.any():
                    buffer.append(feat)
                    success_count += 1
                else:
                    buffer.append(None)
        finally:
            # Release the capture even if MediaPipe/OpenCV raises mid-video.
            cap.release()

        # Padding & Interpolation
        if len(buffer) < self.seq_len:
            buffer += [None] * (self.seq_len - len(buffer))
        self._fill_gaps(buffer, self.feature_count)

        meta = {
            'success': int(success_count),
            'fps': video_fps,
            'success_frames': int(success_count),
            'video_fps': float(video_fps),
            'requested_len': int(self.seq_len),
        }
        return np.stack(buffer[:self.seq_len], axis=0), meta

def save_overlay_video_short(input_path, out_path, max_frames=None):
    """Write a copy of a video with pose and hand landmarks drawn on each frame.

    Args:
        input_path: source video path.
        out_path: destination path; written with the 'mp4v' codec at the source
            resolution and frame rate (25 fps if the source reports 0).
        max_frames: stop after this many frames; ``None``/0 means the whole video.

    Returns:
        ``True`` when the input could be opened and the writer was created,
        ``False`` if either could not be opened.
    """
    cap = cv2.VideoCapture(str(input_path))
    if not cap.isOpened():
        cap.release()
        return False

    out = None
    hol = None
    try:
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0

        out = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        if not out.isOpened():
            # Without this check a failed writer dropped every frame silently
            # and the function still reported success.
            return False

        # Same thresholds as the feature extractor, so the overlay reflects the
        # detector settings used for extraction.
        hol = mp_holistic.Holistic(min_detection_confidence=MP_MIN_DET_CONF,
                                   min_tracking_confidence=MP_MIN_TRK_CONF)
        mp_draw = mp.solutions.drawing_utils

        saved = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            res = hol.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if res.pose_landmarks:
                mp_draw.draw_landmarks(frame, res.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
            if res.left_hand_landmarks:
                mp_draw.draw_landmarks(frame, res.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
            if res.right_hand_landmarks:
                mp_draw.draw_landmarks(frame, res.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
            out.write(frame)
            saved += 1
            if max_frames and saved >= max_frames:
                break
        return True
    finally:
        # Always release native resources, including when a frame fails to process.
        if hol is not None:
            hol.close()
        cap.release()
        if out is not None:
            out.release()

# Visible confirmation in the cell output that the definitions above were executed.
print("✅ Functions & Classes Defined.")

✅ Functions & Classes Defined.


In [12]:
# --- The label map must be defined before the main loop runs ---
import pandas as pd

# 1. Read the training CSV to collect the vocabulary.
train_df = pd.read_csv(PROTOTYPE_TRAIN_CSV)

# 2. Build the word -> integer id map from the sorted unique training words.
#    Missing words (NaN) are dropped first: sorting a mix of str and float NaN
#    raises TypeError. Rows without a known word get label -1 in the main loop.
unique_labels = sorted(train_df['word'].dropna().unique())
label_map = {w: i for i, w in enumerate(unique_labels)}

print("✅ Label Map Re-loaded successfully!")

✅ Label Map Re-loaded successfully!


In [13]:
# Cell 4: Main Execution (With Safety Checkpoints)

# ---------------- Initialization ----------------
# One extractor (and therefore one Holistic model) is shared by all videos.
extractor = FeatureExtractor(seq_len=SEQUENCE_LENGTH, sample_fps=SAMPLE_FPS)

# Split name -> CSV listing the videos of that split.
datasets = {
    'train': PROTOTYPE_TRAIN_CSV,
    'val'  : PROTOTYPE_VAL_CSV,
    'test' : PROTOTYPE_TEST_CSV
}

# One metadata dict per sample processed in THIS run (resumed samples add nothing).
per_sample_meta = []
processed_total = 0
# Counts both samples skipped by RESUME and videos the extractor could not open.
skipped_total   = 0
# Number of overlay videos written so far (capped by VISUAL_OVERLAY_SAVE_FIRST_N).
overlay_saved   = 0

start_all = time.time()

# Helper for interim (safety) saves of the metadata collected so far
def save_checkpoint_meta(meta_list, out_folder):
    """Write ``meta_list`` to ``per_sample_meta_checkpoint.csv`` inside ``out_folder``.

    Args:
        meta_list: list of per-sample metadata dicts; nothing is written when empty.
        out_folder: ``pathlib.Path`` of the destination folder.
    """
    if meta_list:
        checkpoint_path = out_folder / 'per_sample_meta_checkpoint.csv'
        pd.DataFrame(meta_list).to_csv(checkpoint_path, index=False)

# ---------------- Main Loop ----------------
# For every split: read its CSV, extract features per video, and save one X/y
# .npy pair per row. The try/finally closes the extractor and writes a metadata
# checkpoint even if the run is interrupted or a video raises.
overlay_warnings_left = 3  # cap overlay warnings: a failing overlay is retried on every sample

try:
    for set_name, csv_path in datasets.items():
        print(f"\n=== Processing set: {set_name} ===")

        if not os.path.exists(csv_path):
            print(f"❌ Error: CSV file not found at {csv_path}")
            continue

        df = pd.read_csv(csv_path).reset_index(drop=True)

        out_dir_X_set = OUT_X / set_name
        out_dir_y_set = OUT_y / set_name
        out_dir_X_set.mkdir(parents=True, exist_ok=True)
        out_dir_y_set.mkdir(parents=True, exist_ok=True)

        failed_in_set = 0

        for i, row in tqdm(df.iterrows(), total=len(df), desc=f"{set_name}"):
            t0 = time.time()

            # File names are derived from the row position in the CSV.
            base_filename = f"{set_name}_{i:06d}"
            out_x_file = out_dir_X_set / f"{base_filename}.npy"
            out_y_file = out_dir_y_set / f"{base_filename}.npy"

            # 1. Resume Logic: both files must exist for a sample to count as done
            if RESUME and out_x_file.exists() and out_y_file.exists():
                skipped_total += 1
                continue

            # 2. Prepare Data (unknown or missing words map to label -1)
            video_path = row['full_path']
            word       = row.get('word', None)
            label      = label_map.get(word, -1) if word is not None else -1

            # 3. Extract Features
            res = extractor.extract(video_path)
            if res is None:
                skipped_total += 1
                failed_in_set += 1
                continue

            seq, meta = res

            # 4. Save NPY Files
            np.save(out_x_file, seq.astype(np.float32))
            np.save(out_y_file, np.array(label, dtype=np.int32))

            # 5. Metadata Entry
            # The extractor reports 'success' and 'fps'; the previous lookups
            # used names it never set, so success_frames was always 0 and
            # video_fps always NaN. Both spellings are accepted here.
            proc_time = time.time() - t0
            entry = {
                'set': set_name, 'index': int(i), 'video': str(video_path),
                'label_word': word, 'label_id': int(label),
                'x_path': str(out_x_file), 'y_path': str(out_y_file),
                'requested_seq_len': int(meta.get('requested_len', SEQUENCE_LENGTH)),
                'success_frames': int(meta.get('success_frames', meta.get('success', 0))),
                'video_fps': float(meta.get('video_fps', meta.get('fps', np.nan))),
                'sample_fps': float(SAMPLE_FPS),
                'feature_count': int(seq.shape[1]),
                'proc_seconds': float(proc_time),
                'timestamp': time.time()
            }

            # 6. Save Overlay (Only first N videos)
            if overlay_saved < VISUAL_OVERLAY_SAVE_FIRST_N:
                try:
                    overlay_path = OUT_OVERLAY / f"{base_filename}_overlay.mp4"
                    ok = save_overlay_video_short(video_path, overlay_path, max_frames=SEQUENCE_LENGTH)
                    if ok:
                        entry['overlay_saved'] = str(overlay_path)
                        overlay_saved += 1
                except Exception as exc:
                    # Overlays are optional: report the problem (a few times) and carry on.
                    if overlay_warnings_left > 0:
                        overlay_warnings_left -= 1
                        print(f"[WARN] Overlay failed for {video_path}: {exc!r}")

            per_sample_meta.append(entry)
            processed_total += 1

            # 7. --- AUTO SAVE CHECKPOINT (Every 500 files) ---
            # Safety measure so metadata survives a crash or a Colab disconnect.
            if processed_total % 500 == 0:
                save_checkpoint_meta(per_sample_meta, OUT_META)

        if failed_in_set:
            print(f"[WARN] {set_name}: {failed_in_set} video(s) could not be opened and were skipped.")
finally:
    extractor.close()
    # No-op when nothing was processed in this run.
    save_checkpoint_meta(per_sample_meta, OUT_META)

elapsed_all = time.time() - start_all

# ---------------- Final Save -------------------
# Writes the per-sample metadata (CSV + JSON) and a run summary (JSON + CSV).
meta_df = pd.DataFrame(per_sample_meta)
meta_csv  = OUT_META / 'per_sample_meta.csv'
meta_json = OUT_META / 'per_sample_meta.json'

# per_sample_meta only holds samples processed in THIS run. On a resumed run
# where every sample is skipped it is empty, and writing it as-is would replace
# the metadata of earlier runs with an empty file. Merge with what is on disk.
if RESUME and meta_csv.exists():
    try:
        previous_meta_df = pd.read_csv(meta_csv)
    except pd.errors.EmptyDataError:
        # The file left by an earlier all-skipped run has no columns.
        previous_meta_df = pd.DataFrame()
    if len(previous_meta_df) > 0:
        if len(meta_df) > 0:
            meta_df = pd.concat([previous_meta_df, meta_df], ignore_index=True)
        else:
            meta_df = previous_meta_df
        if {'set', 'index'}.issubset(meta_df.columns):
            # A re-processed sample replaces its older entry.
            meta_df = meta_df.drop_duplicates(subset=['set', 'index'], keep='last').reset_index(drop=True)

meta_df.to_csv(meta_csv, index=False)
meta_df.to_json(meta_json, orient='records', lines=False, force_ascii=False)

# Counters describe this run only. 'total_skipped_existing' also includes
# videos that could not be opened, because the loop uses a single counter.
summary = {
    'total_processed_samples': int(processed_total),
    'total_skipped_existing': int(skipped_total),
    'sample_seq_len': int(SEQUENCE_LENGTH),
    'sample_fps': float(SAMPLE_FPS),
    'feature_count': int(meta_df['feature_count'].iloc[0]) if len(meta_df) > 0 else None,
    'overlay_saved_count': int(overlay_saved),
    'time_seconds': float(elapsed_all),
    'timestamp': time.time()
}

# ensure_ascii=False may emit non-ASCII characters, so fix the encoding explicitly.
with open(OUT_META / 'summary.json', 'w', encoding='utf-8') as f:
    json.dump(summary, f, indent=2, ensure_ascii=False)
pd.DataFrame([summary]).to_csv(OUT_META / 'summary.csv', index=False)

print("\n================ DONE ================")
print("X directory:   ", OUT_X)
print("y directory:   ", OUT_y)
print("Overlays:      ", OUT_OVERLAY)
print("Meta files:    ", OUT_META)
print("Samples meta:  ", len(per_sample_meta))
print("Summary:       ", summary)


=== Processing set: train ===


train:   0%|          | 0/8916 [00:00<?, ?it/s]


=== Processing set: val ===


val:   0%|          | 0/1115 [00:00<?, ?it/s]


=== Processing set: test ===


test:   0%|          | 0/1115 [00:00<?, ?it/s]


X directory:    /content/drive/MyDrive/ASL-Project/Data/processed_prototype/1st_prototype_processed/X
y directory:    /content/drive/MyDrive/ASL-Project/Data/processed_prototype/1st_prototype_processed/y
Overlays:       /content/drive/MyDrive/ASL-Project/Data/processed_prototype/1st_prototype_processed/overlays
Meta files:     /content/drive/MyDrive/ASL-Project/Data/processed_prototype/1st_prototype_processed/meta
Samples meta:   0
Summary:        {'total_processed_samples': 0, 'total_skipped_existing': 11146, 'sample_seq_len': 50, 'sample_fps': 15.0, 'feature_count': None, 'overlay_saved_count': 0, 'time_seconds': 15.844616413116455, 'timestamp': 1764340270.463885}
