In [3]:
import cv2
import mediapipe as mp
import numpy as np
import json
import os
from tqdm import tqdm
from math import sqrt, atan2

# --- MediaPipe Solutions ---
# Short alias for the Holistic solution module; it provides both the
# `Holistic` model class and the `PoseLandmark` enum used below.
mp_holistic = mp.solutions.holistic

# --- Configuration Based on MSE-GCN Paper ---
# The paper specifies using 65 joints in total.
# We select 23 pose landmarks + 21 landmarks for each hand (23 + 21 + 21 = 65).
# NOTE(review): "OUTPUT_NPZ_PATH" and "SAVE_CHECKPOINT_EVERY_N_VIDEOS" are not
# read by any function shown in this file — confirm whether they are still needed.
config = {
    "VIDEO_SOURCE_DIR": r"D:\WLASL DataSet\WLASL FULL\videos",
    "SPLIT_FILE_PATH": r"D:\WLASL DataSet\WLASL FULL\nslt_300.json",
    "OUTPUT_NPZ_PATH": r"D:\MSE-GCN paper methodology\Landmarks\MSE_GCN_features.npz",
    "SAVE_CHECKPOINT_EVERY_N_VIDEOS": 250,

    # The 23 pose landmarks kept per frame: face points, shoulders, elbows,
    # wrists and the coarse hand points reported by the pose model.
    # The order of this list defines rows 0..22 of the per-frame joint array.
    "POSE_LANDMARKS_TO_EXTRACT": [
        mp_holistic.PoseLandmark.NOSE, mp_holistic.PoseLandmark.LEFT_EYE_INNER,
        mp_holistic.PoseLandmark.LEFT_EYE, mp_holistic.PoseLandmark.LEFT_EYE_OUTER,
        mp_holistic.PoseLandmark.RIGHT_EYE_INNER, mp_holistic.PoseLandmark.RIGHT_EYE,
        mp_holistic.PoseLandmark.RIGHT_EYE_OUTER, mp_holistic.PoseLandmark.LEFT_EAR,
        mp_holistic.PoseLandmark.RIGHT_EAR, mp_holistic.PoseLandmark.MOUTH_LEFT,
        mp_holistic.PoseLandmark.MOUTH_RIGHT, mp_holistic.PoseLandmark.LEFT_SHOULDER,
        mp_holistic.PoseLandmark.RIGHT_SHOULDER, mp_holistic.PoseLandmark.LEFT_ELBOW,
        mp_holistic.PoseLandmark.RIGHT_ELBOW, mp_holistic.PoseLandmark.LEFT_WRIST,
        mp_holistic.PoseLandmark.RIGHT_WRIST, mp_holistic.PoseLandmark.LEFT_PINKY,
        mp_holistic.PoseLandmark.RIGHT_PINKY, mp_holistic.PoseLandmark.LEFT_INDEX,
        mp_holistic.PoseLandmark.RIGHT_INDEX, mp_holistic.PoseLandmark.LEFT_THUMB,
        mp_holistic.PoseLandmark.RIGHT_THUMB
    ],

    # (start, end) landmark pairs for the Bone Stream; each pair yields one
    # length and one angle feature per frame. Only pose landmarks are used
    # here — no hand bones are defined.
    "BONE_CONNECTIONS": [
        # Arms
        (mp_holistic.PoseLandmark.LEFT_SHOULDER, mp_holistic.PoseLandmark.LEFT_ELBOW),
        (mp_holistic.PoseLandmark.LEFT_ELBOW, mp_holistic.PoseLandmark.LEFT_WRIST),
        (mp_holistic.PoseLandmark.RIGHT_SHOULDER, mp_holistic.PoseLandmark.RIGHT_ELBOW),
        (mp_holistic.PoseLandmark.RIGHT_ELBOW, mp_holistic.PoseLandmark.RIGHT_WRIST),
        # Torso
        (mp_holistic.PoseLandmark.LEFT_SHOULDER, mp_holistic.PoseLandmark.RIGHT_SHOULDER),
    ]
}

def get_joint_and_bone_labels():
    """Build readable names for every joint and every configured bone.

    Joint labels follow the row order of the per-frame joint array: the
    configured pose landmarks first, then 21 left-hand points, then 21
    right-hand points.

    Returns:
        tuple: ``(joint_labels, bone_labels)``, two lists of strings.
    """
    pose_names = [landmark.name for landmark in config["POSE_LANDMARKS_TO_EXTRACT"]]
    left_hand_names = [f"LEFT_HAND_{idx}" for idx in range(21)]
    right_hand_names = [f"RIGHT_HAND_{idx}" for idx in range(21)]
    joint_labels = pose_names + left_hand_names + right_hand_names

    # One label per (start, end) pair, in configuration order.
    bone_labels = [
        f"BONE_{src.name}_TO_{dst.name}"
        for src, dst in config["BONE_CONNECTIONS"]
    ]

    return joint_labels, bone_labels

def process_frame_for_mse_gcn(results):
    """
    Convert one frame of MediaPipe Holistic output into the 'joints' and
    'bones' feature streams used by the MSE-GCN methodology.

    Args:
        results: The object returned by ``Holistic.process``. Only its
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes are read.

    Returns:
        tuple: ``(joint_features, bone_features)`` where

        * ``joint_features`` is a float32 array of shape ``(J, 4)`` with
          ``J = len(config["POSE_LANDMARKS_TO_EXTRACT"]) + 42`` (65 with the
          configured 23 pose landmarks). Columns 0-1 hold the raw ``(x, y)``
          coordinates; columns 2-3 hold the position relative to the
          shoulder midpoint. Rows of joints that were not detected are all
          zeros.
        * ``bone_features`` is a float32 vector of length
          ``2 * len(config["BONE_CONNECTIONS"])`` laid out as
          ``[length_0, angle_0, length_1, angle_1, ...]``; all zeros when no
          pose was detected.
    """
    pose_ids = config["POSE_LANDMARKS_TO_EXTRACT"]
    num_pose = len(pose_ids)
    hand_size = 21  # one hand contributes 21 landmarks
    left_start = num_pose
    right_start = num_pose + hand_size
    num_joints = num_pose + 2 * hand_size

    # --- 1. Extract raw joint coordinates (x, y) ---
    raw_joints = np.zeros((num_joints, 2), dtype=np.float32)
    # Tracks which rows hold real detections, so missing joints can be kept
    # at zero in the relative-position channels below.
    detected = np.zeros(num_joints, dtype=bool)

    pose = results.pose_landmarks
    if pose:
        for i, lm_enum in enumerate(pose_ids):
            lm = pose.landmark[lm_enum]
            raw_joints[i] = [lm.x, lm.y]
        detected[:num_pose] = True

    if results.left_hand_landmarks:
        for i, lm in enumerate(results.left_hand_landmarks.landmark):
            raw_joints[left_start + i] = [lm.x, lm.y]
        detected[left_start:right_start] = True

    if results.right_hand_landmarks:
        for i, lm in enumerate(results.right_hand_landmarks.landmark):
            raw_joints[right_start + i] = [lm.x, lm.y]
        detected[right_start:] = True

    # --- 2. Calculate Features for the "Joints" Stream ---
    joint_features = np.zeros((num_joints, 4), dtype=np.float32)
    joint_features[:, :2] = raw_joints  # First 2 channels are original (x, y)

    # Center node: midpoint of the two shoulders. It stays at the origin
    # when no pose was detected, so detected hands then keep their raw
    # coordinates in the relative channels.
    center_node = np.zeros(2)
    if pose:
        l_sh = pose.landmark[mp_holistic.PoseLandmark.LEFT_SHOULDER]
        r_sh = pose.landmark[mp_holistic.PoseLandmark.RIGHT_SHOULDER]
        center_node = np.array([(l_sh.x + r_sh.x) / 2, (l_sh.y + r_sh.y) / 2])

    # Only detected joints get a relative position. Subtracting the center
    # from an undetected (0, 0) placeholder would yield -center_node and make
    # a missing hand look like a real point mirrored through the origin.
    joint_features[detected, 2:] = raw_joints[detected] - center_node

    # --- 3. Calculate Features for the "Bones" Stream ---
    num_bones = len(config["BONE_CONNECTIONS"])
    bone_features = np.zeros(num_bones * 2, dtype=np.float32)  # length and angle for each bone

    if pose:
        landmarks = pose.landmark
        for i, (start_lm, end_lm) in enumerate(config["BONE_CONNECTIONS"]):
            start_pt = np.array([landmarks[start_lm].x, landmarks[start_lm].y])
            end_pt = np.array([landmarks[end_lm].x, landmarks[end_lm].y])

            # Bone length: Euclidean distance between the two endpoints.
            diff = end_pt - start_pt
            length = sqrt(diff[0]**2 + diff[1]**2)

            # Bone angle: atan2 gives a 2D orientation in (-pi, pi] and is
            # well defined even for vertical bones (diff[0] == 0).
            angle = atan2(diff[1], diff[0])

            bone_features[i*2] = length
            bone_features[i*2 + 1] = angle

    return joint_features, bone_features

def extract_features_from_video(video_path, holistic_model):
    """
    Run the holistic model over every frame of a video and stack the
    per-frame joint and bone features into two sequences.

    Args:
        video_path: Path of the video file to read with OpenCV.
        holistic_model: Object exposing ``process(rgb_image)``; its result is
            passed to ``process_frame_for_mse_gcn``.

    Returns:
        tuple: ``(joint_sequence, bone_sequence)`` as float32 arrays with one
        entry per decoded frame, or ``(None, None)`` when the video cannot be
        opened or yields no frames.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        print(f"Warning: Could not open video {video_path}")
        return None, None

    per_frame_joints, per_frame_bones = [], []

    try:
        while capture.isOpened():
            ok, bgr_frame = capture.read()
            if not ok:
                # End of stream or a decode failure: stop reading.
                break

            # OpenCV decodes to BGR; the model is fed RGB.
            rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
            # Mark the buffer read-only while the model processes it.
            rgb_frame.flags.writeable = False
            frame_results = holistic_model.process(rgb_frame)
            rgb_frame.flags.writeable = True

            joints, bones = process_frame_for_mse_gcn(frame_results)
            per_frame_joints.append(joints)
            per_frame_bones.append(bones)
    finally:
        # Always free the capture handle, even if processing raised.
        capture.release()

    if len(per_frame_joints) == 0:
        return None, None

    joint_array = np.array(per_frame_joints, dtype=np.float32)
    bone_array = np.array(per_frame_bones, dtype=np.float32)
    return joint_array, bone_array



In [4]:
def main_extraction():
    """
    Process every video listed in the split file and save each video's
    features to its own ``<video_id>.npz`` file.

    Writing one file per video keeps memory usage flat and makes the run
    resumable: videos whose output file already exists are skipped. Each
    archive is written to a temporary file first and then moved into place,
    so an interrupted run never leaves a truncated ``.npz`` that a later run
    would mistake for a finished video.

    The output directory ``MSE_GCN_features_individual`` is relative to the
    current working directory.
    """
    # Create a directory to store the individual feature files
    output_dir = "MSE_GCN_features_individual"
    os.makedirs(output_dir, exist_ok=True)
    print(f"Features will be saved in: {output_dir}")

    with open(config["SPLIT_FILE_PATH"], 'r', encoding="utf-8") as f:
        data = json.load(f)

    # The split file's top-level keys are used as video ids / file stems.
    video_ids = list(data.keys())

    holistic = mp_holistic.Holistic(static_image_mode=False,
                                      model_complexity=1,
                                      min_detection_confidence=0.5,
                                      min_tracking_confidence=0.5)

    processed_count = 0
    missing_count = 0   # listed in the split file but no .mp4 on disk
    failed_count = 0    # file exists but could not be opened / had no frames
    finished = False
    try:
        for vid in tqdm(video_ids, desc="Processing Videos"):
            path = os.path.join(config["VIDEO_SOURCE_DIR"], f"{vid}.mp4")
            output_path = os.path.join(output_dir, f"{vid}.npz")

            # Skip if the file has already been processed
            if os.path.exists(output_path):
                continue

            if not os.path.exists(path):
                missing_count += 1
                continue

            joint_seq, bone_seq = extract_features_from_video(path, holistic)
            if joint_seq is None:
                failed_count += 1
                continue

            # Write through an open file object so numpy does not append
            # ".npz" to the temporary name, then move it into place.
            # A leftover "*.tmp" file does not match the "*.npz" resume check.
            tmp_path = output_path + ".tmp"
            with open(tmp_path, "wb") as tmp_file:
                np.savez_compressed(
                    tmp_file,
                    joint_sequence=joint_seq,
                    bone_sequence=bone_seq
                )
            os.replace(tmp_path, output_path)
            processed_count += 1

        finished = True
    finally:
        holistic.close()
        if finished:
            print(f"\nExtraction complete. Processed {processed_count} new videos.")
        else:
            print(f"\nExtraction interrupted. Processed {processed_count} new videos before stopping.")
        if missing_count or failed_count:
            print(f"Skipped {missing_count} missing and {failed_count} unreadable videos.")
        # No final save is needed because every video was saved incrementally.

if __name__ == "__main__":
    # Run the extraction process. Per-video outputs are written relative to
    # the current working directory (see `output_dir` in main_extraction).
    main_extraction()

Features will be saved in: MSE_GCN_features_individual


Processing Videos: 100%|██████████| 5118/5118 [20:16:29<00:00, 14.26s/it]        



Extraction complete. Processed 5116 new videos.


In [6]:
import numpy as np
import os
from tqdm import tqdm

def combine_npz_files(input_dir, output_path):
    """
    Combines all .npz files from an input directory into a single compressed .npz file.

    Each input file must contain the arrays ``joint_sequence`` and
    ``bone_sequence``; files that cannot be read are reported and skipped.
    The combined archive has one entry per video id (the input file name
    without extension). Every entry is a dict with those two arrays, which
    numpy stores as a pickled 0-d object array, so read it back with::

        combined = np.load(output_path, allow_pickle=True)
        features = combined[video_id].item()

    All arrays are held in memory at once before saving.

    Args:
        input_dir (str): The directory containing the individual .npz files.
        output_path (str): The path to save the final combined .npz file.
            Missing parent directories are created.

    Returns:
        None. Returns early, without writing anything, when the input
        directory is missing, contains no .npz files, or no file could be
        loaded.
    """
    # Check if the input directory exists
    if not os.path.isdir(input_dir):
        print(f"Error: Input directory not found at '{input_dir}'")
        return

    # Sort so the processing order (and the archive's entry order) does not
    # depend on the arbitrary order returned by os.listdir.
    file_list = sorted(f for f in os.listdir(input_dir) if f.endswith('.npz'))

    if not file_list:
        print(f"No .npz files found in '{input_dir}'")
        return

    print(f"Found {len(file_list)} files to combine.")

    all_features = {}
    failed_count = 0

    # Loop through each file, load it, and add it to the dictionary
    for filename in tqdm(file_list, desc="Combining Files"):
        video_id = os.path.splitext(filename)[0]
        file_path = os.path.join(input_dir, filename)

        try:
            with np.load(file_path) as data:
                # Indexing reads the arrays into memory, so they remain
                # valid after the archive is closed.
                all_features[video_id] = {
                    'joint_sequence': data['joint_sequence'],
                    'bone_sequence': data['bone_sequence']
                }
        except Exception as e:
            # Deliberate best effort: one corrupt file must not abort the run.
            failed_count += 1
            print(f"Could not process file {filename}: {e}")

    if not all_features:
        print("No files could be loaded; nothing was saved.")
        return

    # Create the destination directory up front so the save cannot fail on a
    # missing folder after all files have already been loaded.
    output_parent = os.path.dirname(output_path)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Save the entire dictionary into a single compressed .npz file
    print(f"\nSaving combined data to {output_path}...")
    np.savez_compressed(output_path, **all_features)

    print("Combining complete!")
    if failed_count:
        print(f"Skipped {failed_count} files that could not be read.")
    print(f"Your final file is ready at: {output_path}")

if __name__ == "__main__":
    # --- Configuration ---
    # Directory where you saved your individual landmark files.
    # NOTE(review): main_extraction writes to the relative folder
    # "MSE_GCN_features_individual", so this absolute path only matches if
    # extraction was run from "D:\MSE-GCN paper methodology" — confirm.
    INPUT_DIRECTORY = r"D:\MSE-GCN paper methodology\MSE_GCN_features_individual"
    
    # The name of the final, combined file.
    # The path mixes "\" and "/" separators.
    OUTPUT_FILE = r"D:\MSE-GCN paper methodology\Landmarks/MSE_GCN_combined_features.npz"

    combine_npz_files(INPUT_DIRECTORY, OUTPUT_FILE)

Found 5116 files to combine.


Combining Files:   0%|          | 0/5116 [00:00<?, ?it/s]

Combining Files: 100%|██████████| 5116/5116 [01:39<00:00, 51.62it/s] 



Saving combined data to D:\MSE-GCN paper methodology\Landmarks/MSE_GCN_combined_features.npz...
Combining complete!
Your final file is ready at: D:\MSE-GCN paper methodology\Landmarks/MSE_GCN_combined_features.npz
