Preprocessing

In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (68.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.7/68.7 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencv-contrib-python, sounddevice, medi

In [None]:
import cv2
import mediapipe as mp
import os
import numpy as np
import json
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm


In [3]:
# Mount Google Drive at /content/drive; the dataset and output paths used in the
# cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Shortcuts to the MediaPipe solution modules: the holistic model plus the
# drawing helpers and their default styles.
mp_holistic, mp_drawing, mp_drawing_styles = (
    mp.solutions.holistic,
    mp.solutions.drawing_utils,
    mp.solutions.drawing_styles,
)

In [5]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to the model.
        model: Any object exposing ``process(rgb_image)`` (for example a
            ``Holistic`` instance).

    Returns:
        A tuple ``(image, results)`` where ``image`` is the frame converted
        back to BGR and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only while the model runs (performance hint),
    # then make it writeable again before converting back.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def extract_keypoints(results):
    """Flatten the pose landmarks of a MediaPipe result into a 1-D array.

    Args:
        results: Object with a ``pose_landmarks`` attribute. When that
            attribute is truthy, its ``landmark`` iterable must yield items
            with ``x``, ``y``, ``z`` and ``visibility`` attributes.

    Returns:
        A 1-D NumPy array laid out as ``x, y, z, visibility`` per landmark,
        or an array of ``33 * 4`` zeros when ``pose_landmarks`` is falsy
        (no pose detected).
    """
    landmarks = results.pose_landmarks
    if not landmarks:
        # Nothing detected: fall back to a fixed-size zero vector
        return np.zeros(33 * 4)

    rows = []
    for point in landmarks.landmark:
        rows.append([point.x, point.y, point.z, point.visibility])
    return np.array(rows).flatten()


In [10]:
# Load the WLASL metadata; it is iterated below as a sequence of entries, each
# carrying a 'gloss' and a list of 'instances'.
metadata_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/DataSet/WLASL_v0.3.json'
with open(metadata_path, 'r', encoding='utf-8') as file:
    metadata = json.load(file)

# Load the top 100 labels from the 'Gloss' column of the CSV
top_100_csv = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/DataSet/gloss_counts_top_100.csv'
top_100_labels = pd.read_csv(top_100_csv)['Gloss'].tolist()
# Set copy for constant-time membership tests inside the loop below
top_100_label_set = set(top_100_labels)

# Build labelMap: video_id (int) -> [gloss, frame_start, frame_end, fps],
# restricted to instances whose gloss is one of the top 100 labels.
labelMap = {}
for entry in metadata:
    label = entry['gloss']
    if label not in top_100_label_set:
        continue
    for instance in entry['instances']:
        video_id = int(instance['video_id'])
        frame_start = instance['frame_start']
        frame_end = instance['frame_end']
        fps = instance['fps']
        labelMap[video_id] = [label, frame_start, frame_end, fps]

In [11]:
# Root folder for the extracted keypoints; created up front, and an already
# existing folder is not an error.
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/Models/MediaPipe_Pose_only100'
os.makedirs(DATA_PATH, exist_ok=True)

# From the dataset folder keep only the .mp4 files whose file name (parsed as an
# integer video id) is a key of labelMap, i.e. belongs to a top 100 label.
video_path = '/content/drive/MyDrive/Colab Notebooks/AAI-521/Final Project/DataSet/videos'
video_files = [
    name
    for name in os.listdir(video_path)
    if name.endswith('.mp4')
    and int(os.path.splitext(name)[0]) in labelMap
]

print(f"Found {len(video_files)} videos associated with the top 100 labels.")

Found 1120 videos associated with the top 100 labels.


In [None]:
def process_video(video):
    """Extract per-frame pose keypoints from one video and save them as ``.npy``.

    Args:
        video: File name (not a full path) of a video inside ``video_path``.
            The name without its extension must parse as the integer video id
            used as a key in ``labelMap``.

    Returns:
        None. Videos whose id is not in ``labelMap`` are skipped. For all
        others, the keypoint vectors of every frame within
        ``[start_frame, end_frame]`` (1-based count of frames read; an
        ``end_frame`` of -1 means "until the end") are stacked and written to
        ``DATA_PATH/<label>/<video_id>_keypoints.npy``.
    """
    video_id = int(os.path.splitext(video)[0])
    if video_id not in labelMap:  # Ensure video is in filtered labelMap
        return

    label, start_frame, end_frame, fps = labelMap[video_id]

    keypoints_data = []
    cap = cv2.VideoCapture(os.path.join(video_path, video))
    try:
        # NOTE(review): setting CAP_PROP_FPS on a file-backed capture is likely
        # a no-op; kept from the original, confirm whether it is needed.
        cap.set(cv2.CAP_PROP_FPS, fps)

        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            frame_count = 0
            while cap.isOpened():
                success, image = cap.read()
                if not success:
                    break
                frame_count += 1

                # Frames before the annotated segment are read but not processed
                if frame_count < start_frame:
                    continue
                # Past the annotated segment nothing else can be kept, so stop
                # decoding instead of reading the rest of the file.
                if end_frame != -1 and frame_count > end_frame:
                    break

                # Only the landmark results are needed; the returned frame is unused
                _, results = mediapipe_detection(image, holistic)
                keypoints_data.append(extract_keypoints(results))
    finally:
        # Always free the capture handle, even if processing raised
        cap.release()

    # Save keypoints for the entire video; make sure the label folder exists so
    # this function does not depend on another cell having created it.
    action_path = os.path.join(DATA_PATH, label)
    os.makedirs(action_path, exist_ok=True)
    video_path_save = os.path.join(action_path, f'{video_id}_keypoints.npy')
    np.save(video_path_save, np.array(keypoints_data))


# Make sure every label present in labelMap has its own output folder before
# the workers start writing into it.
unique_labels = {info[0] for info in labelMap.values()}
for label in unique_labels:
    action_path = os.path.join(DATA_PATH, label)
    os.makedirs(action_path, exist_ok=True)

# Re-scan the dataset folder, keeping only .mp4 files whose id is in labelMap
video_files = [
    name
    for name in os.listdir(video_path)
    if name.endswith('.mp4')
    and int(os.path.splitext(name)[0]) in labelMap
]

# Process the videos on 4 worker threads; draining the map iterator through
# tqdm drives the progress bar.
with ThreadPoolExecutor(max_workers=4) as executor:
    outcomes = executor.map(process_video, video_files)
    list(tqdm(outcomes, total=len(video_files), desc="Processing Videos"))





Processing Videos: 100%|██████████| 1120/1120 [18:48<00:00,  1.01s/it]
