<a href="https://colab.research.google.com/github/LeoDinga/DL_Project/blob/main/extract_keypoints.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Project: Action Recognition in Tennis
Students: Diana Santos (64478), Leonor Fandinga (64481), Sofia Rocha (65111)

Professor: Nuno Garcia

In [None]:
import os

# Install (or upgrade to the latest version of) the packages used below to
# extract the keypoints: MediaPipe, OpenCV and tqdm.
!pip install mediapipe opencv-python tqdm --upgrade



### Before executing the next code cell, the Google Colab runtime must be restarted.

In [None]:
#Execution time: 33 min

import os
import subprocess

import cv2
import mediapipe as mp
import numpy as np
from tqdm import tqdm

def convert_video_to_npy(video_path, resize_shape=(224, 224)):
    """
    Reads a video frame by frame from the specified path, resizes each frame and
    returns a NumPy array containing all the resized frames.

    Parameters:
        video_path (str): Path to the video file (.avi).
        resize_shape (tuple): Target frame size. It is passed unchanged to cv2.resize as
            its dsize argument, which OpenCV interprets as (width, height). Default=(224, 224)

    Returns:
        np.ndarray: The resized frames stacked along the first axis, in the order and
            channel layout returned by cap.read(). If no frame could be read the array is empty.

    Raises:
        ValueError: If the video file cannot be opened.

    Note: This function only builds the array in memory; writing it to a .npy file is
    left to the caller.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            raise ValueError(f"Error opening video file: {video_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read failure): stop decoding.
                break
            frames.append(cv2.resize(frame, resize_shape))
    finally:
        # Always free the capture handle, including when opening or resizing fails.
        cap.release()

    return np.array(frames)

def create_npy_from_videos(src_dir, npy_dir):
    """
    For each video in a source directory, converts the video into a NumPy array of frames
    (using the secondary function convert_video_to_npy) and saves each array as a .npy file
    in a destination directory, organising them by class/action.

    Parameters:
        src_dir (str): Path to the source directory containing subfolders of videos by action.
        npy_dir (str): Path to the destination directory, where the .npy arrays will be saved.

    Note: Only files whose name ends with ".avi" are converted. A video that fails to convert
    is reported with a printed message and skipped, so one bad file does not stop the batch.
    """
    os.makedirs(npy_dir, exist_ok=True)
    for action in os.listdir(src_dir):
        action_path = os.path.join(src_dir, action)
        if not os.path.isdir(action_path):
            # Ignore loose files next to the action subfolders.
            continue

        dest_action_path = os.path.join(npy_dir, action)
        os.makedirs(dest_action_path, exist_ok=True)

        for video_file in os.listdir(action_path):
            if not video_file.endswith(".avi"):
                continue
            video_path = os.path.join(action_path, video_file)
            # Swap only the final extension; str.replace would also rewrite any
            # other ".avi" occurring earlier in the file name.
            stem = os.path.splitext(video_file)[0]
            output_path = os.path.join(dest_action_path, stem + ".npy")
            try:
                frames_array = convert_video_to_npy(video_path)
                np.save(output_path, frames_array)
            except Exception as e:
                # Best-effort batch conversion: report the failure and keep going.
                print(f"Error processing {video_file}: {e}")

def pad_or_truncate_keypoints(keypoints, target_length=120):
    """
    Sets the number of keypoint frames to a fixed size (default: 120).
    If there are fewer frames than the target, fill with zeros at the end (pad);
    if there are more, keep only the first target_length frames (truncate).

    Parameters:
        keypoints (np.ndarray): Array of keypoints whose first axis is the frame axis,
            typically of shape (num_frames, num_points, 3). Any trailing shape is accepted.
        target_length (int): Desired number of frames in the output. Must be >= 0.

    Returns:
        np.ndarray: Array of shape (target_length, ...) with the same trailing shape and
            dtype as the input.

    Raises:
        ValueError: If target_length is negative.
    """
    if target_length < 0:
        raise ValueError(f"target_length must be non-negative, got {target_length}")

    keypoints = np.asarray(keypoints)
    num_frames = keypoints.shape[0]
    if num_frames >= target_length:
        return keypoints[:target_length]

    # Pad with zeros of the input's own dtype and trailing shape so the result
    # has the same dtype whether or not padding was needed.
    padding = np.zeros(
        (target_length - num_frames, *keypoints.shape[1:]),
        dtype=keypoints.dtype,
    )
    return np.concatenate((keypoints, padding), axis=0)

def extract_keypoints_from_npy(npy_dir, save_path="all_keypoints.npz", *, frame_step=5, target_length=120):
    """
    Extracts pose keypoints from videos stored in .npy arrays using the MediaPipe library
    and saves all of them in a single compressed .npz file.

    Parameters:
        npy_dir (str): Directory containing .npy arrays of videos organised by action
            (one subfolder per action).
        save_path (str): Path of the output .npz file, where it will be saved.
        frame_step (int): Only every frame_step-th frame of each video is processed.
            Keyword-only. Default=5
        target_length (int): Number of keypoint frames stored per video; shorter sequences
            are zero-padded and longer ones truncated. Keyword-only. Default=120

    Raises:
        ValueError: If frame_step is smaller than 1.

    Note: Each entry of the .npz file is stored under the key "<action>__<video name>" and
    holds an array of shape (target_length, 33, 3) with the x, y, z of every pose landmark.
    Frames where no pose is detected contribute all-zero keypoints. Files that cannot be
    processed are reported with a printed message and skipped.
    """
    if frame_step < 1:
        raise ValueError(f"frame_step must be >= 1, got {frame_step}")

    num_landmarks = 33  # number of landmarks in a MediaPipe Pose result
    flat_dict = {}

    # The context manager closes the Pose solution when done, even if an error occurs.
    # static_image_mode=True makes MediaPipe treat every frame as an independent image.
    with mp.solutions.pose.Pose(static_image_mode=True) as pose:
        for action in tqdm(os.listdir(npy_dir), desc="Processing actions"):
            action_path = os.path.join(npy_dir, action)
            if not os.path.isdir(action_path):
                continue

            for video_file in os.listdir(action_path):
                if not video_file.endswith(".npy"):
                    continue
                video_path = os.path.join(action_path, video_file)
                try:
                    sample = np.load(video_path)
                    # Expect (num_frames, height, width, 3); anything else is not a colour video.
                    if sample.ndim != 4 or sample.shape[-1] != 3:
                        print(f"Skipping {video_file}: unexpected array shape {sample.shape}")
                        continue
                    sample = sample.astype(np.uint8)

                    video_keypoints = []
                    for frame in sample[::frame_step]:
                        # Frames are stored in OpenCV's BGR order; MediaPipe expects RGB.
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        results = pose.process(frame_rgb)
                        if results.pose_landmarks:
                            keypoints = [[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark]
                        else:
                            # No pose detected: keep the time axis aligned with zero keypoints.
                            keypoints = np.zeros((num_landmarks, 3)).tolist()
                        video_keypoints.append(keypoints)

                    if video_keypoints:
                        kp_array = np.array(video_keypoints)
                        kp_array = pad_or_truncate_keypoints(kp_array, target_length=target_length)
                        # key: action name and video name (without its extension)
                        key = f"{action}__{os.path.splitext(video_file)[0]}"
                        flat_dict[key] = kp_array
                except Exception as e:
                    # Best-effort batch extraction: report the failure and keep going.
                    print(f"Error with {video_file}: {e}")

    np.savez_compressed(save_path, **flat_dict)
    print(f"Keypoints saved to {save_path}")



# Clone dataset if necessary
if not os.path.exists("dataset/VIDEO_RGB"):
    # Blob-less clone without checkout, followed by a cone-mode sparse checkout,
    # so that only the VIDEO_RGB folder is downloaded.
    # check=True stops immediately if any git command fails, instead of carrying on
    # and failing later on a missing directory.
    if not os.path.isdir("dataset/.git"):
        # Skip the clone when a previous (interrupted) run already created the repository.
        subprocess.run(
            [
                "git", "clone", "--filter=blob:none", "--no-checkout",
                "https://github.com/THETIS-dataset/dataset.git",
            ],
            check=True,
        )
    # cwd= runs each command inside the repository without changing the
    # working directory of the notebook process.
    for git_args in (
        ["sparse-checkout", "init", "--cone"],
        ["sparse-checkout", "set", "VIDEO_RGB"],
        ["checkout"],
    ):
        subprocess.run(["git", *git_args], cwd="dataset", check=True)

src_dir = "dataset/VIDEO_RGB"
npy_dir = "npy_videos"
# Step 1: convert every video into a .npy array of frames.
create_npy_from_videos(src_dir, npy_dir)
# Step 2: extract the pose keypoints of every converted video into one .npz file.
extract_keypoints_from_npy(npy_dir, save_path="all_keypoints.npz")




Processing actions: 100%|██████████| 12/12 [24:13<00:00, 121.10s/it]


Keypoints saved to all_keypoints.npz
