In [12]:
from sklearn.model_selection import train_test_split # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore

import os
import numpy as np # type: ignore
import tensorflow as tf # type: ignore

In [2]:
# Directory holding the saved keypoint data (numpy arrays).
DATA_PATH = os.path.join("../datasets")

# Every sign the project intends to detect, in class-index order.
_ALL_ACTIONS = (
    "hello",
    "thanks",
    "i-love-you",
    "see-you-later",
    "I",
    "Father",
    "Mother",
    "Yes",
    "No",
    "Help",
    "Please",
    "Want",
    "What",
    "Again",
    "Eat",
    "Milk",
    "More",
    "Go To",
    "Bathroom",
    "Fine",
    "Like",
    "Learn",
    "Sign",
    "Done",
)

# Signs to be detected: only the first six, matching the dataset we have right now.
ACTIONS = np.array(_ALL_ACTIONS)[:6]

# Number of videos recorded per label.
videos_per_label = 60

# Number of frames (actions) loaded per video.
# NOTE: this does not affect the size of an individual frame.
action_per_video = 30

In [73]:
# Containers filled by the loading loop: one entry per video.
sequences = []
labels = []

# Map every action name to its integer class index (its position in ACTIONS).
labels_map = dict(zip(ACTIONS, range(len(ACTIONS))))

# Number of values stored in a single frame, split into its three parts.
# NOTE(review): presumably face, pose and two-hand landmarks respectively
# (see the MediaPipe link below) — confirm against the extraction code.
face_values = 478 * 3
pose_values = 33 * 4
hand_values = 21 * 3 * 2
total_keypoints = face_values + pose_values + hand_values

# Values across all `action_per_video` frames of one video.
total_keypoints_per_label = action_per_video * total_keypoints

For further information about the face, pose, and hand keypoints, see the [MediaPipe Solutions guide](https://ai.google.dev/edge/mediapipe/solutions/guide).

In [74]:
# Display the number of values per frame and the number of values per video.
total_keypoints, total_keypoints_per_label

(1692, 50760)

In [75]:
# Start from empty containers so that re-running this cell does not append a
# second copy of the dataset to lists left over from a previous run.
sequences, labels = [], []

for action in ACTIONS:
    # Iterate over every recorded video (sequence) of the current action.
    for sequence in range(videos_per_label):
        # Window holding the frames of the current sequence.
        sequence_actions = []

        # Frame processing: build the path of each frame's numpy file, load
        # it, and validate its length before adding it to the window.
        for frame_num in range(action_per_video):
            # <DATA_PATH>/<action>/<sequence>/<frame_num>.npy
            npy_path = os.path.join(
                DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)
            )

            # Load the frame data from the numpy file.
            result = np.load(npy_path)

            if len(result) != total_keypoints:
                # The frame does not have the expected number of values:
                # report it and abandon the whole video.
                print(
                    f"Action '{action}' for video {str(sequence)} does not have matched total_keypoints : {len(result)}"
                )
                break

            # Append the frame data to the current sequence (window).
            sequence_actions.append(result)
        else:
            # Reached only when the frame loop finished without `break`, i.e.
            # every frame was valid. Storing the sequence and its label
            # together here keeps `sequences` rectangular (every entry has
            # `action_per_video` frames) and aligned one-to-one with `labels`;
            # a truncated sequence would make `np.array(sequences)` ragged.
            sequences.append(sequence_actions)
            labels.append(labels_map[action])

In [76]:
len(result) # number of values in the last frame loaded by the loop above (last frame of the last video of the last action)

1692

In [77]:
# Sanity check: the number of collected labels should equal the number of
# actions multiplied by the number of videos per action.
expected_label_count = videos_per_label * len(ACTIONS)
len(labels) == expected_label_count

True

In [78]:
# Preview the one-hot encoding of the integer labels, cast to int.
to_categorical(labels).astype(int)

array([[1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1]])

#### See how much data we can use for the training

In [79]:
# Stack the per-video frame windows into a single array.
X = np.array(sequences)

# Convert the labels list to a one-hot encoded NumPy array. `num_classes` is
# passed explicitly so the encoding always has one column per action; without
# it the width is inferred from the largest label present, and `y` would come
# out too narrow whenever the last action(s) contributed no samples.
y = to_categorical(labels, num_classes=len(ACTIONS)).astype(int)

In [80]:
# Fixed seed so that "Restart & Run All" reproduces the same train/test split
# (without `random_state` the shuffle differs on every run).
SPLIT_SEED = 42

# Hold out 20% of the videos for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

In [81]:
# Shapes of the splits, in the order X_train, X_test, y_train, y_test.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((288, 30, 1692), (72, 30, 1692), (288, 6), (72, 6))