In [1]:
import os
import numpy as np
import shutil

In [2]:
def load_keypoints(folder_path):
    """
    Load every per-frame keypoint array stored in a folder.

    Only files whose name ends in '.npy' are read. Files are ordered by the
    integer formed from all digits in their name (so '2.npy' comes before
    '10.npy'). Files whose name contains no digit are placed after the
    numbered ones, in alphabetical order, instead of aborting the load.

    Parameters:
    folder_path (str): Path to the folder containing the .npy keypoint files.

    Returns:
    list of np.ndarray: One array per file, in the order described above.
    """
    def frame_order(file_name):
        digits = ''.join(filter(str.isdigit, file_name))
        # int('') raises ValueError, so digit-free names get their own bucket
        # that sorts after every numbered frame.
        if digits:
            return (0, int(digits), file_name)
        return (1, 0, file_name)

    # arrange files in ascending frame order
    files = sorted(
        (f for f in os.listdir(folder_path) if f.endswith('.npy')),
        key=frame_order,
    )

    # Load the keypoints from each .npy file
    return [np.load(os.path.join(folder_path, file)) for file in files]

In [3]:
def save_keypoints(frames_keypoints, folder_path):
    """
    Write one .npy file per frame into a freshly created folder.

    Any existing folder at folder_path is removed first, so files from a
    previous save do not survive. Frames are written as '1.npy', '2.npy', ...
    following their position in frames_keypoints.

    Parameters:
    frames_keypoints (list of np.ndarray): Per-frame keypoint arrays, in frame order.
    folder_path (str): Folder that will hold the saved frames.
    """
    # Start from an empty folder: drop whatever is already there
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path, ignore_errors=True)
    os.makedirs(folder_path)

    # File names are 1-based frame numbers
    for frame_number, frame in enumerate(frames_keypoints, start=1):
        np.save(os.path.join(folder_path, f'{frame_number}.npy'), frame)

### Normalization + Anchoring
###### Anchor-based normalization is used to make the model more robust regardless of the signer's distance from the camera. Pose and hands are normalized separately to capture hand shapes more effectively and to reduce the weight of positional information.

In [4]:
# %%script false
def normalize_keypoints(input_folder, output_folder, ges):
    '''
    Normalize every per-frame keypoint file in input_folder and save the
    result under the same file name in output_folder.

    Each .npy file is read as a flat vector laid out as:
    - pose: 33*4 values, (x, y, z, visibility) for each of the 33 pose keypoints.
    - left hand: 21*3 values, (x, y, z) for each of the 21 left hand keypoints.
    - right hand: the remaining values, reshaped to 21 x 3.

    Only x and y are rewritten; z and visibility are copied through.
    - Pose: (xy - neck) / scale, where neck is the midpoint of the two
      shoulders and scale is the neck-to-nose distance.
    - Hand: offsets from the hand's first keypoint, divided by scale, then
      shifted by the normalized position of the matching pose wrist.
      A hand whose values are all zero is copied unchanged.

    ges is only used in the message printed when scale is zero (scale is then
    replaced by 1).
    '''
    # Ensure output dir exists
    os.makedirs(output_folder, exist_ok=True)

    # Rows of the 33 x 4 pose array used as anchors
    NOSE, LEFT_SHOULDER, RIGHT_SHOULDER = 0, 11, 12
    # The wrists are taken from the pose array as well, not from the hand arrays
    LEFT_WRIST, RIGHT_WRIST = 15, 16

    POSE_LEN = 33 * 4
    HAND_LEN = 21 * 3

    def digits_as_int(name):
        return int(''.join(filter(str.isdigit, name)))

    def anchor_hand(hand, wrist_xy, neck_xy, scale):
        # Copy first so z is carried over and an all-zero hand stays as it is
        anchored = np.copy(hand)
        if not np.all(hand == 0.00000000e+00):
            anchored[:, :2] = (hand[:, :2] - hand[0, :2]) / scale + (wrist_xy - neck_xy) / scale
        return anchored

    # Numeric order on the digits of the name, ties broken alphabetically
    frame_files = sorted(
        (name for name in os.listdir(input_folder) if name.endswith('.npy')),
        key=lambda name: (digits_as_int(name), name),
    )

    for frame_file in frame_files:
        flat = np.load(os.path.join(input_folder, frame_file))

        # Split the flat vector into its three parts
        pose_flat = flat[:POSE_LEN]
        left_flat = flat[POSE_LEN:POSE_LEN + HAND_LEN]
        right_flat = flat[POSE_LEN + HAND_LEN:]

        pose = pose_flat.reshape(33, 4)

        # Neck = midpoint of the shoulders; scale = neck-to-nose distance
        neck = (pose[LEFT_SHOULDER, :2] + pose[RIGHT_SHOULDER, :2]) / 2.0
        scale = np.linalg.norm(pose[NOSE, :2] - neck)

        if scale == 0:
            # Report the offending frame, then fall back to a neutral scale
            print(f"Video {ges} {frame_file} division by zero")
            scale = 1

        # Pose: centre on the neck and rescale x, y for all 33 rows at once
        pose_out = np.copy(pose)
        pose_out[:, :2] = (pose[:, :2] - neck) / scale

        left_hand = left_flat.reshape(21, 3)
        right_hand = right_flat.reshape(21, 3)

        # Hands: re-anchor each one on its (un-normalized) pose wrist
        left_out = anchor_hand(left_hand, pose[LEFT_WRIST, :2], neck, scale)
        right_out = anchor_hand(right_hand, pose[RIGHT_WRIST, :2], neck, scale)

        # Back to the flat pose | left hand | right hand layout
        merged = np.concatenate([pose_out.flatten(), left_out.flatten(), right_out.flatten()])

        # Save under the same file name as the input frame
        np.save(os.path.join(output_folder, frame_file), merged)

        # print(f"Saved normalized keypoints to {output_folder_path}")

In [29]:
%%script false
## ORIGINAL
## For debugging
def normalized_hand_keypoints(pose_keypoints, left_hand_keypoints, right_hand_keypoints):
    """
    Express each hand relative to its wrist row of the pose array.

    The wrist is read as the first three columns (x, y, z) of pose row 15
    (left) or 16 (right) and subtracted from the hand array. A hand whose
    values are all zero is returned as a fresh 21 x 3 array of zeros.

    Returns:
    tuple: (left hand result, right hand result)
    """
    # Wrist rows of the pose array: 15 = left, 16 = right
    left_wrist_xyz = pose_keypoints[15, :3]
    right_wrist_xyz = pose_keypoints[16, :3]

    def relative_to_wrist(hand, wrist_xyz):
        # All zeros: nothing to shift, hand back a zero block
        if np.all(hand == 0.00000000e+00):
            return np.zeros((21, 3))
        return hand - wrist_xyz

    left_result = relative_to_wrist(left_hand_keypoints, left_wrist_xyz)
    right_result = relative_to_wrist(right_hand_keypoints, right_wrist_xyz)
    return left_result, right_result

def normalize_pose_keypoints(pose_keypoints):
    """
    Centre the pose on the neck and scale it by the neck-to-nose distance.

    The neck is the midpoint of pose rows 11 and 12 (the shoulders) and the
    nose is row 0. Only the x, y columns of the first 33 rows are rewritten;
    every other value is copied through. A zero neck-to-nose distance is
    replaced by 1. The input array is not modified.

    Returns:
    np.ndarray: Copy of pose_keypoints with normalized x, y.
    """
    NOSE, LEFT_SHOULDER, RIGHT_SHOULDER = 0, 11, 12

    # Neck = midpoint between the two shoulders (x, y only)
    neck = (pose_keypoints[LEFT_SHOULDER, :2] + pose_keypoints[RIGHT_SHOULDER, :2]) / 2.0

    # Scale = distance between neck and nose, with 1 as the fallback for zero
    neck_to_nose = np.linalg.norm(pose_keypoints[NOSE, :2] - neck)
    scale = 1 if neck_to_nose == 0 else neck_to_nose

    result = np.copy(pose_keypoints)
    # Explicit row indices 0..32: exactly the rows a `for k in range(33)` loop visits
    rows = np.arange(33)
    result[rows, :2] = (pose_keypoints[rows, :2] - neck) / scale

    return result

def normalize_keypoints(input_folder, output_folder):
    '''
    Normalize every per-frame keypoint file in input_folder and save the
    result under the same file name in output_folder.

    Each .npy file is read as a flat vector laid out as:
    - pose: 33*4 values, (x, y, z, visibility) for each of the 33 pose keypoints.
    - left hand: 21*3 values, (x, y, z) for each of the 21 left hand keypoints.
    - right hand: the remaining values, reshaped to 21 x 3.

    The pose is passed through normalize_pose_keypoints, and the hands through
    normalized_hand_keypoints together with the already normalized pose.
    '''
    # Ensure output dir exists
    os.makedirs(output_folder, exist_ok=True)

    POSE_LEN = 33 * 4
    HAND_LEN = 21 * 3

    def digits_as_int(name):
        return int(''.join(filter(str.isdigit, name)))

    # Numeric order on the digits of the name, ties broken alphabetically
    frame_files = sorted(
        (name for name in os.listdir(input_folder) if name.endswith('.npy')),
        key=lambda name: (digits_as_int(name), name),
    )

    for frame_file in frame_files:
        flat = np.load(os.path.join(input_folder, frame_file))

        # Split the flat vector and restore the per-keypoint rows
        pose = flat[:POSE_LEN].reshape(33, 4)
        left_hand = flat[POSE_LEN:POSE_LEN + HAND_LEN].reshape(21, 3)
        right_hand = flat[POSE_LEN + HAND_LEN:].reshape(21, 3)

        # Pose first: the hand step receives the normalized pose
        pose_out = normalize_pose_keypoints(pose)
        left_out, right_out = normalized_hand_keypoints(pose_out, left_hand, right_hand)

        # Back to the flat pose | left hand | right hand layout
        merged = np.concatenate([pose_out.flatten(), left_out.flatten(), right_out.flatten()])

        # Save under the same file name as the input frame
        np.save(os.path.join(output_folder, frame_file), merged)

        # print(f"Saved normalized keypoints to {output_folder_path}")

Couldn't find program: 'false'


In [5]:
# %%script false --no-raise-error
video_directory = 'Error'

# Layout walked below: <video_directory>/<gesture>/<video>/<frame>.npy
# Every video folder is normalized in place (input folder == output folder).
gesture_folder = np.array(os.listdir(video_directory))
for gestures in gesture_folder:
    gesture_path = os.path.join(video_directory, gestures)

    # A stray file next to the gesture folders would make os.listdir raise
    if not os.path.isdir(gesture_path):
        continue

    # Video folders of this gesture (plain files are ignored)
    gesture = [
        fname for fname in os.listdir(gesture_path)
        if os.path.isdir(os.path.join(gesture_path, fname))
    ]

    print(gestures + ' normalization: ')

    for ges in gesture:
        load_path = os.path.join(video_directory, gestures, ges)
        save_path = load_path  # replace original file
        # normalize_keypoints writes its result to disk and returns nothing
        normalize_keypoints(load_path, save_path, ges)

perlahan2 normalization: 


### Data Augmentation

In [1]:
# Flipping the images horizontally (Done by inverting the x-coordinates of the keypoints)

def flip_keypoints_horizontally(input_folder, output_folder, image_width=1):
    """
    Flip keypoints horizontally and save them to new .npy files.

    Each .npy file in input_folder is read as a flat vector laid out as
    pose (33 keypoints x [x, y, z, visibility]), left hand (21 x [x, y, z])
    and right hand (the remaining values, also [x, y, z] per keypoint).
    Every x is replaced by image_width - x, and the two hand blocks trade
    places so that the mirrored right hand is stored as the left hand and
    vice versa. The result is saved under the same file name in output_folder.

    A block (pose, left hand or right hand) whose values are all zero is
    left untouched: flipping it would turn every x into image_width and the
    block would no longer read as all-zero.

    NOTE(review): only the hand blocks are relabelled; pose rows keep their
    index, so left/right pose landmarks are mirrored but not swapped.
    Confirm this is what downstream consumers expect.

    Parameters:
    input_folder (str): Path to the folder containing the original .npy files.
    output_folder (str): Path to the folder where the flipped .npy files will be saved.
    image_width (float): Width of the image (or coordinate range) for flipping.
                         Default is 1.0 for normalized coordinates (0 to 1).
    """
    POSE_LEN = 33 * 4
    HAND_LEN = 21 * 3

    def mirror_block(block, stride):
        # x is the first value of every keypoint, i.e. every `stride`-th entry
        mirrored = block.copy()
        if not np.all(block == 0):
            mirrored[0::stride] = image_width - block[0::stride]
        return mirrored

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Frames are independent of each other, so a plain alphabetical order is
    # enough and does not depend on the file names containing digits.
    npy_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.npy'))
    for npy_file in npy_files:
        # Load keypoints from the .npy file
        keypoints = np.load(os.path.join(input_folder, npy_file))

        pose = mirror_block(keypoints[:POSE_LEN], 4)
        left_hand = mirror_block(keypoints[POSE_LEN:POSE_LEN + HAND_LEN], 3)
        right_hand = mirror_block(keypoints[POSE_LEN + HAND_LEN:], 3)

        # relabel the hands: the mirrored right hand goes into the left slot
        flipped_keypoints = np.concatenate([pose, right_hand, left_hand])

        # Save the flipped keypoints to a new .npy file
        output_file = os.path.join(output_folder, npy_file)
        np.save(output_file, flipped_keypoints)

In [7]:
# %%script false --no-raise-error
video_directory = 'TRAIN'

# Layout walked below: <video_directory>/<gesture>/<video>/<frame>.npy
# Each video folder gets a mirrored copy stored as a new video folder of the
# same gesture.
# NOTE: re-running this cell also mirrors the copies created by an earlier
# run, because they sit next to the originals.
gesture_folder = np.array(os.listdir(video_directory))
for gestures in gesture_folder:
    gesture_path = os.path.join(video_directory, gestures)

    # A stray file next to the gesture folders would make os.listdir raise
    if not os.path.isdir(gesture_path):
        continue

    # Video folders of this gesture (plain files are ignored)
    gesture = [
        fname for fname in os.listdir(gesture_path)
        if os.path.isdir(os.path.join(gesture_path, fname))
    ]

    if not gesture:
        print(gestures + ' has no videos to flip')
        continue

    # os.listdir gives no ordering guarantee, so sort numerically before
    # reading the highest existing video number; otherwise a flipped copy
    # could be written into a folder that already holds an original video.
    gesture.sort(key=int)
    last_index = int(gesture[-1])

    # initialize folder names for the flipped copies: last_index + 1, + 2, ...
    flip_names = [str(last_index + offset + 1) for offset in range(len(gesture))]

    for ori, flip in zip(gesture, flip_names):
        load_path = os.path.join(video_directory, gestures, ori)
        save_path = os.path.join(video_directory, gestures, flip)
        # flip_keypoints_horizontally writes its result to disk and returns nothing
        flip_keypoints_horizontally(load_path, save_path)

    print(gestures + ' flipped')

abang flipped
ada flipped
adik_lelaki flipped
adik_perempuan flipped
air flipped
ambil flipped
anak flipped
apa flipped
arah flipped
awak flipped
ayah flipped
baca flipped
bagaimana flipped
bahasa_isyarat flipped
baik flipped
baik2 flipped
bas flipped
bawa flipped
belajar flipped
beli flipped
beli2 flipped
berapa flipped
berjalan flipped
berlari flipped
bila flipped
bola flipped
boleh flipped
bomba flipped
buang flipped
buat flipped
cuaca flipped
curi flipped
dapat flipped
dari flipped
datuk flipped
duit flipped
esok flipped
gambar flipped
hari flipped
hilang flipped
hospital flipped
hujan flipped
ibu flipped
jahat flipped
jalan flipped
jam flipped
jangan flipped
jumpa flipped
kacau flipped
kafeteria flipped
kakak flipped
kedai flipped
keluarga flipped
kereta flipped
kereta_api flipped
khabar_baik flipped
lelaki flipped
lupa flipped
main flipped
makan flipped
mana flipped
marah flipped
marah2 flipped
mari flipped
mari2 flipped
masa flipped
masalah flipped
menyakitkan flipped
minum flip

### Results

abang
| Interpolated | Normalized | Flipped |
| ------------- | ------------- | ------------- |
| ![display image](gif/abang/01_interpolated_landmarks.gif) | ![display image](gif/abang/01_normalized.gif) | ![display image](gif/abang/01_flipped.gif) |

| Interpolated | Normalized | Flipped |
| ------------- | ------------- | ------------- |
| ![display image](gif/abang/05_interpolated_landmarks.gif) | ![display image](gif/abang/05_normalized.gif) | ![display image](gif/abang/05_flipped.gif) |

| Interpolated | Normalized | Flipped |
| ------------- | ------------- | ------------- |
| ![display image](gif/abang/09_interpolated_landmarks.gif) | ![display image](gif/abang/09_normalized.gif) | ![display image](gif/abang/09_flipped.gif) |

ada
| Interpolated | Normalized | Flipped |
| ------------- | ------------- | ------------- |
| ![display image](gif/ada/01_interpolated_landmarks.gif) | ![display image](gif/ada/01_normalized.gif) | ![display image](gif/ada/01_flipped.gif) |

| Interpolated | Normalized | Flipped |
| ------------- | ------------- | ------------- |
| ![display image](gif/ada/02_interpolated_landmarks.gif) | ![display image](gif/ada/02_normalized.gif) | ![display image](gif/ada/02_flipped.gif) |