##### Initialization

In [2]:
import cv2
import numpy as np
import os
import shutil
from matplotlib import pyplot as plt
import mediapipe as mp
from PIL import Image

### 1.0 Extract Keypoints from Video

In [3]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def mediapipe_detection(image,model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Color conversion from BGR to RGB
    image.flags.writeable = False                   # Image is no longer writeable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is no longer writeable
    image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)   # Color conversion RGB to BGR
    return image, results

def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)  # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)    # Draw right connections

def draw_styled_landmarks(image,results):
    # Draw pose connection
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(0,0,255), thickness=5,circle_radius=5),
                              mp_drawing.DrawingSpec(color=(80,110,10), thickness=5,circle_radius=5)
                              )
    # Draw left hand connection
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=5,circle_radius=5),
                              mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=5,circle_radius=5)
                              )
    # Draw right hand connection
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=5,circle_radius=5),
                              mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=5,circle_radius=5)
                              )
    
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose,lh,rh])

##### Directory

In [4]:
video_directory = r'Error'

##### Setting up paths

In [5]:
# Get all file names in the directory
gestures_files = os.listdir(video_directory)

gesture_folder = np.array(gestures_files)
print('Total Gestures: ', len(gesture_folder))
print(gesture_folder)

Total Gestures:  1
['abang']


In [6]:
sum = 0

for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)

    sum += len(gesture)

    print(gestures, end =" : ")        
    print(len(gesture))

print('Total Videos: ', sum)

abang : 1
Total Videos:  1


In [7]:
# Estimated Time
time = sum * 18 / 60
print('Estimated Time: ', time, 'minutes')

Estimated Time:  0.3 minutes


In [8]:
# Create landmark folder
for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)
            
    for ges in gesture:
        file = os.path.splitext(ges)
        pre_path = os.path.join(video_directory, gestures, file[0])
        landmark_path = os.path.join(video_directory, gestures, file[0], 'landmarks')
        npy_path = os.path.join(landmark_path, )
                    
        if not os.path.exists(landmark_path):
            os.makedirs(landmark_path)
            print(landmark_path + " created")
        else:
            print(landmark_path + " already exists")
            # # if exist, delete folder and recreate the folder
            # shutil.rmtree(pre_path, ignore_errors=True)
            # os.makedirs(landmark_path)
            # print("Exisiting " + landmark_path + " deleted and recreated")

Error\abang\01\landmarks created


In [9]:
# Create save location array
save_location_arr = []

for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)
            
    for ges in gesture:
        file = os.path.splitext(ges)
        save_location_arr.append(os.path.join(video_directory, gestures, file[0]))

print(save_location_arr)

['Error\\abang\\01']


In [10]:
# Video Path array
video_path_arr = []

for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)
            
    for ges in gesture:
        file = os.path.splitext(ges)
        video_path_arr.append(os.path.join(video_directory, gestures, file[0] + '.mp4'))

print(video_path_arr)

['Error\\abang\\01.mp4']


##### Extracting landmarks

In [11]:
# %%script false
# iterate through the video path array and save the landmarks as images and npy files
for video_path, save_location in zip(video_path_arr, save_location_arr):
    video = cv2.VideoCapture(video_path)

    frame_count = 0

    while video.isOpened():
        ret, frame = video.read()

        if not ret:
            break

        frame_count += 1

        # Edit condifidence and model complexity
        with mp_holistic.Holistic(min_detection_confidence=0.1, min_tracking_confidence=0.1, model_complexity=2, smooth_landmarks=True) as holistic:
            frame, results = mediapipe_detection(frame, holistic)
            # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            draw_styled_landmarks(frame, results)

        ''' Disabled to save time '''
        # # create the dark image
        # black = np.zeros(frame.shape , np.uint8)

        # # Replace the `img` with `black` while drawing the landmarks
        # draw_styled_landmarks(black, results)

        # frame_save_path = f'{save_location}/{frame_count}.png'
        # cv2.imwrite(frame_save_path, black)

        # Save the landmarks as npy file
        npy_save_path = f'{save_location}/landmarks/{frame_count}.npy'
        np.save(npy_save_path, extract_keypoints(results))

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    video.release()
    cv2.destroyAllWindows()

    # If frame_count > 30, print video path
    if frame_count > 30:
        extra = frame_count - 30
        print(video_path + " exceeds by " + str(extra) + " frames")
    if frame_count < 30:
        extra = frame_count - 30
        print(video_path + " not enough " + str(extra) + " frames")

### 2.0 Missing Keypoints Reconstruction

##### Defining functions

In [None]:
def initialize_hand_keypoints(frames_keypoints):
    """
    Initialize and the hand keypoints in a sequence of frames.

    Parameters:
    frames_keypoints (list of np.ndarray): List where each entry is a numpy array representing keypoints 
                                           for a frame, or None if keypoints are missing in that frame.
                                           
    Returns:
    list of np.ndarray: List with hand keypoints initialized and interpolated as needed.
    """
    # Define indices for left and right hand keypoints within the overall keypoint array
    pose_keypoints_count = 33 * 4
    left_hand_start = pose_keypoints_count
    left_hand_end = left_hand_start + 21 * 3
    right_hand_start = left_hand_end
    right_hand_end = right_hand_start + 21 * 3

    '''
    array[1, 1, 1, 1, 0, 0 ,0 ,1 ,1 ,1]
    '''
    
    # Extract valid hand keypoints for averaging
    valid_left_hand_keypoints = [kp[left_hand_start:left_hand_end] for kp in frames_keypoints if kp is not None and (kp[left_hand_start] != 0.00000000e+00)]
    valid_right_hand_keypoints = [kp[right_hand_start:right_hand_end] for kp in frames_keypoints if kp is not None and (kp[right_hand_start] != 0.00000000e+00)]
    
    print("valid_left_hand", len(valid_left_hand_keypoints))
    print("valid_right_hand", len(valid_right_hand_keypoints))

    # Calculate average hand keypoints
    if valid_left_hand_keypoints:
        avg_left_hand = np.mean(valid_left_hand_keypoints, axis=0)
    else:
        avg_left_hand = np.zeros(21 * 3)
    
    if valid_right_hand_keypoints:
        avg_right_hand = np.mean(valid_right_hand_keypoints, axis=0)
    else:
        avg_right_hand = np.zeros(21 * 3)

    # Initialize the first and last frames if they have missing hand keypoints
    if frames_keypoints[0] is None or not np.any(frames_keypoints[0][left_hand_start:left_hand_end]):
        frames_keypoints[0][left_hand_start:left_hand_end] = avg_left_hand
    if frames_keypoints[0] is None or not np.any(frames_keypoints[0][right_hand_start:right_hand_end]):
        frames_keypoints[0][right_hand_start:right_hand_end] = avg_right_hand
    
    if frames_keypoints[-1] is None or not np.any(frames_keypoints[-1][left_hand_start:left_hand_end]):
        frames_keypoints[-1][left_hand_start:left_hand_end] = avg_left_hand
    if frames_keypoints[-1] is None or not np.any(frames_keypoints[-1][right_hand_start:right_hand_end]):
        frames_keypoints[-1][right_hand_start:right_hand_end] = avg_right_hand

    return frames_keypoints

In [28]:
def find_neighbour_keypoints(frames_keypoints, k, frame, hand_start, hand_end):
    alpha_left = alpha_right = beta_left = beta_right = None

    # Search for α (previous frame with keypoints)
    for a in range(1, k + 1):
        if frames_keypoints[k - a] is not None and np.any(frames_keypoints[k - a][hand_start:hand_end]):
            alpha_left = a
            break

    # Search for β (next frame with keypoints)
    for b in range(1, len(frames_keypoints) - k):
        if frames_keypoints[k + b] is not None and np.any(frames_keypoints[k + b][hand_start:hand_end]):
            beta_left = b
            break

    # If α and β are found, interpolate
    if alpha_left is not None and beta_left is not None:
        f_k_alpha_left = frames_keypoints[k - alpha_left][hand_start:hand_end]
        f_k_beta_left = frames_keypoints[k + beta_left][hand_start:hand_end]
        frame[hand_start:hand_end] = (beta_left * f_k_alpha_left + alpha_left * f_k_beta_left) / (alpha_left + beta_left)

def bilinear_interpolation(frames_keypoints):
    """
    Apply bilinear interpolation to fill missing hand keypoints based on the provided formula.
    
    Parameters:
    frames_keypoints (list of np.ndarray): List where each entry is a numpy array representing keypoints 
                                           for a frame, or None if keypoints are missing in that frame.
                                           
    Returns:
    list of np.ndarray: List of frames with interpolated hand keypoints.
    """
    # Define indices for left and right hand keypoints within the overall keypoint array
    pose_keypoints_count = 33 * 4
    left_hand_start = pose_keypoints_count
    left_hand_end = left_hand_start + 21 * 3
    right_hand_start = left_hand_end
    right_hand_end = right_hand_start + 21 * 3

    # Process each frame
    for k in range(len(frames_keypoints)):
        frame = frames_keypoints[k]
        
        # Check if current frame's hand keypoints are missing
        if frame is None or (frame[left_hand_start] == 0.00000000e+00 or frame[right_hand_start] == 0.00000000e+00):
            # Find α and β for left and right hands
            # Initialize α and β to None as we search

            if frame[left_hand_start] == 0.00000000e+00:
                find_neighbour_keypoints(frames_keypoints, k, frame, left_hand_start, left_hand_end)
                print('frame ', k + 1, ' left hand keypoints interpolated')

            if frame[right_hand_start] == 0.00000000e+00:
                find_neighbour_keypoints(frames_keypoints, k, frame, right_hand_start, right_hand_end)
                print('frame ', k + 1, ' right hand keypoints interpolated')

    return frames_keypoints

In [29]:
def load_keypoints(folder_path):
    """
    Load the keypoints from a given file path.
    
    Parameters:
    path (str): Path to the .npy file containing the keypoints.
    
    Returns:
    list of np.ndarray: List of frames where each frame is a numpy array representing keypoints.
    """
    frames_keypoints = []

    files = [f for f in os.listdir(folder_path) if f.endswith('.npy')]
    # print(files)

    # arrange files in ascending order
    files.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
    print(files)

    # Load the keypoints from the .npy file
    for file in files:
        file_path = os.path.join(folder_path, file)
        frame_keypoints = np.load(file_path)
        frames_keypoints.append(frame_keypoints)

    return frames_keypoints

In [30]:
def save_keypoints(frames_keypoints, folder_path):
    """
    Save the keypoints to a given file path.
    
    Parameters:
    frames_keypoints (list of np.ndarray): List of frames where each frame is a numpy array representing keypoints.
    folder_path (str): Path to the folder where the keypoints will be saved.
    """
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    else:
        # if exist, delete folder and recreate the folder
        shutil.rmtree(folder_path, ignore_errors=True)
        os.makedirs(folder_path)

    for i, frame_keypoints in enumerate(frames_keypoints):
        file_path = os.path.join(folder_path, f'{i + 1}.npy')
        np.save(file_path, frame_keypoints)

##### Directory

In [31]:
# video_directory = r'DATASET'

##### Setting up paths

In [32]:
# Get all file names in the directory
gestures_files = os.listdir(video_directory)

gesture_folder = np.array(gestures_files)
print('Total Gestures: ', len(gesture_folder))
print(gesture_folder)

Total Gestures:  1
['abang']


In [33]:
for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)

    print(gestures, end =" : ")        
    print(gesture)

abang : ['01']


In [34]:
# Create landmark folder
for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)
            
    for ges in gesture:
        file = os.path.splitext(ges)
        pre_path = os.path.join(video_directory, gestures, file[0])
        interpolated_path = os.path.join(video_directory, gestures, file[0], 'interpolated_landmarks')
        npy_path = os.path.join(interpolated_path, )
                    
        if not os.path.exists(interpolated_path):
            os.makedirs(interpolated_path)
            print(interpolated_path + " created")
        else:
            print(interpolated_path + " already exists")
            # # if exist, delete folder and recreate the folder
            # shutil.rmtree(pre_path, ignore_errors=True)
            # os.makedirs(interpolated_path)
            # print("Exisiting " + interpolated_path + " deleted and recreated")

Error\abang\01\interpolated_landmarks already exists


##### Bilinear Interpolation

In [35]:
# %%script false
# Perform interpolation on the landmarks
for gestures in gesture_folder:
    gesture = []

    for fname in os.listdir(os.path.join(video_directory, gestures)):
        path = os.path.join(video_directory, gestures, fname)
        if os.path.isdir(path):
            continue
        else:
            ges = os.path.splitext(fname)[0]
            gesture.append(ges)
            
    for ges in gesture:
        file = os.path.splitext(ges)
        load_path = os.path.join(video_directory, gestures, file[0], 'landmarks')
        save_path = os.path.join(video_directory, gestures, file[0], 'interpolated_landmarks')
        frames_keypoints = load_keypoints(load_path)
        frames_keypoints = initialize_hand_keypoints(frames_keypoints)
        frames_keypoints = bilinear_interpolation(frames_keypoints)
        save_keypoints(frames_keypoints, save_path)

['1.npy', '2.npy', '3.npy', '4.npy', '5.npy', '6.npy', '7.npy', '8.npy', '9.npy', '10.npy', '11.npy', '12.npy', '13.npy', '14.npy', '15.npy', '16.npy', '17.npy', '18.npy', '19.npy', '20.npy', '21.npy', '22.npy', '23.npy', '24.npy', '25.npy', '26.npy', '27.npy', '28.npy', '29.npy', '30.npy']
valid_left_hand 20
valid_right_hand 0
frame  1  right hand keypoints interpolated
frame  2  right hand keypoints interpolated
frame  3  right hand keypoints interpolated
frame  4  right hand keypoints interpolated
frame  5  right hand keypoints interpolated
frame  6  right hand keypoints interpolated
frame  7  right hand keypoints interpolated
frame  8  right hand keypoints interpolated
frame  9  right hand keypoints interpolated
frame  10  right hand keypoints interpolated
frame  11  right hand keypoints interpolated
frame  12  right hand keypoints interpolated
frame  13  right hand keypoints interpolated
frame  14  right hand keypoints interpolated
frame  15  right hand keypoints interpolated
fram

In [36]:
video_directory = r'Error'

##### Single Frame

In [37]:
def plot_keypoints(keypoints, title):
    # Separate pose, left hand, and right hand keypoints based on sizes
    pose = keypoints[:33 * 4].reshape(-1, 4)        # 33 keypoints, each with [x, y, z, visibility]
    left_hand = keypoints[33 * 4:33 * 4 + 21 * 3].reshape(-1, 3)  # 21 keypoints for left hand, each with [x, y, z]
    right_hand = keypoints[33 * 4 + 21 * 3:].reshape(-1, 3)       # 21 keypoints for right hand, each with [x, y, z]

    # Define connections (edges) between keypoints for pose, left hand, and right hand
    # These connections are based on a standard pose model, such as MediaPipe's Pose model.
    pose_connections = [
        (0, 1), (1, 2), (2, 3), (3, 7), (0, 4), (4, 5), (5, 6), (6, 8), (9, 10),
        (11, 12), (11, 13), (13, 15), (15, 17), (15, 19), (15, 21), (17, 19),
        (12, 14), (14, 16), (16, 18), (16, 20), (16, 22), (18, 20), (23, 24),
        (23, 25), (24, 26), (25, 27), (26, 28), (27, 29), (28, 30), (29, 31), (30, 32)
    ]

    hand_connections = [
        (0, 1), (1, 2), (2, 3), (3, 4), # Thumb
        (0, 5), (5, 6), (6, 7), (7, 8), # Index finger
        (0, 9), (9, 10), (10, 11), (11, 12), # Middle finger
        (0, 13), (13, 14), (14, 15), (15, 16), # Ring finger
        (0, 17), (17, 18), (18, 19), (19, 20) # Pinky
    ]

    # Plot keypoints with connections
    plt.figure(figsize=(4.8, 2.7))
    plt.axis([0, 1.125, 0, 2])

    # Plot pose keypoints and connections
    for (start, end) in pose_connections:
        plt.plot([pose[start, 0], pose[end, 0]], [pose[start, 1], pose[end, 1]], 'k-', lw=2)
    plt.scatter(pose[:, 0], pose[:, 1], label='Pose', s=20, alpha=0.7)

    # Plot left hand keypoints and connections
    for (start, end) in hand_connections:
        plt.plot([left_hand[start, 0], left_hand[end, 0]], [left_hand[start, 1], left_hand[end, 1]], 'b-', lw=2)
    plt.scatter(left_hand[:, 0], left_hand[:, 1], label='Left Hand', s=20, alpha=0.7)

    # Plot right hand keypoints and connections
    for (start, end) in hand_connections:
        plt.plot([right_hand[start, 0], right_hand[end, 0]], [right_hand[start, 1], right_hand[end, 1]], 'r-', lw=2)
    plt.scatter(right_hand[:, 0], right_hand[:, 1], label='Right Hand', s=20, alpha=0.7)

    # Adjust plot settings
    plt.title(title)
    plt.legend()
    # Adjust plot settings
    plt.legend()
    ax = plt.gca()
    leg = ax.get_legend()
    leg.legend_handles[0].set_color('black')
    leg.legend_handles[1].set_color('blue')
    leg.legend_handles[2].set_color('red')
    plt.axis('off')
    plt.gca().invert_yaxis()  # Invert Y axis for typical image coordinates
    plt.show()

In [38]:
def visualize_interpolation(gesture, video_num, frame_num):
    """
    Visualize the original and interpolated keypoints for a specific frame in a video.
    
    Parameters:
    gesture (str): Name of the gesture.
    video_num (int): Number of the video.
    frame_num (int): Number of the frame to visualize.
    """
    # Load original and interpolated keypoints
    npy_file = f'{frame_num}.npy'
    keypoints = np.load(os.path.join(video_directory, gesture, f'{video_num:02}', 'landmarks', npy_file))
    keypoints_interpolated = np.load(os.path.join(video_directory, gesture, f'{video_num:02}', 'interpolated_landmarks', npy_file))
    # keypoints_flipped = np.load(os.path.join(video_directory, gesture, f'{video_num:02}', 'flipped', npy_file))

    # Plot original and interpolated keypoints
    plot_keypoints(keypoints, title='Original Keypoints')
    plot_keypoints(keypoints_interpolated, title='Interpolated Keypoints')
    # plot_keypoints(keypoints_flipped, title='Flipped')

In [39]:
%%script false --no-raise-error
visualize_interpolation('berapa', 28, 30)

Couldn't find program: 'false'


##### Gif

### Results

Hujan
| Original | Interpolated |
| ------------- | ------------- |
| ![display image](gif/hujan/landmarks.gif) | ![display image](gif/hujan/interpolated_landmarks.gif) |

Kedai
| Original | Interpolated |
| ------------- | ------------- |
| ![display image](gif/kedai/landmarks.gif) | ![display image](gif/kedai/interpolated_landmarks.gif) |

Ada
| Original | Interpolated |
| ------------- | ------------- |
| ![display image](gif/ada/landmarks.gif) | ![display image](gif/ada/interpolated_landmarks.gif) |

Saudara
| Original | Interpolated |
| ------------- | ------------- |
| ![display image](gif/saudara/landmarks.gif) | ![display image](gif/saudara/interpolated_landmarks.gif) |