In [1]:
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import mediapipe as mp
from collections import deque
import os
import pickle

In [2]:
# Preprocessing function to extract landmarks
def extract_hand_landmarks(results):
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.extend([lm.x, lm.y, lm.z])
            return landmarks
    return None

def center_landmarks(landmarks):
    """
    Center and scale hand landmarks around the wrist (landmark 0).

    Parameters:
        landmarks (list or np.ndarray): Flat list or array of 63 values (21 landmarks * 3 coordinates)

    Returns:
        np.ndarray: Preprocessed landmarks, same shape (63,)
    """
    # Convert to numpy array
    landmarks = np.array(landmarks)

    # Reshape to (21, 3)
    landmarks = landmarks.reshape((21, 3))

    # Step 1: Centering - subtract wrist coordinates
    wrist = landmarks[0]
    centered = landmarks - wrist

    # Step 2: Scaling - normalize by maximum distance from wrist
    # Compute Euclidean distances from wrist to each point
    distances = np.linalg.norm(centered, axis=1)
    max_distance = distances.max()

    # To avoid division by zero (if max_distance is 0)
    if max_distance > 0:
        scaled = centered / max_distance
    else:
        scaled = centered  # if hand is not detected well, just keep centered

    # Flatten back to (63,)
    return scaled.flatten()


# Define MediaPipe hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
PATH = os.path.join(os.getcwd(),'archive', 'LIS-fingerspelling-dataset')
with mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7, max_num_hands=1) as hands:
    for j in os.listdir(PATH):
        count=0
        if j != 'readme.txt': 
            for k in os.listdir(PATH+ '/'+str(j)):
                path = PATH +'/'+ str(j)+'/'+str(k)
                img = cv2.imread(path,cv2.IMREAD_COLOR)
                results = hands.process(img)
                landmarks = extract_hand_landmarks(results)
                if landmarks:
                    # Center and scale landmarks
                    landmarks = center_landmarks(landmarks)
                    count += 1
                    #print('Extracting landmarks from images:', count, 'from', len(os.listdir(PATH+ '/'+str(j))), 'images in', str(j))
                    tmp_path = os.path.join(os.getcwd(), 'archive', 'landmarks',str(j))+'/'+str(k)[:-4]+'.pkl'
                    # Save
                    with open(tmp_path, 'wb') as f:
                        pickle.dump(landmarks, f)
                    mp_drawing.draw_landmarks(img, results.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS)
                    tmp_path_imgs = os.path.join(os.getcwd(), 'archive', 'landmarked_images',str(j))+'/'+str(k)[:-4]+'.jpg'
                    cv2.imwrite(tmp_path_imgs, img)
            print('Extracted landmarks from images:', count, 'from', len(os.listdir(PATH+ '/'+str(j))), 'images in', str(j))



I0000 00:00:1744673272.939297   50940 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1744673272.950153   51005 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.2.8-1ubuntu1~24.04.1), renderer: AMD Radeon Graphics (radeonsi, renoir, LLVM 19.1.1, DRM 3.59, 6.11.0-19-generic)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1744673273.018119   50986 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744673273.069812   50985 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1744673273.652907   50994 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Extracted landmarks from images: 282 from 299 images in e
Extracted landmarks from images: 62 from 309 images in q
Extracted landmarks from images: 210 from 246 images in a
Extracted landmarks from images: 55 from 283 images in p
Extracted landmarks from images: 27 from 328 images in m
Extracted landmarks from images: 267 from 312 images in x
Extracted landmarks from images: 191 from 311 images in y
Extracted landmarks from images: 32 from 274 images in n
Extracted landmarks from images: 0 from 275 images in v
Extracted landmarks from images: 240 from 254 images in c
Extracted landmarks from images: 128 from 328 images in u
Extracted landmarks from images: 273 from 281 images in f
Extracted landmarks from images: 191 from 309 images in l
Extracted landmarks from images: 246 from 272 images in o
Extracted landmarks from images: 295 from 314 images in w
Extracted landmarks from images: 224 from 299 images in h
Extracted landmarks from images: 219 from 273 images in d
Extracted landmarks 