Preprocess the images 

In [9]:
import cv2
import os
import numpy as np
import mediapipe as mp
from tqdm import tqdm
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Initialize MediaPipe Hands and the drawing helpers once, outside the loops
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

# Set input and output directories
input_dir = "Letters_Dataset"
output_dir = "Letters_Dataset_Output"

# Size (width, height) every image is resized to before detection
TARGET_SIZE = (672, 672)  # Replace with your desired size

# Styles for the annotated output images. The image is RGB while it is being
# drawn on, so (255, 0, 0) is red (landmark dots) and (0, 255, 0) is green
# (connection lines).
landmark_style = mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2)
connection_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)

# One flattened (x, y, z) landmark vector and one label per image with a detected hand
landmarks = []
labels = []
# Files OpenCV could not decode (cv2.imread returned None)
unreadable_files = []

try:
    # Each subdirectory name is used as the class label; sorting keeps the
    # sample order reproducible across platforms
    for subdir in sorted(os.listdir(input_dir)):
        subdir_path = os.path.join(input_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue

        image_files = sorted(
            path
            for path in (os.path.join(subdir_path, f) for f in os.listdir(subdir_path))
            if os.path.isfile(path)
        )
        if not image_files:
            continue

        for image_path in tqdm(image_files, desc=f"Processing {subdir}", unit="image"):
            # cv2.imread signals failure by returning None instead of raising,
            # so skip such files rather than crash in cv2.resize
            image_bgr = cv2.imread(image_path)
            if image_bgr is None:
                unreadable_files.append(image_path)
                continue
            image_bgr = cv2.resize(image_bgr, TARGET_SIZE)

            # Convert image to RGB format and run hand detection
            image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
            results = hands.process(image)
            if not results.multi_hand_landmarks:
                continue

            # max_num_hands=1, so keep exactly one sample per image
            hand_landmarks = results.multi_hand_landmarks[0]

            # Scale x / y to pixel coordinates of the resized image; z is kept as returned
            image_height, image_width, _ = image.shape
            landmarks_norm = np.array(
                [[lmk.x * image_width, lmk.y * image_height, lmk.z] for lmk in hand_landmarks.landmark]
            )

            # Draw the landmarks and their connections on a copy of the image
            image_draw = image.copy()
            mp_drawing.draw_landmarks(
                image_draw, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                landmark_style,
                connection_style,
            )

            # Add landmarks and label to lists
            landmarks.append(landmarks_norm.flatten())
            labels.append(subdir)

            # Save output image; cv2.imwrite expects BGR, so convert back from RGB
            output_path = os.path.join(output_dir, subdir, os.path.basename(image_path))
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            cv2.imwrite(output_path, cv2.cvtColor(image_draw, cv2.COLOR_RGB2BGR))
finally:
    # Release the MediaPipe resources even if processing failed part-way
    hands.close()
print("Clean up Done")

if unreadable_files:
    print(f"Skipped {len(unreadable_files)} unreadable file(s)")

if not landmarks:
    raise ValueError(f"No hand landmarks were extracted from '{input_dir}'")

# Convert landmarks and labels to NumPy arrays
landmarks = np.array(landmarks)
labels = np.array(labels)

# Encode the labels using one-hot encoding.
# NOTE: this encoding is not written to disk; only the raw string labels are saved below.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
onehot_encoder = OneHotEncoder()
labels_one_hot = onehot_encoder.fit_transform(labels_encoded.reshape(-1, 1))

# Known datasets get their own directory; anything else is saved in the working directory
save_dirs = {"Letters_Dataset": "Letters", "Numbers_Dataset": "Numbers"}
save_dir = save_dirs.get(input_dir, "")
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
np.save(os.path.join(save_dir, "hand_landmarks.npy"), landmarks)
np.save(os.path.join(save_dir, "hand_labels.npy"), labels)
print("Store the extracted hand_landmarks.npy and hand_labels.npy Done")
    

Processing 1: 100%|█████████████████████████████████████████████████████████████| 1169/1169 [01:14<00:00, 15.73image/s]
Processing 10: 100%|████████████████████████████████████████████████████████████| 1052/1052 [01:04<00:00, 16.19image/s]
Processing 11: 100%|████████████████████████████████████████████████████████████| 1038/1038 [01:06<00:00, 15.70image/s]
Processing 12: 100%|████████████████████████████████████████████████████████████| 1019/1019 [01:04<00:00, 15.81image/s]
Processing 13: 100%|████████████████████████████████████████████████████████████| 1158/1158 [01:13<00:00, 15.68image/s]
Processing 14: 100%|████████████████████████████████████████████████████████████| 1060/1060 [01:05<00:00, 16.08image/s]
Processing 15: 100%|████████████████████████████████████████████████████████████| 1110/1110 [01:11<00:00, 15.59image/s]
Processing 16: 100%|████████████████████████████████████████████████████████████| 1033/1033 [01:05<00:00, 15.88image/s]
Processing 17: 100%|████████████████████

Clean up Done
Store the extracted hand_landmarks.npy and hand_labels.npy Done





Check what's stored in the .npy files

In [1]:
import numpy as np

# Directory the preprocessing cell writes its arrays to for the letters dataset
# (use "Numbers" to inspect the numbers dataset instead)
data_dir = "Letters"

# Load the .npy files
hand_landmarks = np.load(f"{data_dir}/hand_landmarks.npy")
hand_labels = np.load(f"{data_dir}/hand_labels.npy")

# Print the shape of the data
print(hand_landmarks.shape)
print(hand_labels.shape)

# Reshape the first sample to a 2D array with 3 columns (x, y, z per landmark)
hand_landmarks_table = hand_landmarks[0].reshape(-1, 3)
hand_labels_table = hand_labels[0]

# Show the first sample together with its label
print(hand_labels_table)
print(hand_landmarks_table)

(17996, 63)
(17996,)
[[ 3.37330204e+02  4.56753605e+02  8.95528956e-07]
 [ 4.09144758e+02  4.23423243e+02 -5.36280125e-02]
 [ 4.45806078e+02  3.65583921e+02 -6.08321428e-02]
 [ 4.65226135e+02  3.13405798e+02 -6.60104081e-02]
 [ 4.77884943e+02  2.59490679e+02 -7.06487969e-02]
 [ 4.13255533e+02  2.95857940e+02  2.84230113e-02]
 [ 4.29622530e+02  2.32886719e+02  2.65974123e-02]
 [ 4.36262655e+02  1.95435908e+02  6.46114070e-03]
 [ 4.40368464e+02  1.60133882e+02 -1.42610529e-02]
 [ 3.77124573e+02  2.88625351e+02  3.57130431e-02]
 [ 3.93832872e+02  2.21093084e+02  3.48498113e-02]
 [ 4.03848976e+02  1.73748556e+02  2.15861248e-03]
 [ 4.13155037e+02  1.36242621e+02 -2.89214384e-02]
 [ 3.40370287e+02  2.90684924e+02  3.09532043e-02]
 [ 3.57400023e+02  2.28250231e+02  1.84104852e-02]
 [ 3.71441465e+02  1.91035721e+02 -1.96998902e-02]
 [ 3.84010031e+02  1.58403585e+02 -5.13120778e-02]
 [ 3.02293588e+02  2.98379880e+02  1.95959825e-02]
 [ 3.20032705e+02  2.49842434e+02 -1.14653097e-03]
 [ 3.32281

Train the model

In [4]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflowjs as tfjs

from sklearn.model_selection import train_test_split

train = "Letters"

# Path prefix for the inputs and for the saved model. Any value other than
# "Letters" / "Numbers" falls back to the working directory.
if train == "Letters":
    data_prefix = "Letters/"
elif train == "Numbers":
    data_prefix = "Numbers/"
else:
    data_prefix = ""

# Load the landmark and label data
landmarks = np.load(data_prefix + "hand_landmarks.npy")
labels = np.load(data_prefix + "hand_labels.npy")

# Normalize the landmark data (per-feature z-score). A feature with zero
# variance would divide by zero, so its divisor is replaced by 1.
mean = np.mean(landmarks, axis=0)
std = np.std(landmarks, axis=0)
std_safe = np.where(std == 0, 1.0, std)
landmarks_norm = (landmarks - mean) / std_safe

# Convert the labels to one-hot encoding. Indexing an identity matrix gives the
# same dense float matrix as a one-hot encoder over the integer codes, without
# relying on the OneHotEncoder `sparse` argument that newer scikit-learn removed.
label_encoder = LabelEncoder()
labels_int = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)
labels_one_hot = np.eye(num_classes)[labels_int]

# Reshape the landmark data to match the input shape of VGG16: each feature
# vector becomes a one-row, three-channel strip (identical channels) that is
# stretched to IMAGE_SIZE. Work in batches to avoid one TensorFlow call per
# sample; float32 is what tf.image.resize returns, so nothing is lost.
IMAGE_SIZE = (112, 112)
RESIZE_BATCH = 256
num_samples = landmarks_norm.shape[0]
landmarks_resized = np.zeros((num_samples, *IMAGE_SIZE, 3), dtype=np.float32)
for start in range(0, num_samples, RESIZE_BATCH):
    stop = min(start + RESIZE_BATCH, num_samples)
    # (batch, features) -> (batch, 1, features, 3)
    strips = np.repeat(landmarks_norm[start:stop, np.newaxis, :, np.newaxis], 3, axis=-1)
    landmarks_resized[start:stop] = tf.image.resize(strips, IMAGE_SIZE).numpy()

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(landmarks_resized, labels_one_hot, test_size=0.2, random_state=42)

# Define the VGG16-based model: convolutional base plus a small dense classifier head
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(*IMAGE_SIZE, 3))
x = vgg_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(labels_one_hot.shape[1], activation='softmax')(x)
model = Model(inputs=vgg_model.input, outputs=predictions)

# Use an explicit, small learning rate for Adam
optimizer = Adam(learning_rate=0.0001)

# Compile the model with categorical_crossentropy loss
model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_val, y_val))

# Save the trained model next to its data. A forward slash is used because
# "\h" is an invalid escape sequence and a backslash is not a path separator
# outside Windows.
model.save(data_prefix + "hand_sign_model.h5")
                        



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Convert the model to a TensorFlow.js model

In [8]:
import tensorflow.keras as keras
import tensorflowjs as tfjs

def _export_to_tfjs(h5_path, tfjs_dir):
    """Load the Keras model stored at `h5_path`, write it to `tfjs_dir` in
    TensorFlow.js format, and return the loaded model."""
    keras_model = keras.models.load_model(h5_path)
    tfjs.converters.save_keras_model(keras_model, tfjs_dir)
    return keras_model


# Letters model: load the saved .h5 file and export it for TensorFlow.js
Letter_model = _export_to_tfjs("Letters/hand_sign_model.h5", "Letters/tfjs_model")
print("Letter model TFjs conversion Done")

# Numbers model: same export, different directory
Number_model = _export_to_tfjs("Numbers/hand_sign_model.h5", "Numbers/tfjs_model")
print("Number model TFjs conversion Done")

Letter model TFjs conversion Done
Number model TFjs conversion Done


Check the model with a test dataset

In [None]:
# Evaluate the model on the held-out split produced by train_test_split in the
# training cell. The notebook never defines X_test / y_test, so the validation
# split is the only held-out data available.
test_loss, test_acc = model.evaluate(X_val, y_val)
print("Validation loss:", test_loss)
print("Validation accuracy:", test_acc)

# Load the saved model and use it for inference (same path the TF.js conversion cell loads)
loaded_model = tf.keras.models.load_model("Letters/hand_sign_model.h5")
# Smoke test with random input; take the shape from the model itself so it
# always matches what the network was built with
img = np.random.randn(1, *loaded_model.input_shape[1:])
prediction = loaded_model.predict(img)
print("Prediction:", prediction)