In [None]:
%pip install mediapipe
%pip install opencv-python
%pip install numpy
%pip install keras
%pip install tensorflow
%pip install scikit-learn

Imports

In [1]:
import cv2
import mediapipe as mp
import os
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
import tensorflow as tf


Extracting hand landmarks

In [2]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1)
def extract_hand_landmarks(image_path):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        landmarks = np.array([[lm.x, lm.y, lm.z] for lm in results.multi_hand_landmarks[0].landmark]).flatten()
        print(f"Hand landmarks detected in image: {image_path}")
        return landmarks
    else:
        print(f"No hand landmarks detected in image: {image_path}")
        return None

Loading data

In [5]:
def parse_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    objects = []
    label_map = {'Help':0,'Water':1,'Game':2,'Movie':3,'What':4,'Me':5}
    for obj in root.findall('object'):
        label = obj.find('name').text
        objects.append((label_map[label]))
    return objects

def load_data(image_dir, annotation_dir):
    landmarks = []
    labels = []
    for xml_file in os.listdir(annotation_dir):
        image_file = os.path.join(image_dir, os.path.splitext(xml_file)[0] + '.jpg')
        if os.path.exists(image_file):
            landmark = extract_hand_landmarks(image_file)
            if landmark is not None:
                landmarks.append(landmark)
                labels.append(parse_xml(os.path.join(annotation_dir, xml_file)))
    return np.array(landmarks), np.array(labels)

# Paths to your training and testing datasets
data_dir_train_image = r'images/trainimages'
data_dir_train_label = r'images/trainlabel'
data_dir_test_image = r'images/testimages'
data_dir_test_label = r'images/testlabel'

In [6]:
# Load the images and labels
train_landmarks, train_labels = load_data(data_dir_train_image,data_dir_train_label)
test_landmarks, test_labels = load_data(data_dir_test_image,data_dir_test_label)

Hand landmarks detected in image: images/trainimages\photo_10_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimages\photo_10_2024-05-08_22-13-46.jpg
Hand landmarks detected in image: images/trainimages\photo_11_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimages\photo_11_2024-05-08_22-13-46.jpg
Hand landmarks detected in image: images/trainimages\photo_12_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimages\photo_12_2024-05-08_22-13-46.jpg
Hand landmarks detected in image: images/trainimages\photo_13_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimages\photo_13_2024-05-08_22-13-46.jpg
Hand landmarks detected in image: images/trainimages\photo_14_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimages\photo_14_2024-05-08_22-13-46.jpg
No hand landmarks detected in image: images/trainimages\photo_15_2024-05-08_22-13-05.jpg
Hand landmarks detected in image: images/trainimage

In [7]:
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

Creating the neural network

In [11]:
network = models.Sequential()
network.add(layers.Flatten(input_shape=(63,1)))
network.add(layers.Dense(128, activation='relu'))
network.add(layers.Dense(64, activation='relu'))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dense(6, activation='softmax'))

network.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

The training

In [29]:
early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
network.fit(train_landmarks, train_labels, epochs=50, batch_size=32,callbacks=[early_stopping], validation_data=(test_landmarks, test_labels))
network.evaluate(test_landmarks, test_labels, verbose=2) 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
2/2 - 0s - loss: 0.3458 - accuracy: 0.8980 - 21ms/epoch - 10ms/step


[0.34576860070228577, 0.8979591727256775]

Saving the model and the weights

In [30]:
model_json = network.to_json()
with open('model_architecture.json', 'w') as f:
  f.write(model_json)
network.save_weights('model_weights.h5')