In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping


In [2]:
# Set dataset directory
data_dir = r"C:\Users\mnlku\my personal\btech\internships\Machine Learning Prodigy InfoTech Internship\Hand Gesture Recognition using Leap Motion Data\leapGestRecog"

# Updated gesture map
gesture_map = {
    '01_palm': 0,
    '02_l': 1,
    '03_fist': 2,
    '04_fist_moved': 3,
    '05_thumb': 4,
    '06_index': 5,
    '07_ok': 6,
    '08_palm_moved': 7,
    '09_c': 8,
    '10_down': 9
}

image_size = 64
images = []
labels = []

# Read dataset
for user_folder in os.listdir(data_dir):
    user_path = os.path.join(data_dir, user_folder)
    if os.path.isdir(user_path):
        for gesture_folder in os.listdir(user_path):
            gesture_path = os.path.join(user_path, gesture_folder)
            label = gesture_map.get(gesture_folder)
            if label is not None:
                for file in os.listdir(gesture_path):
                    img_path = os.path.join(gesture_path, file)
                    try:
                        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (image_size, image_size))
                        images.append(img)
                        labels.append(label)
                    except Exception as e:
                        continue

# Prepare data
X = np.array(images).reshape(-1, image_size, image_size, 1) / 255.0
y = to_categorical(np.array(labels), num_classes=10)

print("Data shape:", X.shape, y.shape)


Data shape: (20000, 64, 64, 1) (20000, 10)


In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from tensorflow.keras import Input

model = Sequential([
    Input(shape=(image_size, image_size, 1)),  # ✅ Explicit input layer
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(10, activation='softmax')
])


In [5]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [6]:
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=10,
                    batch_size=32,
                    callbacks=[early_stop])


Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 27ms/step - accuracy: 0.7466 - loss: 0.7852 - val_accuracy: 0.9967 - val_loss: 0.0146
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 28ms/step - accuracy: 0.9937 - loss: 0.0255 - val_accuracy: 1.0000 - val_loss: 0.0023
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 27ms/step - accuracy: 0.9959 - loss: 0.0162 - val_accuracy: 1.0000 - val_loss: 7.4491e-04
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9976 - loss: 0.0101 - val_accuracy: 1.0000 - val_loss: 3.7712e-04
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9978 - loss: 0.0064 - val_accuracy: 0.9992 - val_loss: 0.0019
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9974 - loss: 0.0078 - val_accuracy: 1.0000 - val_loss: 1.5700e-04
Epoc

In [7]:
model.save("hand_gesture_model.keras")


In [11]:
import tensorflow as tf

model = tf.keras.models.load_model("hand_gesture_model.keras", compile=False)

inv_map = {v: k for k, v in gesture_map.items()}

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Define ROI
    x1, y1, x2, y2 = 100, 100, 300, 300
    roi = frame[y1:y2, x1:x2]
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (image_size, image_size))
    reshaped = resized.reshape(1, image_size, image_size, 1) / 255.0

    
    pred = model.predict(reshaped)
    class_id = np.argmax(pred)
    gesture_name = inv_map[class_id]

    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, f'{gesture_name}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    cv2.imshow("Hand Gesture Recognition", frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4