In [1]:
import os
import cv2
import random
import numpy as np
import tensorflow as tf
import mediapipe as mp
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

2024-11-06 22:36:30.950148: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-06 22:36:30.952776: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-06 22:36:31.011199: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-06 22:36:31.011928: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Small CNN classifier: three convolution stages (the first two followed by
# 2x2 max-pooling), then a dense head ending in a 10-way softmax.
# Input is a 60x60 image with 3 channels.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(60, 60, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

2024-11-06 22:36:36.010475: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [3]:
# The "sparse" cross-entropy variant takes integer class ids as targets
# rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [4]:
# Gesture name -> integer class id. The position of a name in this tuple is
# its class id, so the order must not change once a model has been trained.
gesture_labels = {
    name: class_id
    for class_id, name in enumerate((
        'call_me',
        'fingers_crossed',
        'okay',
        'paper',
        'peace',
        'rock',
        'rock_on',
        'scissor',
        'thumbs',
        'up',
    ))
}

In [5]:
# Root directory of the image dataset (relative path). It is expected to hold
# one sub-directory per gesture name; both load_data() and show_random() read
# images from those sub-directories.
folder = 'HandGesture/images'

In [6]:
def load_data(folder):
    """Load every labelled gesture image found under ``folder``.

    ``folder`` is expected to contain one sub-directory per gesture. Only
    sub-directories whose name is a key of ``gesture_labels`` are read; any
    other entry is ignored.

    Parameters
    ----------
    folder : str
        Path of the dataset root directory.

    Returns
    -------
    images : numpy.ndarray
        Stack of the loaded images, each resized to 60x60 (pixel data is kept
        exactly as returned by ``cv2.imread``). Empty if nothing was loaded.
    labels : numpy.ndarray
        Integer class id of each image, in the same order as ``images``.
    """
    images = []
    labels = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and therefore any seeded split of it) reproducible.
    for gesture_dir in sorted(os.listdir(folder)):
        label = gesture_labels.get(gesture_dir)
        path = os.path.join(folder, gesture_dir)
        # Skip unknown gestures and stray files that merely share a gesture name.
        if label is None or not os.path.isdir(path):
            continue
        for img_name in sorted(os.listdir(path)):
            img = cv2.imread(os.path.join(path, img_name))
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None
                # instead of raising; resizing it would crash the whole load.
                continue
            images.append(cv2.resize(img, (60, 60)))
            labels.append(label)

    return np.array(images), np.array(labels)

In [7]:
# Read the whole dataset, then hold out 20% of it for validation.
# The fixed random_state seeds the shuffling done by the split.
images, labels = load_data(folder)
x_train, x_val, y_train, y_val = train_test_split(
    images,
    labels,
    random_state=42,
    test_size=0.2,
)

In [8]:
# Both generators are configured with the same single option: multiply each
# pixel value by 1/255. No augmentation is configured.
train_gen, val_gen = (
    ImageDataGenerator(rescale=1.0/255.0),
    ImageDataGenerator(rescale=1.0/255.0),
)

In [9]:
# Train for 10 epochs on batches of 32, validating on the held-out split.
# Left as a bare expression so the cell displays the returned History object.
model.fit(
    train_gen.flow(x_train, y_train, batch_size=32),
    epochs=10,
    validation_data=val_gen.flow(x_val, y_val, batch_size=32),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7eb4497c2020>

In [10]:
# MediaPipe handles shared by the cells below:
#   mp_drawing - drawing helpers, used to overlay landmarks on a frame
#   mp_hands   - the hands solution module (also provides HAND_CONNECTIONS)
#   hands      - a hand-landmark detector created with default options
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

I0000 00:00:1730912835.806962   50812 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1730912835.810161   51455 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.0.3-1pop1~1711635559~22.04~7a9f319), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)


In [11]:
def show_random():
    """Show a randomly chosen reference gesture image for three seconds.

    A gesture is picked at random from ``gesture_labels``, then a random
    readable image from that gesture's sub-directory of ``folder`` is shown
    in an OpenCV window titled 'Gesture to Match'.

    Returns
    -------
    int
        The class id of the gesture that was shown.

    Raises
    ------
    FileNotFoundError
        If the chosen gesture directory contains no file OpenCV can decode.
    """
    gesture_dir = random.choice(list(gesture_labels.keys()))
    gesture_path = os.path.join(folder, gesture_dir)

    # Try the files in random order: cv2.imread returns None (it does not
    # raise) for unreadable or non-image files, and cv2.imshow would then
    # fail on the None image.
    candidates = os.listdir(gesture_path)
    random.shuffle(candidates)
    img = None
    for img_name in candidates:
        img = cv2.imread(os.path.join(gesture_path, img_name))
        if img is not None:
            break
    if img is None:
        raise FileNotFoundError(f"No readable image found in {gesture_path!r}")

    cv2.imshow('Gesture to Match', img)
    cv2.waitKey(3000)
    cv2.destroyWindow('Gesture to Match')
    return gesture_labels[gesture_dir]

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [12]:
def detect(img):
    """Run the hand detector on ``img`` and draw any detected hands onto it.

    The image is converted from BGR to RGB before being passed to the
    detector. Landmarks and their connections are drawn on ``img`` itself,
    so the caller's array is modified in place.

    Returns
    -------
    tuple
        ``(img, landmarks)`` where ``landmarks`` is the detector's
        ``multi_hand_landmarks`` result (falsy when no hand was found).
    """
    result = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    landmarks = result.multi_hand_landmarks

    # A falsy result means no hands: iterate over nothing.
    for hand_landmarks in landmarks or ():
        mp_drawing.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    return img, landmarks

W0000 00:00:1730912835.860472   51449 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [13]:
def predict(roi, top_k=5):
    """Classify a cropped hand image and report the most likely gestures.

    The crop is resized to 60x60, scaled by 1/255 and passed to ``model`` as
    a batch of one. The ``top_k`` best class ids are printed together with
    their probabilities.

    Parameters
    ----------
    roi : numpy.ndarray or None
        Image crop to classify.
        NOTE(review): presumably a 3-channel BGR crop like the training
        images loaded with cv2.imread - confirm against the caller.
    top_k : int, optional
        How many of the highest-probability classes to return (default 5,
        the value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray or None
        Class ids ordered from most to least probable (at most ``top_k``
        entries), or ``None`` when ``roi`` is ``None`` or empty.
    """
    if roi is None or roi.size == 0:
        return None

    roi_resized = cv2.resize(roi, (60, 60))
    batch = np.expand_dims(roi_resized, axis=0) / 255.0
    # verbose=0: this is called once per webcam frame, and the default
    # per-call progress bar would flood the cell output.
    pred = model.predict(batch, verbose=0)[0]

    # Reverse first, then slice: unlike a negative slice this also yields an
    # empty result for top_k == 0.
    ind = np.argsort(pred)[::-1][:top_k]
    prob = pred[ind]

    print("Top Predictions:")
    for i, p in zip(ind, prob):
        print(f"Label {i}: {p:.2f}")

    return ind

In [14]:
def check(pred, true):
    """Report whether the expected class id is among the predicted ones.

    Parameters
    ----------
    pred : sequence of int, numpy.ndarray or None
        Predicted class ids. ``None`` (what ``predict`` returns for an empty
        crop) is treated as "no match" instead of raising ``TypeError``.
    true : int
        The class id the user is expected to show.

    Returns
    -------
    bool
        ``True`` if ``true`` is contained in ``pred``, otherwise ``False``.
        A message describing the outcome is printed in both cases.
    """
    if pred is not None and true in pred:
        print("Match found!")
        return True

    print("No match found. Try again.")
    return False

In [21]:
# Interactive game: show a random reference gesture, then read webcam frames
# until the classifier recognises that gesture on the first detected hand or
# the user presses 'q'.
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (index 0).")

    label = show_random()

    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed read yields no usable frame; passing it on would crash
            # inside detect().
            print("Could not read a frame from the camera.")
            break

        # detect() draws the landmarks on the frame it is given, so keep an
        # untouched copy to crop the classifier input from. The training
        # images are read straight from disk by load_data(), without this
        # overlay.
        clean_frame = frame.copy()
        processed_frame, hand = detect(frame)

        if hand:
            h, w, _ = processed_frame.shape

            # Landmark coordinates are fractions of the frame size; convert
            # them to pixels and take the bounding box of the first hand.
            xs = [int(lm.x * w) for lm in hand[0].landmark]
            ys = [int(lm.y * h) for lm in hand[0].landmark]

            # Pad the box by 20 px on each side, clamped to the frame.
            x_min, y_min = max(0, min(xs) - 20), max(0, min(ys) - 20)
            x_max, y_max = min(w, max(xs) + 20), min(h, max(ys) + 20)

            roi = clean_frame[y_min:y_max, x_min:x_max]
            pred = predict(roi)

            # predict() returns None for an empty crop (e.g. when the
            # landmarks fall outside the frame); treat that as "no match".
            if pred is not None and check(pred, label):
                cv2.putText(processed_frame, "Match!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                print("Gesture matched!")
                cv2.imshow('Hand Detection and Matching', processed_frame)
                cv2.waitKey(2000)
                break
            else:
                cv2.putText(processed_frame, "Try again", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow('Hand Detection and Matching', processed_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if a frame fails
    # mid-loop; otherwise the device stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()

Top Predictions:
Label 5: 1.00
Label 0: 0.00
Label 4: 0.00
Label 6: 0.00
Label 3: 0.00
Match found!
Gesture matched!
