In [1]:
import cv2
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model




In [3]:
def preprocess_roi(roi):
    """Turn a BGR region of interest into a 100x100 binary silhouette image.

    Steps: grayscale conversion, 5x5 Gaussian blur, inverted Otsu threshold,
    then everything outside the largest contour is blanked out.

    Args:
        roi: image crop in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        The masked threshold image resized to 100x100. When no contour is
        found, the unmasked threshold image is resized instead.
    """
    grayscale = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # Threshold value 0 is ignored: THRESH_OTSU picks the threshold itself.
    _, binary = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    outlines, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if not outlines:
        # Nothing segmented: fall back to the raw threshold image.
        isolated = binary
    else:
        # Keep only the biggest blob (presumably the hand) by filling its
        # contour into an empty mask and AND-ing it with the threshold image.
        biggest = max(outlines, key=cv2.contourArea)
        silhouette = np.zeros_like(binary)
        cv2.drawContours(silhouette, [biggest], -1, 255, thickness=cv2.FILLED)
        isolated = cv2.bitwise_and(binary, silhouette)

    return cv2.resize(isolated, (100, 100))

def capture_images(labels, num_samples, dataset_path='F:\\Dataset\\data',
                   save_every=5, camera_index=1):
    """Record preprocessed ROI images from a camera, one folder per label.

    For each label a directory ``<dataset_path>/<label>`` is created and
    frames are read from the camera until ``num_samples`` images have been
    written as ``<label>_<i>.png``. Only one frame out of every
    ``save_every`` is saved. Pressing ``q`` in an OpenCV window, or a failed
    camera read, stops the whole capture session.

    Args:
        labels: iterable of label names (used as folder and file-name prefix).
        num_samples: number of images to save per label.
        dataset_path: root directory of the dataset.
        save_every: save one frame out of this many frames (must be >= 1).
        camera_index: device index passed to ``cv2.VideoCapture``.

    Raises:
        ValueError: if ``save_every`` is smaller than 1.
    """
    if save_every < 1:
        raise ValueError('save_every must be >= 1')

    # Fixed capture window inside the (mirrored) camera frame.
    x_start, y_start, x_end, y_end = 300, 100, 600, 400

    cap = cv2.VideoCapture(camera_index)
    stop_requested = False
    try:
        for label in labels:
            label_dir = os.path.join(dataset_path, label)
            os.makedirs(label_dir, exist_ok=True)

            count = 0        # images saved so far for this label
            frame_index = 0  # frames seen so far for this label
            while count < num_samples:
                ret, frame = cap.read()
                if not ret:
                    print(f'Camera read failed while capturing {label}; stopping.')
                    stop_requested = True
                    break

                frame = cv2.flip(frame, 1)
                roi = frame[y_start:y_end, x_start:x_end]

                preprocessed_roi = preprocess_roi(roi)

                cv2.rectangle(frame, (x_start, y_start), (x_end, y_end), (0, 255, 0), 2)
                cv2.putText(frame, f'Capturing {label} - {count}/{num_samples}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
                cv2.imshow('Frame', frame)
                cv2.imshow('ROI', preprocessed_roi)

                # Sub-sample on the frame counter, not on the saved-image
                # counter: gating on `count` itself stalls after one save.
                if frame_index % save_every == 0:
                    image_path = os.path.join(label_dir, f'{label}_{count}.png')
                    cv2.imwrite(image_path, preprocessed_roi)
                    print(f'Captured {image_path}')
                    count += 1
                frame_index += 1

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    stop_requested = True
                    break

            if stop_requested:
                break
    finally:
        # Always free the camera and close the preview windows, even when
        # preprocessing or file writing raises.
        cap.release()
        cv2.destroyAllWindows()

# Gesture classes to record; each name becomes a sub-folder of dataset_path
labels = ['blank', 'fist', 'five', 'ok', 'thumbsdown', 'thumbsup']
dataset_path = 'F:\\Dataset\\data'

# Record 100 samples for every gesture class
capture_images(labels=labels, num_samples=100, dataset_path=dataset_path)

Captured F:\Dataset\data\blank\blank_0.png


In [4]:
# Image dimensions (must match the 100x100 images written by the capture step)
img_height, img_width = 100, 100
num_classes = 6
# Backslashes are escaped: '\D' and '\d' are invalid escape sequences and
# trigger a warning on recent Python versions.
dataset_path = 'F:\\Dataset\\data'

# Data generators for training and validation.
# One generator with validation_split=0.2 serves both subsets; pixel values
# are rescaled from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='training'
)
validation_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

# Build the CNN model: three conv/pool stages followed by a dense classifier
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# The generators yield one-hot labels (class_mode='categorical') and the
# network ends in a softmax, so the matching loss is categorical
# cross-entropy. Binary cross-entropy would score each output unit as an
# independent yes/no problem.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_generator, validation_data=validation_generator, epochs=10)

# Save the model
model.save('hand_gesture_model.h5')

Found 8264 images belonging to 6 classes.
Found 2063 images belonging to 6 classes.



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


In [5]:
# Gesture names, looked up by the index of the highest model output
# (see predict_gesture)
gesture_labels = [
    'blank',
    'fist',
    'five',
    'ok',
    'thumbsdown',
    'thumbsup',
]

# Restore the trained network from disk
model = load_model('hand_gesture_model.h5')

def preprocess_roi(roi):
    """Convert a BGR region of interest into a single-image model input batch.

    The ROI is converted to grayscale, blurred with a 5x5 Gaussian kernel and
    binarised with an inverted Otsu threshold. Everything outside the largest
    contour is blanked out, and the result is resized to 100x100, divided by
    255 and reshaped for the network.

    Args:
        roi: image crop in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        Array of shape ``(1, 100, 100, 1)`` holding the scaled image. When no
        contour is found, the unmasked threshold image is used.
    """
    grayscale = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    # Threshold value 0 is ignored: THRESH_OTSU picks the threshold itself.
    _, binary = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    outlines, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if not outlines:
        # Nothing segmented: fall back to the raw threshold image.
        isolated = binary
    else:
        # Keep only the biggest blob (presumably the hand) by filling its
        # contour into an empty mask and AND-ing it with the threshold image.
        biggest = max(outlines, key=cv2.contourArea)
        silhouette = np.zeros_like(binary)
        cv2.drawContours(silhouette, [biggest], -1, 255, thickness=cv2.FILLED)
        isolated = cv2.bitwise_and(binary, silhouette)

    small = cv2.resize(isolated, (100, 100))
    # Scale to [0, 1] and add batch and channel axes.
    return np.reshape(small / 255.0, (1, 100, 100, 1))

def predict_gesture(frame, model):
    """Classify the gesture shown inside the fixed ROI of a camera frame.

    The ROI ``frame[100:400, 300:600]`` is cropped, run through
    ``preprocess_roi`` and passed to ``model.predict``. The index of the
    highest output is looked up in the module-level ``gesture_labels`` list.

    Args:
        frame: image array; rows are indexed first, then columns.
        model: object exposing a Keras-style ``predict(batch, verbose=...)``.

    Returns:
        The entry of ``gesture_labels`` with the highest predicted score.

    Raises:
        ValueError: if the frame is too small for the ROI to contain any
            pixels.
    """
    # Define region of interest (ROI)
    x_start, y_start, x_end, y_end = 300, 100, 600, 400
    roi = frame[y_start:y_end, x_start:x_end]
    if roi.size == 0:
        # Slicing past the frame border silently yields an empty array, which
        # would otherwise surface as an opaque OpenCV error downstream.
        raise ValueError(
            f'Frame of shape {frame.shape} does not cover the ROI '
            f'[{y_start}:{y_end}, {x_start}:{x_end}]'
        )
    processed_roi = preprocess_roi(roi)

    # Predict gesture. verbose=0 keeps Keras from printing a progress bar for
    # every frame when this is called inside the capture loop.
    result = model.predict(processed_roi, verbose=0)
    class_index = np.argmax(result)
    return gesture_labels[class_index]

# Rectangle drawn on the preview; same coordinates as the ROI cropped inside
# predict_gesture. Defined once instead of on every frame.
x_start, y_start, x_end, y_end = 300, 100, 600, 400

cap = cv2.VideoCapture(1)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera unavailable or stream ended.
            break

        # Mirror the image so on-screen movement matches the user's hand.
        frame = cv2.flip(frame, 1)
        gesture = predict_gesture(frame, model)

        # Display the gesture
        cv2.putText(frame, gesture, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.rectangle(frame, (x_start, y_start), (x_end, y_end), (0, 255, 0), 2)
        cv2.imshow('Hand Gesture Recognition', frame)

        # Press 'q' in the preview window to quit.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Release the camera and close the window even if prediction raises;
    # otherwise the device stays held by the still-running kernel.
    cap.release()
    cv2.destroyAllWindows()

