In [1]:
import cv2
import numpy as np
import os
import random
import glob
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from keras.optimizers import Adam

# Root directory of the gesture image dataset; load_dataset() reads one
# sub-folder per gesture name under this location.
DATASET_PATH = "C:/Users/nandu/Downloads/archive/data"

# Load and preprocess the dataset
def load_dataset(dataset_path=None, max_per_gesture=1600):
    """Load the gesture images from disk and build matching one-hot labels.

    Each gesture is expected in its own sub-folder of ``dataset_path``
    (``blank``, ``ok``, ``thumbsup``, ``thumbsdown``, ``fist``, ``five``).
    Every readable file is converted to grayscale and resized.

    Args:
        dataset_path: Root folder of the dataset. Defaults to the module-level
            ``DATASET_PATH`` when omitted.
        max_per_gesture: Upper bound on the number of files taken from each
            gesture folder (1600 by default).

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_images, 100, 120, 1)`` and ``y``
        has shape ``(n_images, 6)`` with one one-hot row per loaded image.

    Raises:
        FileNotFoundError: If no readable image was found under
            ``dataset_path``.
    """
    list_of_gestures = ['blank', 'ok', 'thumbsup', 'thumbsdown', 'fist', 'five']
    if dataset_path is None:
        dataset_path = DATASET_PATH

    loaded_images = []
    class_indices = []
    for class_index, gesture in enumerate(list_of_gestures):
        # Sort so the selected subset does not depend on filesystem ordering.
        image_paths = sorted(glob.glob(os.path.join(dataset_path, gesture, '*')))
        for img_path in image_paths[:max_per_gesture]:
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread returns None (no exception) for unreadable or
                # non-image files; skip them instead of crashing in cvtColor.
                continue
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # NOTE(review): cv2.resize takes (width, height), so this yields a
            # 120x100 (rows x cols) array; the reshape below to (N, 100, 120, 1)
            # reinterprets the buffer rather than transposing it. Kept as-is
            # because getPredictedClass applies the identical resize + reshape
            # at inference time -- change both together or not at all.
            gray_image = cv2.resize(gray_image, (100, 120))
            loaded_images.append(gray_image)
            # Record the label alongside the image so X and y always have the
            # same length, even when a folder holds fewer files than the cap.
            class_indices.append(class_index)

    if not loaded_images:
        raise FileNotFoundError(
            f"No readable gesture images found under: {dataset_path}"
        )

    X = np.asarray(loaded_images)
    # One-hot encode: row i of the identity matrix is the vector for class i.
    y = np.eye(len(list_of_gestures), dtype=int)[class_indices]

    X = X.reshape(X.shape[0], 100, 120, 1)
    return X, y

# CNN model creation
def create_model():
    """Build and compile the gesture-classification CNN.

    The network is two Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout
    stages (32 then 64 filters), followed by a 128-unit dense layer with
    dropout and a 6-way softmax output. It is compiled with the Adam optimizer
    and categorical cross-entropy loss, tracking accuracy.

    Returns:
        The compiled ``Sequential`` model expecting inputs of shape
        ``(100, 120, 1)``.
    """
    first_conv_stage = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 120, 1)),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    second_conv_stage = [
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(6, activation='softmax'),  # one output unit per gesture
    ]

    network = Sequential(first_conv_stage + second_conv_stage + classifier_head)
    network.compile(
        optimizer=Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Train model
def train_model(model, X_train, y_train, X_test, y_test, epochs=50, batch_size=128,
                model_path='C:/Users/nandu/Downloads/archive/data/hand_gesture_recognition.h5'):
    """Fit ``model`` on the training data and save it to disk.

    Args:
        model: Object exposing ``fit`` and ``save`` (a compiled Keras model).
        X_train, y_train: Training inputs and labels.
        X_test, y_test: Held-out data, passed to ``fit`` as ``validation_data``.
        epochs: Number of training epochs (default 50).
        batch_size: Mini-batch size (default 128).
        model_path: Destination file for the saved model.

    Returns:
        Whatever ``model.fit`` returns (for Keras, the training history), so
        callers can inspect or plot the learning curves.
    """
    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(X_test, y_test),
    )
    model.save(model_path)
    return history


if __name__ == "__main__":
    # Training pipeline: load images, split, build the CNN, train and save it.
    # Load the grayscale images and their one-hot labels
    X, y = load_dataset()
    
    # Hold out 20% of the samples; random_state fixes the shuffle so the
    # split is the same on every run
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
    
    # Build the model, then train it; train_model uses the held-out split as
    # validation data and writes the trained model to disk
    model = create_model()
    train_model(model, X_train, y_train, X_test, y_test)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 720ms/step - accuracy: 0.6680 - loss: 5.6892 - val_accuracy: 0.8682 - val_loss: 0.3705
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 676ms/step - accuracy: 0.8945 - loss: 0.2854 - val_accuracy: 0.9807 - val_loss: 0.0536
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 678ms/step - accuracy: 0.9284 - loss: 0.1849 - val_accuracy: 0.9854 - val_loss: 0.0354
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 671ms/step - accuracy: 0.9519 - loss: 0.1141 - val_accuracy: 0.9953 - val_loss: 0.0175
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 667ms/step - accuracy: 0.9555 - loss: 0.1030 - val_accuracy: 0.9984 - val_loss: 0.0091
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 688ms/step - accuracy: 0.9613 - loss: 0.0864 - val_accuracy: 0.9953 - val_loss: 0.0131
Epoch 7/50
[1m60/60[



In [4]:
import cv2
import numpy as np
from keras.models import load_model

# Running background model used for background subtraction.
# Stays None until run_avg() receives its first frame; segment() reads it.
bg = None

# Function to segment the hand region
def segment(image, threshold=25):
    """Segment the hand region in ``image`` by subtracting the background.

    Args:
        image: Grayscale frame (ROI) to compare against the global ``bg``.
        threshold: Minimum absolute difference for a pixel to count as
            foreground.

    Returns:
        ``(thresholded, segmented)`` where ``thresholded`` is the binary
        foreground mask and ``segmented`` is the contour with the largest
        area, or ``None`` when no background has been captured yet or no
        contour is found.
    """
    # Without a background model there is nothing to subtract from.
    if bg is None:
        return None

    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)
    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list in both.
    cnts = cv2.findContours(
        thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(cnts) == 0:
        return None

    # based on contour area, get the maximum contour which is the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

# Function to capture background for background subtraction
def run_avg(image, accumWeight):
    """Update the global background model ``bg`` with a new frame.

    On the first call (``bg`` is None) the frame is copied and stored as a
    float array. On later calls the frame is blended into ``bg`` in place via
    ``cv2.accumulateWeighted`` using ``accumWeight``.

    Args:
        image: Frame to fold into the background model.
        accumWeight: Weight passed to ``cv2.accumulateWeighted``.
    """
    global bg
    if bg is not None:
        # Background already seeded: blend the new frame into it.
        cv2.accumulateWeighted(image, bg, accumWeight)
    else:
        # First frame: seed the background with a float copy.
        bg = image.copy().astype("float")

# Load Model Weights
def _load_weights():
    try:
        model = load_model('C:/Users/nandu/Downloads/archive/data/hand_gesture_recognition.h5')
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

# Predict the class of the gesture from the live feed
def getPredictedClass(model, image_path='Temp.png'):
    """Classify the gesture stored in an image file.

    Args:
        model: Model exposing ``predict_on_batch``; its output is reduced with
            ``np.argmax`` to pick the gesture index.
        image_path: Image file to classify. Defaults to ``'Temp.png'``, the
            file written by ``run_live_feed``.

    Returns:
        The human-readable gesture name for the highest-scoring class.

    Raises:
        FileNotFoundError: If the image at ``image_path`` cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # NOTE(review): cv2.resize takes (width, height), so this produces a
    # 120x100 array that the reshape then reinterprets as 100x120. This
    # mirrors the preprocessing in load_dataset exactly and must stay in sync
    # with it.
    gray_image = cv2.resize(gray_image, (100, 120))
    gray_image = gray_image.reshape(1, 100, 120, 1)

    prediction = model.predict_on_batch(gray_image)
    predicted_class = int(np.argmax(prediction))

    # Index order matches the training label order in load_dataset.
    gestures = ["Blank", "OK", "Thumbs Up", "Thumbs Down", "Fist", "Five"]
    return gestures[predicted_class]

# Run live video feed to detect hand gesture
def run_live_feed():
    """Run the webcam loop that detects and labels hand gestures.

    The first 30 frames are used to build the background model for the region
    of interest. After that, each frame's ROI is segmented; when a hand
    contour is found, the thresholded mask is written to ``Temp.png``,
    classified, and the label is drawn on the preview. Press ``q`` to quit.

    The function returns early (without opening the camera) if the model
    cannot be loaded, stops if the camera fails to deliver a frame, and always
    releases the camera and closes the OpenCV windows on exit.
    """
    accumWeight = 0.5
    # ROI bounds in the mirrored 700x700 frame: rows top:bottom, cols right:left.
    top, right, bottom, left = 10, 350, 225, 590
    num_frames = 0

    model = _load_weights()
    if model is None:
        # _load_weights already printed the underlying error.
        print("Gesture model could not be loaded; live feed not started.")
        return

    camera = cv2.VideoCapture(0)  # Webcam capture
    try:
        while True:
            grabbed, frame = camera.read()
            if not grabbed or frame is None:
                # No frame delivered (camera missing, busy or disconnected).
                print("Could not read a frame from the camera; stopping.")
                break

            frame = cv2.resize(frame, (700, 700))
            frame = cv2.flip(frame, 1)  # mirror so the preview acts like a mirror

            clone = frame.copy()
            roi = frame[top:bottom, right:left]
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            if num_frames < 30:
                # Calibration phase: accumulate the background model.
                run_avg(gray, accumWeight)
            else:
                hand = segment(gray)
                if hand is not None:
                    (thresholded, segmented) = hand
                    # Shift contour from ROI coordinates to full-frame coordinates.
                    cv2.drawContours(clone, [segmented + (right, top)], -1, (0, 0, 255))
                    cv2.imwrite('Temp.png', thresholded)
                    predictedClass = getPredictedClass(model)
                    cv2.putText(clone, predictedClass, (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow("Thresholded", thresholded)

            cv2.rectangle(clone, (left, top), (right, bottom), (0, 255, 0), 2)
            num_frames += 1
            cv2.imshow("Video Feed", clone)

            keypress = cv2.waitKey(1) & 0xFF
            if keypress == ord("q"):
                break
    finally:
        # Always free the device and close windows, even if a frame failed.
        camera.release()
        cv2.destroyAllWindows()

# ----------------- Main Execution -----------------

if __name__ == "__main__":
    # Start the webcam loop; it runs until "q" is pressed in the preview window.
    run_live_feed()


