In [2]:
import os
import cv2
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from cvzone.HandTrackingModule import HandDetector
from cvzone.ClassificationModule import Classifier
import math
import time
import pyautogui
import mediapipe as mp

In [3]:
import os
import cv2
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

def load_images_from_folder(folder):
    """Load a gesture dataset laid out as one sub-directory per class.

    Only sub-directories of ``folder`` are treated as classes; stray files at
    the top level are ignored so they cannot consume a class index. Classes
    and the files inside them are visited in sorted order, which keeps the
    class-to-index mapping (and the sample order) stable across runs and
    platforms -- ``os.listdir`` itself returns entries in arbitrary order.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        A tuple ``(images, labels, gesture_classes)`` where ``images`` is an
        array of the readable images resized to 32x32, ``labels`` is an array
        with the class index of each image, and ``gesture_classes`` maps each
        class (sub-directory) name to its index.
    """
    class_names = sorted(
        entry for entry in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, entry))
    )
    gesture_classes = {gesture_class: idx for idx, gesture_class in enumerate(class_names)}

    images = []
    labels = []
    for gesture_class, idx in gesture_classes.items():
        class_folder = os.path.join(folder, gesture_class)
        for filename in sorted(os.listdir(class_folder)):
            img = cv2.imread(os.path.join(class_folder, filename))
            if img is None:
                # cv2.imread returns None for anything it cannot decode; skip it.
                continue
            images.append(cv2.resize(img, (32, 32)))
            labels.append(idx)
    return np.array(images), np.array(labels), gesture_classes

# Train a small CNN on the gesture images and save it together with its label file.
try:
    # Dataset root (one sub-folder per gesture), relative to the working directory
    folder = "data"

    # Seed the Python, NumPy and TensorFlow RNGs so repeated runs are comparable
    tf.keras.utils.set_random_seed(42)

    # Load images from folders
    images, labels, gesture_classes = load_images_from_folder(folder)
    print(f"Loaded {len(images)} images.")

    # Fail early with a clear message instead of an opaque error further down
    if len(images) == 0 or len(gesture_classes) == 0:
        raise ValueError(f"No readable images found under '{folder}'.")

    # Normalize pixel values
    images = images / 255.0

    # Convert labels to one-hot encoding
    labels = to_categorical(labels, num_classes=len(gesture_classes))

    # Split dataset into training and validation sets
    train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=0.2, random_state=42)
    print(f"Split into {len(train_images)} training and {len(val_images)} validation samples.")

    # Define model architecture
    # NOTE(review): the model takes 32x32 inputs scaled to [0, 1]; confirm that
    # whatever loads 'Model/keras_model.h5' for inference feeds it the same way.
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(len(gesture_classes), activation='softmax')
    ])

    # Compile model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    print("Model compiled successfully.")

    # Train model
    model.fit(train_images, train_labels, epochs=10, validation_data=(val_images, val_labels))
    print("Model trained successfully.")

    # Save model and labels (one "<index> <class name>" line per class)
    os.makedirs('Model', exist_ok=True)
    model.save('Model/keras_model.h5')
    with open('Model/labels.txt', 'w') as f:
        for gesture_class, idx in gesture_classes.items():
            f.write(f"{idx} {gesture_class}\n")
    print("Model and labels saved successfully.")

    # Evaluate model. This reuses the validation split, so the figure printed
    # below is a validation score rather than a score on held-out test data.
    test_loss, test_acc = model.evaluate(val_images, val_labels)
    print('Test accuracy:', test_acc)

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so the cell visibly fails; otherwise later cells would run
    # against a missing or stale model.
    raise


Loaded 5741 images.
Split into 4592 training and 1149 validation samples.


  super().__init__(


Model compiled successfully.
Epoch 1/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.7469 - loss: 0.7704 - val_accuracy: 1.0000 - val_loss: 0.0023
Epoch 2/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0011 - val_accuracy: 1.0000 - val_loss: 0.0014
Epoch 3/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 1.0000 - loss: 4.9720e-04 - val_accuracy: 1.0000 - val_loss: 3.9773e-04
Epoch 4/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 1.0000 - loss: 3.1746e-04 - val_accuracy: 1.0000 - val_loss: 2.8295e-04
Epoch 5/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 1.0000 - loss: 1.3729e-04 - val_accuracy: 1.0000 - val_loss: 1.9602e-04
Epoch 6/10
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 1.0000 - loss: 1.0976e-04 - val_accu



Model trained successfully.
Model and labels saved successfully.
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 5.3623e-05
Test accuracy: 1.0


In [None]:
# Live preview: draw MediaPipe hand landmarks on the webcam feed until 'q' is pressed.
cap = cv2.VideoCapture(0)

# Check if the webcam is opened correctly
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): exit() would take the notebook kernel down.
    raise RuntimeError("Error: Could not open video capture device.")

# Initialize hand detector
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Give up after this many failed reads in a row instead of spinning forever
max_failed_reads = 30
failed_reads = 0

try:
    # The context manager closes the MediaPipe graph when the loop ends
    with mp_hands.Hands() as hands:
        while True:
            success, img = cap.read()
            if not success:
                print("Failed to capture image")
                failed_reads += 1
                if failed_reads >= max_failed_reads:
                    break
                continue
            failed_reads = 0

            imgOutput = img.copy()
            # MediaPipe expects RGB while OpenCV delivers BGR frames
            imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            result = hands.process(imgRGB)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    mp_draw.draw_landmarks(imgOutput, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            cv2.imshow("Hand Tracking", imgOutput)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

finally:
    cap.release()
    cv2.destroyAllWindows()
# Gesture control: classify the detected hand on each webcam frame and translate
# the predicted gesture into a media-player key press via pyautogui.


def _crop_with_margin(frame, bbox, margin):
    """Return the ``bbox`` region of ``frame`` grown by ``margin`` pixels.

    All four edges are clamped to the frame so that a box touching the border
    cannot produce negative indices, which NumPy would interpret as counting
    from the opposite edge.
    """
    x, y, w, h = bbox
    frame_h, frame_w = frame.shape[:2]
    x1 = min(max(x - margin, 0), frame_w)
    y1 = min(max(y - margin, 0), frame_h)
    x2 = min(max(x + w + margin, 0), frame_w)
    y2 = min(max(y + h + margin, 0), frame_h)
    return frame[y1:y2, x1:x2]


def _fit_on_white_square(crop, w, h, size):
    """Resize ``crop`` onto a white ``size`` x ``size`` canvas, centred on the short axis.

    ``w`` and ``h`` are the bounding-box width and height that decide which
    side is stretched to ``size``. The computed side is capped at ``size``
    because rounding up could otherwise overshoot the canvas by one pixel.
    """
    canvas = np.ones((size, size, 3), np.uint8) * 255
    if h / w > 1:
        new_w = min(size, math.ceil(size / h * w))
        gap = math.ceil((size - new_w) / 2)
        canvas[:, gap:gap + new_w] = cv2.resize(crop, (new_w, size))
    else:
        new_h = min(size, math.ceil(size / w * h))
        gap = math.ceil((size - new_h) / 2)
        canvas[gap:gap + new_h, :] = cv2.resize(crop, (size, new_h))
    return canvas


detector = HandDetector(maxHands=1)
# NOTE(review): cvzone's Classifier appears to resize its input to 224x224 before
# predicting -- confirm this matches the input shape of the saved model.
classifier = Classifier('Model/keras_model.h5', 'Model/labels.txt')

offset = 20
imgSize = 300

counter = 0

# NOTE(review): this list is indexed by the classifier output, so it presumably
# has to be in the same order as the indices in Model/labels.txt -- verify.
labels = ["free_hand","right","left", "v_up","v_down","max","min","stop"]

control_delay = 5  # Delay in seconds before performing another control action
last_action_time = time.time() - control_delay

# gesture -> (key to press, whether control_delay applies).
# "free_hand" is deliberately absent: it triggers nothing.
gesture_keys = {
    "v_up": ("up", False),       # volume up, repeats on every frame
    "v_down": ("down", False),   # volume down, repeats on every frame
    "stop": ("space", True),     # pause / play
    "max": ("f", True),          # enter full screen
    "min": ("esc", True),        # leave full screen
    "right": ("right", True),    # skip forward
    "left": ("left", True),      # rewind
}

# Open the camera here: a capture that has already been released yields no frames.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open video capture device.")

# Give up after this many failed reads in a row instead of spinning forever
max_failed_reads = 30
failed_reads = 0

try:
    while True:
        success, img = cap.read()
        if not success:
            print("Failed to capture image")
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                break
            continue
        failed_reads = 0

        imgOutput = img.copy()
        hands, img = detector.findHands(img)
        if hands:
            hand = hands[0]
            x, y, w, h = hand['bbox']
            imgCrop = _crop_with_margin(img, (x, y, w, h), offset)

            if w <= 0 or h <= 0 or imgCrop.size == 0:
                # Fall through to the imshow/waitKey below so the window keeps
                # refreshing and 'q' still works while frames are being skipped.
                print("Empty image crop. Skipping frame.")
            else:
                imgWhite = _fit_on_white_square(imgCrop, w, h, imgSize)
                prediction, index = classifier.getPrediction(imgWhite, draw=True)
                print(prediction, ":", index)

                # Guard against a model that has more outputs than names listed above
                label = labels[index] if 0 <= index < len(labels) else None

                cv2.rectangle(imgOutput, (x - offset, y - offset - 50), (x - offset + 90, y - offset - 50 + 50), (255, 0, 255),
                              cv2.FILLED)
                if label is not None:
                    cv2.putText(imgOutput, label, (x, y - 20), cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)
                cv2.rectangle(imgOutput, (x - offset, y - offset), (x + w + offset, y + h + offset), (255, 0, 255), 4)

                cv2.imshow("ImageCrop", imgCrop)
                cv2.imshow("ImageWhite", imgWhite)

                # Perform YouTube controls based on the predicted label
                current_time = time.time()
                if label in gesture_keys:
                    key_name, rate_limited = gesture_keys[label]
                    if not rate_limited or current_time - last_action_time >= control_delay:
                        pyautogui.press(key_name)
                        last_action_time = current_time

        cv2.imshow("Image", imgOutput)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()