In [5]:
import cv2
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def extract_frames_from_videos():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for video_file in os.listdir(gesture_path):
            if video_file.endswith(".avi"):
                video_path = os.path.join(gesture_path, video_file)
                # Create a directory for storing frames of the current video file
                frames_directory = os.path.join(gesture_path, os.path.splitext(video_file)[0] + "_frames")
                os.makedirs(frames_directory, exist_ok=True)
                
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_path = os.path.join(frames_directory, f"frame{frame_count}.jpg")
                    cv2.imwrite(frame_path, frame)
                    frame_count += 1
                cap.release()

extract_frames_from_videos()


In [4]:
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def preprocess_frame(image_path):
    image = load_img(image_path, color_mode='grayscale', target_size=(64, 64))
    image = img_to_array(image)
    image /= 255.0  # Normalize to [0,1]
    return image

# Create dataset
X = []
y = []
label_map = {gesture: idx for idx, gesture in enumerate(gesture_dirs)}

for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture)
    for sub_dir in os.listdir(gesture_path):
        sub_dir_path = os.path.join(gesture_path, sub_dir)
        if os.path.isdir(sub_dir_path):
            for frame_file in os.listdir(sub_dir_path):
                if frame_file.endswith(".jpg"):
                    frame_path = os.path.join(sub_dir_path, frame_file)
                    X.append(preprocess_frame(frame_path))
                    y.append(label_map[gesture])

X = np.array(X)
y = np.array(y)

print("Dataset shape:", X.shape)
print("Labels shape:", y.shape)


Dataset shape: (21471, 64, 64, 1)
Labels shape: (21471,)


In [10]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
y = to_categorical(y, num_classes=len(gesture_dirs))

# Define CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(gesture_dirs), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=10, validation_split=0.2)

# Save the trained model
model.save('gesture_recognition_model.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 61ms/step - accuracy: 0.5556 - loss: 1.0612 - val_accuracy: 0.0000e+00 - val_loss: 16.1320
Epoch 2/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 72ms/step - accuracy: 0.8133 - loss: 0.4826 - val_accuracy: 0.0000e+00 - val_loss: 17.0388
Epoch 3/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 49ms/step - accuracy: 0.8702 - loss: 0.3453 - val_accuracy: 0.0000e+00 - val_loss: 18.3809
Epoch 4/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 49ms/step - accuracy: 0.8996 - loss: 0.2629 - val_accuracy: 0.0000e+00 - val_loss: 20.8998
Epoch 5/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 49ms/step - accuracy: 0.9244 - loss: 0.1977 - val_accuracy: 2.3283e-04 - val_loss: 24.2679
Epoch 6/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 49ms/step - accuracy: 0.9393 - loss: 0.1623 - val_accuracy: 0.0088 - val_loss



In [1]:
import cv2
import mediapipe as mp
import os

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"
subfolder_name = "cropped_hands"

def create_subfolder(gesture_path):
    subfolder_path = os.path.join(gesture_path, subfolder_name)
    if not os.path.exists(subfolder_path):
        os.makedirs(subfolder_path)
    return subfolder_path

def extract_hands_from_videos():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        subfolder_path = create_subfolder(gesture_path)
        for video_file in os.listdir(gesture_path):
            if video_file.endswith(".avi"):
                video_path = os.path.join(gesture_path, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Convert the BGR image to RGB
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    result = hands.process(rgb_frame)

                    # Draw hand annotations on the image.
                    if result.multi_hand_landmarks:
                        for hand_landmarks in result.multi_hand_landmarks:
                            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                            # Extract the bounding box of the hand
                            h, w, _ = frame.shape
                            x_min = w
                            y_min = h
                            x_max = y_max = 0

                            for landmark in hand_landmarks.landmark:
                                x, y = int(landmark.x * w), int(landmark.y * h)
                                if x < x_min:
                                    x_min = x
                                if y < y_min:
                                    y_min = y
                                if x > x_max:
                                    x_max = x
                                if y > y_max:
                                    y_max = y

                            # Add some padding to the bounding box
                            padding = 10
                            x_min = max(x_min - padding, 0)
                            y_min = max(y_min - padding, 0)
                            x_max = min(x_max + padding, w)
                            y_max = min(y_max + padding, h)

                            # Crop the hand region
                            cropped_hand = frame[y_min:y_max, x_min:x_max]

                            # Save the cropped hand image in the subfolder
                            frame_path = os.path.join(subfolder_path, f"{video_file}_hand_frame{frame_count}.jpg")
                            cv2.imwrite(frame_path, cropped_hand)

                    frame_count += 1
                cap.release()

extract_hands_from_videos()




In [13]:
import numpy as np
import cv2
import pyautogui
import time
import tensorflow as tf
model = tf.keras.models.load_model('gesture_recognition_model.h5')

def preprocess_live_frame(frame):
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (64, 64))
    frame = frame.astype('float32') / 255.0
    frame = np.expand_dims(frame, axis=-1)  # Add channel dimension
    return frame

def predict_gesture(frame):
    processed_frame = preprocess_live_frame(frame)
    prediction = model.predict(np.expand_dims(processed_frame, axis=0))
    return np.argmax(prediction)

cap = cv2.VideoCapture(0)
accelerating = False

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gesture = predict_gesture(frame)

    if gesture == label_map["one"]:
        pyautogui.press('right')
        print('right')
    elif gesture == label_map["thumbs up"]:
        pyautogui.press('left')
        print('left')
    elif gesture == label_map["revolt"]:
        pyautogui.press('down')
        print('down')
    elif gesture == label_map["open hand"]:
        pyautogui.press('space')
        print('space')
    elif gesture == label_map["okay"] and accelerating:
        pyautogui.keyUp('up')
        print('up')
        accelerating = False
    elif gesture == label_map["cheese"] and not accelerating:
        pyautogui.keyDown('up')
        print('stop')
        accelerating = True

    cv2.imshow('Frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
right
[1m1/1[0

In [17]:
import os
import numpy as np
import cv2
import mediapipe as mp
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Ensure GPU is used
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"
subfolder_name = "cropped_hands"

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)

def preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (64, 64))
    image = img_to_array(image)
    image /= 255.0  # Normalize to [0, 1]
    return image

# Initialize lists for storing data and labels
X = []
y = []
label_map = {gesture: idx for idx, gesture in enumerate(gesture_dirs)}

for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture, subfolder_name)
    if os.path.exists(gesture_path):
        for image_file in os.listdir(gesture_path):
            if image_file.endswith(".jpg"):
                image_path = os.path.join(gesture_path, image_file)
                image = preprocess_image(image_path)
                X.append(image)
                y.append(label_map[gesture])

X = np.array(X)
y = np.array(y)

# One-hot encode labels
y = to_categorical(y, num_classes=len(gesture_dirs))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TensorBoard callback for monitoring the training
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(len(gesture_dirs), activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=15, validation_data=(X_test, y_test), callbacks=[tb_callback])

# Save the trained model
model.save('gesture_recognition_cnn_model.h5')


Epoch 1/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 42ms/step - categorical_accuracy: 0.2759 - loss: 1.7770 - val_categorical_accuracy: 0.6394 - val_loss: 1.0458
Epoch 2/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 41ms/step - categorical_accuracy: 0.7042 - loss: 0.8581 - val_categorical_accuracy: 0.8030 - val_loss: 0.5661
Epoch 3/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 42ms/step - categorical_accuracy: 0.8531 - loss: 0.4314 - val_categorical_accuracy: 0.8284 - val_loss: 0.5411
Epoch 4/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 40ms/step - categorical_accuracy: 0.8946 - loss: 0.3026 - val_categorical_accuracy: 0.8639 - val_loss: 0.4497
Epoch 5/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - categorical_accuracy: 0.9369 - loss: 0.1850 - val_categorical_accuracy: 0.8588 - val_loss: 0.5539
Epoch 6/15
[1m173/173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m



In [7]:
import os
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.preprocessing.image import img_to_array

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"
subfolder_name = "cropped_hands"

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)

def extract_features(image_path):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = hands.process(image)
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.append([landmark.x, landmark.y, landmark.z])
            return np.array(landmarks).flatten()
    return np.zeros((63,))  # 21 landmarks * 3 coordinates (x, y, z)

X, y = [], []
label_map = {gesture: idx for idx, gesture in enumerate(gesture_dirs)}

for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture, subfolder_name)
    if os.path.exists(gesture_path):
        for image_file in os.listdir(gesture_path):
            if image_file.endswith(".jpg"):
                image_path = os.path.join(gesture_path, image_file)
                features = extract_features(image_path)
                X.append(features)
                y.append(label_map[gesture])

X = np.array(X)
y = np.array(y)


In [8]:
def create_sequences(data, labels, sequence_length=30):
    sequences = []
    sequence_labels = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length])
        sequence_labels.append(labels[i + sequence_length])
    return np.array(sequences), np.array(sequence_labels)

sequence_length = 30
X_sequences, y_sequences = create_sequences(X, y, sequence_length)

# One-hot encode labels
from keras.utils import to_categorical
y_sequences = to_categorical(y_sequences, num_classes=len(gesture_dirs))


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

# TensorBoard callback for monitoring the training
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# Build the LSTM model
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length, X_sequences.shape[2])))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(gesture_dirs), activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), callbacks=[tb_callback])

# Save the trained model
model.save('gesture_recognition_lstm_model.h5')


Epoch 1/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 64ms/step - categorical_accuracy: 0.1881 - loss: 1.9201 - val_categorical_accuracy: 0.2385 - val_loss: 1.8428
Epoch 2/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 59ms/step - categorical_accuracy: 0.2387 - loss: 1.8651 - val_categorical_accuracy: 0.2458 - val_loss: 1.8469
Epoch 3/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 63ms/step - categorical_accuracy: 0.2353 - loss: 1.8514 - val_categorical_accuracy: 0.2385 - val_loss: 1.8712
Epoch 4/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 113ms/step - categorical_accuracy: 0.2330 - loss: 1.8583 - val_categorical_accuracy: 0.2458 - val_loss: 1.8523
Epoch 5/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 119ms/step - categorical_accuracy: 0.2232 - loss: 1.9139 - val_categorical_accuracy: 0.2458 - val_loss: 1.8471
Epoch 6/200
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━

KeyboardInterrupt: 

In [7]:
import cv2
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

def augment_data(image_path, save_dir, prefix, num_augmented):
    image = cv2.imread(image_path)
    image = image.reshape((1,) + image.shape)  # Reshape to (1, height, width, channels)
    i = 0
    for batch in datagen.flow(image, batch_size=1, save_to_dir=save_dir, save_prefix=prefix, save_format='jpg'):
        i += 1
        if i >= num_augmented:
            break

# Define parent directory
parent_directory = "dataset"

# List of gestures
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
skewed_classes = ["cheese", "okay"]  # Example skewed classes
num_augmented = 10  # Number of augmented images per original image

for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture, "cropped_hands")
    save_dir = os.path.join(parent_directory, gesture, "cropped_hands_augmented")
    os.makedirs(save_dir, exist_ok=True)
    
    for frame_file in os.listdir(gesture_path):
        if frame_file.endswith(".jpg"):
            frame_path = os.path.join(gesture_path, frame_file)
            augment_data(frame_path, save_dir, gesture, num_augmented)


In [6]:
from tensorflow.keras.utils import to_categorical
X, y = [], []
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
# Original data
for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture)
    for frame_file in os.listdir(gesture_path):
        if frame_file.endswith(".jpg"):
            frame_path = os.path.join(gesture_path, frame_file)
            X.append(preprocess_frame(frame_path))
            y.append(label_map[gesture])

# Augmented data
for gesture in skewed_classes:
    gesture_path = os.path.join(parent_directory, f"{gesture}_augmented")
    for frame_file in os.listdir(gesture_path):
        if frame_file.endswith(".jpg"):
            frame_path = os.path.join(gesture_path, frame_file)
            X.append(preprocess_frame(frame_path))
            y.append(label_map[gesture])

X = np.array(X)
y = np.array(y)
y = to_categorical(y, num_classes=len(gesture_dirs))


In [2]:
import cv2
import numpy as np
import pyautogui
import mediapipe as mp
import tensorflow as tf
import time

# Load the trained model
model = tf.keras.models.load_model('gesture_recognition_cnn_model.h5')

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# Gesture label mapping
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
label_map = {gesture: idx for idx, gesture in enumerate(gesture_dirs)}

def preprocess_live_frame(frame):
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (64, 64)) 
    frame = frame.astype('float32') / 255.0
    frame = np.expand_dims(frame, axis=-1)  # Add channel dimension
    return frame

def predict_gesture(frame):
    processed_frame = preprocess_live_frame(frame)
    prediction = model.predict(np.expand_dims(processed_frame, axis=0))
    return np.argmax(prediction)

cap = cv2.VideoCapture(0)
accelerating = False

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the BGR image to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    # Draw hand annotations on the image and extract the hand region
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Extract the bounding box of the hand
            h, w, _ = frame.shape
            x_min = w
            y_min = h
            x_max = y_max = 0

            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                if x < x_min:
                    x_min = x
                if y < y_min:
                    y_min = y
                if x > x_max:
                    x_max = x
                if y > y_max:
                    y_max = y

            # Add some padding to the bounding box
            padding = 10
            x_min = max(x_min - padding, 0)
            y_min = max(y_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_max = min(y_max + padding, h)

            # Crop the hand region
            cropped_hand = frame[y_min:y_max, x_min:x_max]

            gesture = predict_gesture(cropped_hand)

            if gesture == label_map["one"]:
                pyautogui.keyDown('right')
                print('right')
                time.sleep(2)  # Hold the right key for 2 seconds
                pyautogui.keyUp('right')

            elif gesture == label_map["thumbs up"]:
                pyautogui.keyDown('left')
                print('left')
                time.sleep(2)  # Hold the left key for 2 seconds
                pyautogui.keyUp('left')

            elif gesture == label_map["revolt"]:
                pyautogui.press('down')
                print('down')

            elif gesture == label_map["open hand"]:
                pyautogui.press('space')
                print('space')

            elif gesture == label_map["okay"] and accelerating:
                pyautogui.keyUp('up')
                print('up')
                accelerating = False

            elif gesture == label_map["cheese"] and not accelerating:
                pyautogui.keyDown('up')
                print('stop')
                accelerating = True

            cv2.imshow('Frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
space
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
down
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
left
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
space
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━

: 

In [3]:
"""
Storing the frames in the same folder as the video directory.
import cv2
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def extract_frames_from_videos():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for video_file in os.listdir(gesture_path):
            if video_file.endswith(".avi"):
                video_path = os.path.join(gesture_path, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_path = os.path.join(gesture_path, f"{video_file}_frame{frame_count}.jpg")
                    cv2.imwrite(frame_path, frame)
                    frame_count += 1
                cap.release()

extract_frames_from_videos()"""


In [4]:
"""import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def delete_extracted_frames():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for file in os.listdir(gesture_path):
            if "_frame" in file and file.endswith(".jpg"):
                file_path = os.path.join(gesture_path, file)
                os.remove(file_path)
                print(f"Deleted {file_path}")

delete_extracted_frames()"""


Deleted dataset\cheese\1.avi_frame0.jpg
Deleted dataset\cheese\1.avi_frame1.jpg
Deleted dataset\cheese\1.avi_frame10.jpg
Deleted dataset\cheese\1.avi_frame11.jpg
Deleted dataset\cheese\1.avi_frame12.jpg
Deleted dataset\cheese\1.avi_frame13.jpg
Deleted dataset\cheese\1.avi_frame14.jpg
Deleted dataset\cheese\1.avi_frame15.jpg
Deleted dataset\cheese\1.avi_frame16.jpg
Deleted dataset\cheese\1.avi_frame17.jpg
Deleted dataset\cheese\1.avi_frame18.jpg
Deleted dataset\cheese\1.avi_frame19.jpg
Deleted dataset\cheese\1.avi_frame2.jpg
Deleted dataset\cheese\1.avi_frame20.jpg
Deleted dataset\cheese\1.avi_frame21.jpg
Deleted dataset\cheese\1.avi_frame22.jpg
Deleted dataset\cheese\1.avi_frame23.jpg
Deleted dataset\cheese\1.avi_frame24.jpg
Deleted dataset\cheese\1.avi_frame25.jpg
Deleted dataset\cheese\1.avi_frame26.jpg
Deleted dataset\cheese\1.avi_frame27.jpg
Deleted dataset\cheese\1.avi_frame28.jpg
Deleted dataset\cheese\1.avi_frame29.jpg
Deleted dataset\cheese\1.avi_frame3.jpg
Deleted dataset\chee