In [2]:
!pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.1/38.8 MB 2.2 MB/s eta 0:00:18
   ---------------------------------------- 0.2/38.8 MB 1.6 MB/s eta 0:00:24
    --------------------------------------- 0.5/38.8 MB 3.9 MB/s eta 0:00:10
    --------------------------------------- 0.9/38.8 MB 5.0 MB/s eta 0:00:08
   - ----------------------

In [5]:
import cv2
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def extract_frames_from_videos():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for video_file in os.listdir(gesture_path):
            if video_file.endswith(".avi"):
                video_path = os.path.join(gesture_path, video_file)
                # Create a directory for storing frames of the current video file
                frames_directory = os.path.join(gesture_path, os.path.splitext(video_file)[0] + "_frames")
                os.makedirs(frames_directory, exist_ok=True)
                
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_path = os.path.join(frames_directory, f"frame{frame_count}.jpg")
                    cv2.imwrite(frame_path, frame)
                    frame_count += 1
                cap.release()

extract_frames_from_videos()


In [4]:
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def preprocess_frame(image_path):
    image = load_img(image_path, color_mode='grayscale', target_size=(64, 64))
    image = img_to_array(image)
    image /= 255.0  # Normalize to [0,1]
    return image

# Create dataset
X = []
y = []
label_map = {gesture: idx for idx, gesture in enumerate(gesture_dirs)}

for gesture in gesture_dirs:
    gesture_path = os.path.join(parent_directory, gesture)
    for sub_dir in os.listdir(gesture_path):
        sub_dir_path = os.path.join(gesture_path, sub_dir)
        if os.path.isdir(sub_dir_path):
            for frame_file in os.listdir(sub_dir_path):
                if frame_file.endswith(".jpg"):
                    frame_path = os.path.join(sub_dir_path, frame_file)
                    X.append(preprocess_frame(frame_path))
                    y.append(label_map[gesture])

X = np.array(X)
y = np.array(y)

print("Dataset shape:", X.shape)
print("Labels shape:", y.shape)


Dataset shape: (21471, 64, 64, 1)
Labels shape: (21471,)


In [10]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
y = to_categorical(y, num_classes=len(gesture_dirs))

# Define CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(gesture_dirs), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X, y, epochs=10, validation_split=0.2)

# Save the trained model
model.save('gesture_recognition_model.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 61ms/step - accuracy: 0.5556 - loss: 1.0612 - val_accuracy: 0.0000e+00 - val_loss: 16.1320
Epoch 2/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 72ms/step - accuracy: 0.8133 - loss: 0.4826 - val_accuracy: 0.0000e+00 - val_loss: 17.0388
Epoch 3/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 49ms/step - accuracy: 0.8702 - loss: 0.3453 - val_accuracy: 0.0000e+00 - val_loss: 18.3809
Epoch 4/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 49ms/step - accuracy: 0.8996 - loss: 0.2629 - val_accuracy: 0.0000e+00 - val_loss: 20.8998
Epoch 5/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 49ms/step - accuracy: 0.9244 - loss: 0.1977 - val_accuracy: 2.3283e-04 - val_loss: 24.2679
Epoch 6/10
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 49ms/step - accuracy: 0.9393 - loss: 0.1623 - val_accuracy: 0.0088 - val_loss



In [9]:
import numpy as np
import cv2
import pyautogui
import time
import tensorflow as tf
model = tf.keras.models.load_model('gesture_recognition_model.h5')

def preprocess_live_frame(frame):
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (64, 64))
    frame = frame.astype('float32') / 255.0
    frame = np.expand_dims(frame, axis=-1)  # Add channel dimension
    return frame

def predict_gesture(frame):
    processed_frame = preprocess_live_frame(frame)
    prediction = model.predict(np.expand_dims(processed_frame, axis=0))
    return np.argmax(prediction)

cap = cv2.VideoCapture(0)
accelerating = False

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gesture = predict_gesture(frame)

    if gesture == label_map["one"]:
        pyautogui.press('right')
        print('right')
    elif gesture == label_map["thumbs up"]:
        pyautogui.press('left')
        print('left')
    elif gesture == label_map["revolt"]:
        pyautogui.press('down')
        print('right')
    elif gesture == label_map["open hand"]:
        pyautogui.press('space')
        print('space')
    elif gesture == label_map["okay"] and accelerating:
        pyautogui.keyUp('up')
        print('up')
        accelerating = False
    elif gesture == label_map["cheese"] and not accelerating:
        pyautogui.keyDown('up')
        print('stop')
        accelerating = True

    cv2.imshow('Frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
space
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
right
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42

In [3]:
"""
Storing the frames in the same folder as the video directory.
import cv2
import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def extract_frames_from_videos():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for video_file in os.listdir(gesture_path):
            if video_file.endswith(".avi"):
                video_path = os.path.join(gesture_path, video_file)
                cap = cv2.VideoCapture(video_path)
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_path = os.path.join(gesture_path, f"{video_file}_frame{frame_count}.jpg")
                    cv2.imwrite(frame_path, frame)
                    frame_count += 1
                cap.release()

extract_frames_from_videos()"""


In [4]:
"""import os

# List of directories
gesture_dirs = ["cheese", "okay", "one", "open hand", "revolt", "spiderman", "thumbs up"]
parent_directory = "dataset"

def delete_extracted_frames():
    for gesture in gesture_dirs:
        gesture_path = os.path.join(parent_directory, gesture)
        for file in os.listdir(gesture_path):
            if "_frame" in file and file.endswith(".jpg"):
                file_path = os.path.join(gesture_path, file)
                os.remove(file_path)
                print(f"Deleted {file_path}")

delete_extracted_frames()"""


Deleted dataset\cheese\1.avi_frame0.jpg
Deleted dataset\cheese\1.avi_frame1.jpg
Deleted dataset\cheese\1.avi_frame10.jpg
Deleted dataset\cheese\1.avi_frame11.jpg
Deleted dataset\cheese\1.avi_frame12.jpg
Deleted dataset\cheese\1.avi_frame13.jpg
Deleted dataset\cheese\1.avi_frame14.jpg
Deleted dataset\cheese\1.avi_frame15.jpg
Deleted dataset\cheese\1.avi_frame16.jpg
Deleted dataset\cheese\1.avi_frame17.jpg
Deleted dataset\cheese\1.avi_frame18.jpg
Deleted dataset\cheese\1.avi_frame19.jpg
Deleted dataset\cheese\1.avi_frame2.jpg
Deleted dataset\cheese\1.avi_frame20.jpg
Deleted dataset\cheese\1.avi_frame21.jpg
Deleted dataset\cheese\1.avi_frame22.jpg
Deleted dataset\cheese\1.avi_frame23.jpg
Deleted dataset\cheese\1.avi_frame24.jpg
Deleted dataset\cheese\1.avi_frame25.jpg
Deleted dataset\cheese\1.avi_frame26.jpg
Deleted dataset\cheese\1.avi_frame27.jpg
Deleted dataset\cheese\1.avi_frame28.jpg
Deleted dataset\cheese\1.avi_frame29.jpg
Deleted dataset\cheese\1.avi_frame3.jpg
Deleted dataset\chee