In [1]:
pip install opencv-python mediapipe numpy

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\hp\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [9]:
import cv2
import os

# Define the static gestures
static_gestures = ["thumbs_up", "victory", "index", "open_palm", "smile", "three", "four", "yo"]

# Ask user for the gesture name
gesture_name = input(f"Enter the gesture name from {static_gestures}: ").strip()

if gesture_name not in static_gestures:
    print("Invalid gesture name! Please enter a valid gesture.")
    exit()

# Create a directory to save images
save_path = f"static_dataset/{gesture_name}"
os.makedirs(save_path, exist_ok=True)

cap = cv2.VideoCapture(0)  # Open webcam
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    resized = cv2.resize(gray, (64, 64))  # Resize to 64x64 pixels
    
    # Save the image
    cv2.imwrite(f"{save_path}/{count}.jpg", resized)
    count += 1

    # Display the video with text overlay
    cv2.putText(frame, f"Collecting {gesture_name}: {count}/500", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Static Gesture Collection", frame)

    # Stop after collecting 500 images or press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q') or count >= 500:
        break

cap.release()
cv2.destroyAllWindows()

Enter the gesture name from ['thumbs_up', 'victory', 'index', 'open_palm', 'smile', 'three', 'four', 'yo']:  yo


In [16]:
# Define dynamic gestures
dynamic_gestures = ["hi", "circle", "swipe_left", "zoom_in_out", "wave", "sixer"]

# Ask user for the gesture name
gesture_name = input(f"Enter the dynamic gesture name from {dynamic_gestures}: ").strip()

if gesture_name not in dynamic_gestures:
    print("Invalid gesture name! Please enter a valid gesture.")
    exit()

# Create directory to save videos
save_path = f"dynamic_dataset/{gesture_name}"
os.makedirs(save_path, exist_ok=True)

cap = cv2.VideoCapture(0)  # Open webcam
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

video_name = f"{save_path}/{gesture_name}.avi"
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Video format
out = cv2.VideoWriter(video_name, fourcc, 20.0, (640, 480))  # 20 FPS, 640x480 resolution

print(f"Recording {gesture_name}... Press 'q' to stop.")
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    out.write(frame)  # Save video frame
    cv2.imshow("Recording Dynamic Gesture", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to stop recording
        break

cap.release()
out.release()
cv2.destroyAllWindows()
print(f"Saved video: {video_name}")

Enter the dynamic gesture name from ['hi', 'circle', 'swipe_left', 'zoom_in_out', 'wave', 'sixer']:  sixer


Recording sixer... Press 'q' to stop.
Saved video: dynamic_dataset/sixer/sixer.avi


In [17]:
import cv2
import os

def extract_frames(video_path, output_folder, frame_rate=5):
    os.makedirs(output_folder, exist_ok=True)
    
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_interval = max(1, fps // frame_rate)  # Capture 5 frames per second

    count = 0
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        if count % frame_interval == 0:
            frame_path = os.path.join(output_folder, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)
            frame_count += 1
            
        count += 1
    
    cap.release()
    print(f"Extracted {frame_count} frames from {video_path}")

# Process all videos in the dataset
dataset_path = "dynamic_dataset"
output_path = "dynamic_frames"

for gesture in os.listdir(dataset_path):
    gesture_path = os.path.join(dataset_path, gesture)
    output_gesture_path = os.path.join(output_path, gesture)
    
    if os.path.isdir(gesture_path):
        for video in os.listdir(gesture_path):
            video_path = os.path.join(gesture_path, video)
            extract_frames(video_path, output_gesture_path)


Extracted 256 frames from dynamic_dataset\circle\circle.avi
Extracted 227 frames from dynamic_dataset\hi\hi.avi
Extracted 415 frames from dynamic_dataset\sixer\sixer.avi
Extracted 215 frames from dynamic_dataset\swipe_left\swipe_left.avi
Extracted 345 frames from dynamic_dataset\wave\wave.avi
Extracted 0 frames from dynamic_dataset\zoom_in\out
Extracted 435 frames from dynamic_dataset\zoom_in_out\zoom_in_out.avi


In [18]:
!pip install tensorflow

Collecting tensorflow
  Using cached tensorflow-2.18.0-cp310-cp310-win_amd64.whl (7.5 kB)
Collecting tensorflow-intel==2.18.0
  Downloading tensorflow_intel-2.18.0-cp310-cp310-win_amd64.whl (390.0 MB)
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.31.0-cp310-cp310-win_amd64.whl (1.5 MB)
Collecting termcolor>=1.1.0
  Downloading termcolor-2.5.0-py3-none-any.whl (7.8 kB)
Collecting ml-dtypes<0.5.0,>=0.4.0
  Downloading ml_dtypes-0.4.1-cp310-cp310-win_amd64.whl (126 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1
  Downloading gast-0.6.0-py3-none-any.whl (21 kB)
Collecting tensorboard<2.19,>=2.18
  Downloading tensorboard-2.18.0-py3-none-any.whl (5.5 MB)
Collecting wrapt>=1.11.0
  Downloading wrapt-1.17.2-cp310-cp310-win_amd64.whl (38 kB)
Collecting grpcio<2.0,>=1.24.3
  Downloading grpcio-1.70.0-cp310-cp310-win_amd64.whl (4.3 MB)
Collecting h5py>=3.11.

You should consider upgrading via the 'C:\Users\hp\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [19]:
import IPython
IPython.display.clear_output()

In [21]:
import tensorflow as tf
!pip show tensorflow

Name: tensorflow
Version: 2.18.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: c:\users\hp\appdata\local\programs\python\python310\lib\site-packages
Requires: tensorflow-intel
Required-by: 


In [22]:
import sys
print(sys.executable)

C:\Users\hp\AppData\Local\Programs\Python\Python310\python.exe


In [24]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp310-cp310-win_amd64.whl (11.1 MB)
Collecting joblib>=1.2.0
  Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Collecting threadpoolctl>=3.1.0
  Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.5.0


You should consider upgrading via the 'C:\Users\hp\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [25]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

In [26]:
# Define paths
dataset_path = "dynamic_frames"
img_size = (64, 64)  # Resize images
gestures = sorted(os.listdir(dataset_path))  # Gesture labels
gesture_to_label = {gesture: idx for idx, gesture in enumerate(gestures)}

X, y = [], []

# Load images and labels
for gesture, label in gesture_to_label.items():
    gesture_path = os.path.join(dataset_path, gesture)
    
    for img_name in os.listdir(gesture_path):
        img_path = os.path.join(gesture_path, img_name)
        img = cv2.imread(img_path)
        
        if img is None:
            continue

        img = cv2.resize(img, img_size)  # Resize
        img = img_to_array(img) / 255.0  # Normalize

        X.append(img)
        y.append(label)

X = np.array(X)
y = np.array(y)

# Convert labels to categorical
y = to_categorical(y, num_classes=len(gestures))

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Dataset loaded successfully!")
print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")

Dataset loaded successfully!
Train size: 1745, Test size: 437


In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D((2, 2)),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(gestures), activation='softmax')  # Output layer
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Save the model
model.save("dynamic_gesture_model.h5")
print("Model trained and saved!")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 149ms/step - accuracy: 0.1967 - loss: 1.9513 - val_accuracy: 0.4531 - val_loss: 1.4020
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 136ms/step - accuracy: 0.4873 - loss: 1.3446 - val_accuracy: 0.5812 - val_loss: 1.0732
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 140ms/step - accuracy: 0.6539 - loss: 0.9771 - val_accuracy: 0.7025 - val_loss: 0.8281
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 139ms/step - accuracy: 0.7336 - loss: 0.7762 - val_accuracy: 0.7689 - val_loss: 0.6545
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 136ms/step - accuracy: 0.7595 - loss: 0.6914 - val_accuracy: 0.8261 - val_loss: 0.5515
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 136ms/step - accuracy: 0.8214 - loss: 0.5353 - val_accuracy: 0.8352 - val_loss: 0.4530
Epoch 7/10
[1m55/55[0m [



Model trained and saved!


In [28]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.8927 - loss: 0.2841 
Test Accuracy: 89.70%


In [29]:
def predict_gesture(image_path, model):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (64, 64)) / 255.0
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    
    prediction = model.predict(img)
    gesture_index = np.argmax(prediction)
    return gestures[gesture_index]

# Example usage
from tensorflow.keras.models import load_model

model = load_model("dynamic_gesture_model.h5")
predicted_gesture = predict_gesture("dynamic_frames/hi/frame_10.jpg", model)
print("Predicted Gesture:", predicted_gesture)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step
Predicted Gesture: hi


In [None]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model("dynamic_gesture_model.h5")

# Define gesture labels (same order as used during training)
gestures = ["hi", "circle", "swipe_left", "swipe_right", "wave", "sixer", "zoom_in/out"]  # Modify as needed

# Open webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

print("Press 'q' to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    # Preprocess the frame
    img = cv2.resize(frame, (64, 64)) / 255.0  # Resize & normalize
    img = np.expand_dims(img, axis=0)  # Add batch dimension

    # Predict gesture
    prediction = model.predict(img)
    gesture_index = np.argmax(prediction)
    predicted_gesture = gestures[gesture_index]

    # Display the prediction on the webcam feed
    cv2.putText(frame, f"Gesture: {predicted_gesture}", (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    
    cv2.imshow("Live Gesture Recognition", frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




Press 'q' to quit.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[