# In [9]:
import cv2
import numpy as np
from keras import models
import pandas as pd
from sklearn.preprocessing import StandardScaler
import mediapipe as mp

# In [17]:
# Recording state: flattened landmark vectors are appended to `landmarks_seq`
# only while `recording` is True.
recording = False
landmarks_seq = []

# MediaPipe hand tracker configured for a video stream, tracking up to two hands.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, static_image_mode=False)

# Trained classifier and the gesture names read from the 'gesture' column.
model = models.load_model("data/saved_mode.h5")
class_labels = pd.read_csv("data/class_labels.csv").loc[:, 'gesture'].tolist()
# Convert a captured frame into the colour order used by the landmark detector
def preprocess_frame(frame):
    """Return a copy of ``frame`` converted from BGR to RGB channel order."""
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

def extract_landmarks(frame):
    """Run the MediaPipe hand tracker on ``frame`` and record landmarks.

    While the module-level ``recording`` flag is true, one flattened
    ``[x0, y0, z0, x1, y1, z1, ...]`` vector per detected hand is appended
    to the module-level ``landmarks_seq`` list.

    Args:
        frame: Image passed straight to ``hands.process`` (the caller in
            this file converts it to RGB first).

    Returns:
        The result object returned by ``hands.process``. The same type is
        returned whether or not recording is active; previously the result
        was overwritten inside the loop by the last hand's landmark list.
    """
    results = hands.process(frame)

    detected_hands = results.multi_hand_landmarks
    if recording and detected_hands:
        for hand_landmarks in detected_hands:
            # Flatten every landmark of this hand into x, y, z triples.
            landmarks_flat = [
                coord
                for lm in hand_landmarks.landmark
                for coord in (lm.x, lm.y, lm.z)
            ]
            landmarks_seq.append(landmarks_flat)
    return results


# Feature extraction entry point (currently landmark-based only)
def extract_features(frame):
    """Return whatever ``extract_landmarks`` produces for ``frame``.

    Kept as a separate function so additional feature extraction logic
    can be added here later.
    """
    return extract_landmarks(frame)

# Function to predict gesture from a preprocessed frame
def predict_gesture(preprocessed_frame):
    """Predict the gesture label for one frame's feature vector.

    Args:
        preprocessed_frame: Features for a single sample, either a flat
            1-D vector or a 2-D array accepted by ``scaler.transform``.

    Returns:
        The entry of ``class_labels`` at the index of the highest model
        output.

    Note:
        Relies on the module-level ``scaler``, ``model`` and
        ``class_labels``; ``scaler`` must already be fitted.
    """
    features = preprocessed_frame
    # A flat vector is a single sample; scaler.transform requires 2-D input.
    if np.ndim(features) == 1:
        features = np.reshape(features, (1, -1))

    # Use the same scaler as in training
    features = np.asarray(scaler.transform(features))

    # Reshape for LSTM input (1 sample, 1 timestep, features). The feature
    # count is inferred with -1: len(features) is the number of rows, not
    # the number of features, and made the reshape fail for (1, n) input.
    X = features.reshape((1, 1, -1))

    # Make prediction
    prediction = model.predict(X)
    predicted_label = class_labels[int(np.argmax(prediction))]
    return predicted_label

# Initialize video capture from webcam
cap = cv2.VideoCapture(0)  # Change the argument to a video file path if working with a video file
if not cap.isOpened():
    # Fail early with a clear message instead of crashing later on a None frame.
    raise RuntimeError("Could not open video source 0")

# Frame dimensions as reported by the capture device (returned as floats)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

# NOTE(review): this scaler is newly constructed and never fitted, so
# scaler.transform() in predict_gesture will raise until it is replaced by
# the scaler that was fitted during training - TODO load the saved scaler.
scaler = StandardScaler()  # Example, replace with your actual preprocessing steps

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged or end of video file):
            # stop instead of passing None to cv2.cvtColor.
            break

        # Preprocess the frame
        processed_frame = preprocess_frame(frame)

        # Extract features from the preprocessed frame
        extracted_features = extract_features(processed_frame)

        # TODO: prediction is not wired in yet. Intended steps:
        #   1. scale the features with the scaler fitted during training
        #      (transform only - do not re-fit on live frames),
        #   2. predicted_gesture = predict_gesture(scaled_features),
        #   3. cv2.putText(frame, predicted_gesture, (50, 50),
        #      cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

        # Display the frame
        cv2.imshow('Real-time Gesture Recognition', frame)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and close all windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()


