In [1]:
import cv2
import numpy as np
from keras import models
import pandas as pd
from sklearn.preprocessing import StandardScaler
import mediapipe as mp
import io

In [2]:
model = models.load_model("data/saved_mode.h5")
class_labels = pd.read_csv("data/class_labels.csv")['gesture'].tolist()

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2)
mp_drawing = mp.solutions.drawing_utils


def predict_gesture(landmarks_seq, frame_rate, frame_width, frame_height):
    header = ['frame'] + [f'{coord}_{i}' for i in range(21) for coord in ('x', 'y', 'z')] + ['frame_rate', 'frame_width', 'frame_height']
    frames = list(range(len(landmarks_seq)))
    data = [[i] + landmarks_seq[i] + [frame_rate, frame_width, frame_height] for i in frames]  # Initialize distance_0_1 as 0
    
    df = pd.DataFrame(columns=header)

    landmark_cols = [col for col in df.columns if col not in ['frame', 'frame_rate', 'frame_width', 'frame_height', 'distance_0_1']]
    scaler = StandardScaler()

    df[landmark_cols] = scaler.fit_transform(df[landmark_cols])

    # Feature Engineering: Calculating distances between specific points
    df['distance_0_1'] = ((df['x_0'] - df['x_1'])**2 + 
                          (df['y_0'] - df['y_1'])**2 + 
                          (df['z_0'] - df['z_1'])**2) ** 0.5

    csv_buffer =io.StringIO
    df.to_csv(csv_buffer, columns=header)
    csv_buffer.seek(0)


    # read CSV into a DF - sim the training data structure
    input_df = pd.read_csv(csv_buffer)
    features = [col for col in input_df.columns if col not in ["frame", "distance_0_1"]]

    X = input_df[features].values
    X_reshaped = X.reshape((1, X.shape[0], X.shape[1]))

    prediction = model.predict(X_reshaped)
    predicted_label = class_labels[np.argmax(prediction)]


    return predicted_label

def record():
    cap = cv2.VideoCapture(0)
    recording = False
    landmarks_seq = []

    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    frame_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)
        frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                if recording:
                    # Extract landmarks
                    landmarks = [lm for lm in hand_landmarks.landmark]
                    landmarks_flat = [coord for lm in landmarks for coord in (lm.x, lm.y, lm.z)]
                    landmarks_seq.append(landmarks_flat)

        cv2.imshow("TEST", frame)

        key = cv2.waitKey(5) & 0xFF

        # Start recording on 'r' key press
        if key == ord('r'):
            recording = True
            print("Recording gestures...")
        
        # Stop recording and predict on 's' key press
        elif key == ord('s'):
            recording = False
            if landmarks_seq:
                # Predict gesture from recorded landmarks sequence
                predicted_gesture = predict_gesture(landmarks_seq, frame_rate, frame_width, frame_height)
                print(f"Predicted Gesture: {predicted_gesture}")
            else:
                print("No gestures recorded.")
            landmarks_seq = []

        # Exit on 'q' key press
        elif key == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()

record()



Recording gestures...


TypeError: descriptor 'write' for '_io.StringIO' objects doesn't apply to a 'str' object