In [1]:
#%pip install tensorflow
import cv2
import os
import time
import numpy as np
import mediapipe as mp
import tensorflow as tf
from utils.image_treatment import preprocess_mediapipe_landmarks

In [2]:
VIDEO_FOLDER = "test_videos"

In [3]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [4]:
os.makedirs(VIDEO_FOLDER, exist_ok=True)

In [5]:
#model_save_path = 'data/keypoint_classifier.keras'
model_save_path = 'data/keypoint_classifier_merge.keras'
model = tf.keras.models.load_model(model_save_path)
NUM_CLASSES = model.output_shape[-1]
NUM_CLASSES

10

In [6]:
GESTURE_LABELS = {
    0: 'OPEN_PALM',
    1: 'FIST',
    2: 'MOVE_UP',
    3: 'OK_SIGN',
    4: 'MOVE_DOWN',
    5: 'BACK',
    6: 'LEFT',
    7: 'RIGHT',
    8: 'INDEX_FRONT',
    9: 'PINKY_BACK'
}

In [10]:
timestamp = time.strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(VIDEO_FOLDER, f"tello_gesture_trained_{timestamp}.avi")

cap = cv2.VideoCapture(0)
width, height = 640, 480
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), 20.0, (width, height))

with mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5) as hands:

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip horizontal pour effet miroir
        frame = cv2.flip(frame, 1)
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Affichage des points
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Préparer vecteur d’entrée
                h, w = frame.shape[:2]
                input_vector = preprocess_mediapipe_landmarks(hand_landmarks, w, h)
                input_vector = np.expand_dims(input_vector, axis=0)  # (1, 42)
                print("input is", input_vector)
                # Prédiction
                prediction = model.predict(input_vector)
                pred_class = int(np.argmax(prediction))
                print("prediction is", pred_class)
                pred_label = GESTURE_LABELS.get(pred_class, 'Unknown')

                # Affichage sur la frame
                cv2.putText(frame, f'Gesture: {pred_label}', (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                
                #print(f"Gesture reconnu : {pred_label}")

        out.write(frame)
        cv2.imshow("Hand Gesture Recognition", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
out.release()
cv2.destroyAllWindows()

input is [[ 0.          0.         -0.19724771 -0.07798165 -0.33486238 -0.2293578
  -0.4357798  -0.37155962 -0.5412844  -0.48623854 -0.1146789  -0.49082568
  -0.12385321 -0.7201835  -0.12844037 -0.8715596  -0.1146789  -1.
   0.0412844  -0.48623854  0.09174312 -0.71100914  0.14220184 -0.85779816
   0.19724771 -0.9724771   0.16972478 -0.43119267  0.23394495 -0.63761467
   0.29357797 -0.75688076  0.35321102 -0.86238533  0.27522936 -0.3440367
   0.3761468  -0.48165137  0.44495413 -0.559633    0.5091743  -0.6330275 ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
prediction is 0
input is [[ 0.          0.         -0.14932127 -0.239819   -0.24886878 -0.42081448
  -0.3438914  -0.5656109  -0.44343892 -0.67420816 -0.13122173 -0.5882353
  -0.14932127 -0.77375567 -0.15384616 -0.8914027  -0.15384616 -1.
  -0.01809955 -0.57918555 -0.00452489 -0.77375567  0.01357466 -0.8914027
   0.03167421 -0.9954751   0.08144797 -0.5520362   0.11312217 -0.7285068
   0.13574661 -0.8597285 

KeyboardInterrupt: 