In [1]:
import cv2
import os
import time
import numpy as np
import mediapipe as mp
import json
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
IMAGES_DIR = './images'
JSON_DIR = './json/'
JSON_DATA_DIR = './json/hand_landmarks_data.json'
single_hand_labels = ['Hallo', 'peace', 'I Love U', 'ich', 'bin']
double_hand_labels = ['Freunde', 'bachelor1', 'bachelor2']

In [3]:
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3)

if not os.path.exists(JSON_DIR):
    os.makedirs(JSON_DIR)

def save_landmarks_to_json(IMAGES_DIR, json_path):
    if not os.path.exists(IMAGES_DIR):
        print(f"Das Verzeichnis {IMAGES_DIR} existiert nicht.")
        return

    data = []
    labels_collected = []

    # Verarbeite Einzelhandzeichen
    for label in single_hand_labels:
        label_dir = os.path.join(IMAGES_DIR, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = cv2.imread(img_path)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                
                hand_results = hands.process(img_rgb)
                pose_results = pose.process(img_rgb)
                
                if hand_results.multi_hand_landmarks and pose_results.pose_landmarks:
                    for hand_landmarks in hand_results.multi_hand_landmarks:
                        landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                        nose = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
                        left_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
                        right_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]
                        landmarks.extend([[nose.x, nose.y, nose.z], [left_shoulder.x, left_shoulder.y, left_shoulder.z], [right_shoulder.x, right_shoulder.y, right_shoulder.z]])
                        data.append(landmarks)
                        labels_collected.append(label)

    # Verarbeite Doppelhandzeichen
    for label in double_hand_labels:
        label_dir = os.path.join(IMAGES_DIR, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = cv2.imread(img_path)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                hand_results = hands.process(img_rgb)
                pose_results = pose.process(img_rgb)
                
                if hand_results.multi_hand_landmarks and len(hand_results.multi_hand_landmarks) == 2 and pose_results.pose_landmarks:
                    for hand_landmarks in hand_results.multi_hand_landmarks:
                        landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                        nose = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
                        left_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
                        right_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]
                        landmarks.extend([[nose.x, nose.y, nose.z], [left_shoulder.x, left_shoulder.y, left_shoulder.z], [right_shoulder.x, right_shoulder.y, right_shoulder.z]])
                        data.append(landmarks)
                        labels_collected.append(label)

    # JSON-Datei erstellen
    with open(json_path, 'w') as f:
        json.dump({'data': data, 'labels': labels_collected}, f)

# Speichere die Handlandmarks in einer JSON-Datei
json_path = os.path.join(JSON_DIR, 'hand_landmarks_data.json')
save_landmarks_to_json(IMAGES_DIR, json_path)

print("Handlandmarks erfolgreich gespeichert.")

I0000 00:00:1732199607.392462  124454 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1732199607.444579  124605 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1732199607.459567  124454 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
W0000 00:00:1732199607.510395  124605 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732199607.907413  124612 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732199607.997903  124612 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signa

Handlandmarks erfolgreich gespeichert.


In [5]:
with open(JSON_DATA_DIR, 'r') as f:
    dataset = json.load(f)

data = dataset['data']
labels = dataset['labels']

X = np.array(data)
X = X.reshape(X.shape[0], -1)  # Flach machen
y = np.array(labels)

# Daten in Trainings- und Testset aufteilen
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modell trainieren
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Modell bewerten
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Genauigkeit: {accuracy * 100:.2f}%")
print(f"Präzision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1-Score: {f1 * 100:.2f}%")

joblib.dump(model, 'hand_gesture_model.joblib')

Genauigkeit: 98.68%
Präzision: 98.65%
Recall: 98.68%
F1-Score: 98.65%


['hand_gesture_model.joblib']

In [None]:
model = joblib.load('hand_gesture_model.joblib')

# Mediapipe initialisieren
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

# Videoaufnahme starten
cap = cv2.VideoCapture(0)

# Labels für Doppelhandzeichen
double_hand_labels = ['Freunde', 'bachelor1', 'bachelor2']
recognized_gestures = []  # Liste zur Speicherung der erkannten Handzeichen, ohne Duplikate
last_gesture = None  # Das zuletzt erkannte Handzeichen
last_recognition_time = 0  # Zeit der letzten Erkennung

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Grauen Chat-Kasten auf der rechten Seite erstellen
    chat_width = 300
    chat_frame = np.ones((frame.shape[0], chat_width, 3), dtype=np.uint8) * 220  # Grauer Hintergrund (RGB: 220, 220, 220)
    
    # Originales Kamerabild in RGB konvertieren
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    hand_results = hands.process(img_rgb)
    pose_results = pose.process(img_rgb)

    if hand_results.multi_hand_landmarks and pose_results.pose_landmarks:
        # Initialisiere Variablen für das grüne Rechteck und die Landmarks
        landmarks = []
        x_coords = []
        y_coords = []

        # Zeichne Handverbindungen und speichere die Landmark-Koordinaten
        for hand_landmarks in hand_results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            for lm in hand_landmarks.landmark:
                landmarks.append([lm.x, lm.y, lm.z])
                x_coords.append(lm.x)
                y_coords.append(lm.y)

        # Rechteck um die Hand zeichnen
        x_min = int(min(x_coords) * frame.shape[1])
        x_max = int(max(x_coords) * frame.shape[1])
        y_min = int(min(y_coords) * frame.shape[0])
        y_max = int(max(y_coords) * frame.shape[0])
        buffer = 20
        x_min = max(0, x_min - buffer)
        x_max = min(frame.shape[1], x_max + buffer)
        y_min = max(0, y_min - buffer)
        y_max = min(frame.shape[0], y_max + buffer)
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Füge Nase und Schultern hinzu
        nose = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
        left_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER]
        right_shoulder = pose_results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER]
        landmarks.extend([[nose.x, nose.y, nose.z], [left_shoulder.x, left_shoulder.y, left_shoulder.z], [right_shoulder.x, right_shoulder.y, right_shoulder.z]])

        # Sicherstellen, dass die Landmarken die richtige Form haben
        landmarks = np.array(landmarks).flatten().reshape(1, -1)

        # Vorhersage mit dem Modell
        gesture = model.predict(landmarks)[0]
        confidence = max(model.predict_proba(landmarks)[0])

        # Überprüfen, ob das erkannte Zeichen ein Doppelhandzeichen ist und beide Hände erfasst sind
        if gesture in double_hand_labels and len(hand_results.multi_hand_landmarks) == 2:
            label = f'{gesture} ({confidence * 100:.2f}%)' if confidence >= 0.5 else 'Not Detected'
        elif gesture not in double_hand_labels:
            label = f'{gesture} ({confidence * 100:.2f}%)' if confidence >= 0.5 else 'Not Detected'
        else:
            label = 'Not Detected'

        # Update letzte Erkennung
        current_time = time.time()
        if confidence >= 0.5 and label != 'Not Detected' and (current_time - last_recognition_time > 1):
            x_min, y_min = 10, 30
            cv2.putText(frame, label, (x_min, y_min), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2, cv2.LINE_AA)
            if label and (gesture != last_gesture):
                if gesture == 'ich' and (not recognized_gestures or recognized_gestures[-1] != 'ich'):
                    recognized_gestures.append(gesture)
                elif gesture == 'bin' and recognized_gestures and recognized_gestures[-1] == 'ich':
                    recognized_gestures[-1] = 'ich bin'
                elif gesture != 'bin' and recognized_gestures and recognized_gestures[-1] == 'ich':
                    recognized_gestures[-1] = 'meine'
                    recognized_gestures.append(gesture)
                else:
                    recognized_gestures.append(gesture)
                last_gesture = gesture
                last_recognition_time = current_time  # Aktualisiere die Zeit der letzten Erkennung

        # Überprüfen, wo sich die Hand befindet
        hand_y = np.mean(y_coords)
        if hand_y < nose.y:
            position = "über der Nase"
        elif hand_y < min(left_shoulder.y, right_shoulder.y):
            position = "zwischen Schulter und Nase"
        else:
            position = "unter den Schultern"
        cv2.putText(frame, position, (x_min, y_min + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)
    else:
        # Falls keine Hand im Bild ist, zuletzt erkanntes Zeichen zurücksetzen
        last_gesture = None

    # Handzeichen im Chat-Kasten anzeigen
    for i, text in enumerate(recognized_gestures[-10:]):  # Nur die letzten 10 Einträge anzeigen
        y_pos = 30 + i * 30
        cv2.putText(chat_frame, text, (10, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2, cv2.LINE_AA)

    # Das Kamerabild und den Chat-Kasten nebeneinander anzeigen
    combined_frame = np.hstack((frame, chat_frame))

    # Zeige das Bild mit dem Handzeichen und der Chat-Anzeige an
    cv2.imshow('Live Hand Gesture Recognition', combined_frame)

    # Beenden mit 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

I0000 00:00:1732200240.056901  124454 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
W0000 00:00:1732200240.078644  127836 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1732200240.085189  124454 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
W0000 00:00:1732200240.130332  127839 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732200240.630014  127840 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732200240.673716  127842 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


ValueError: X has 135 features, but RandomForestClassifier is expecting 72 features as input.

: 