In [2]:
import os
import cv2
import numpy as np
import mediapipe as mp
import json
import time

In [None]:
IMAGES_DIR = './images'
JSON_DIR = './json/'
single_hand_labels = ['Hallo', 'peace', 'I Love U', 'ich', 'bin' ] #'S', 'E', 'R', 'T', 'A', 'N']
double_hand_labels = ['Freunde', 'bachelor1', 'bachelor2']

In [5]:
def capture_images(IMAGES_DIR, single_hand_labels, double_hand_labels, num_images_per_label=50):
    if not os.path.exists(IMAGES_DIR):
        os.makedirs(IMAGES_DIR)

    cap = cv2.VideoCapture(0)
    current_label_index = 0

    # Bildaufnahme für Einzelhandzeichen
    for label in single_hand_labels:
        print(f"Bereit für Einzelhandzeichen: {label}. Drücke 'g', um Bilder aufzunehmen.")
        while True:
            ret, frame = cap.read()
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('g'):
                # 3 Sekunden Countdown
                for i in range(3, 0, -1):
                    print(f"Capturing in {i} seconds...")
                    time.sleep(1)

                # Aufnahme für die rechte Hand
                label_dir = os.path.join(IMAGES_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                for i in range(num_images_per_label):
                    ret, frame = cap.read()
                    img_name = f"{label_dir}/{label}_right_{i}.jpg"
                    cv2.imwrite(img_name, frame)
                    print(f"Captured {img_name}")
                    cv2.imshow('frame', frame)
                    cv2.waitKey(100)

                print(f"Fertig mit dem ersten Satz für {label}. Jetzt die andere Hand bereit machen und 'g' drücken, um fortzufahren.")
            
                # Warte, bis der Benutzer bereit ist, den nächsten Satz Bilder aufzunehmen
                while True:
                    if cv2.waitKey(1) & 0xFF == ord('g'):
                        break

                # Aufnahme für die linke Hand
                for i in range(num_images_per_label):
                    ret, frame = cap.read()
                    img_name = f"{label_dir}/{label}_left_{i}.jpg"
                    cv2.imwrite(img_name, frame)
                    print(f"Captured {img_name}")
                    cv2.imshow('frame', frame)
                    cv2.waitKey(100)

                print(f"Fertig mit Einzelhandzeichen: {label}. Drücke 'g' für das nächste Label.")
                break

    # Bildaufnahme für Doppelhandzeichen
    for label in double_hand_labels:
        print(f"Bereit für Doppelhandzeichen: {label}. Drücke 'g', um Bilder aufzunehmen.")
        while True:
            ret, frame = cap.read()
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('g'):
                # 3 Sekunden Countdown
                for i in range(3, 0, -1):
                    print(f"Capturing in {i} seconds...")
                    time.sleep(1)

                # Aufnahme für Doppelhand
                label_dir = os.path.join(IMAGES_DIR, label)
                os.makedirs(label_dir, exist_ok=True)
                for i in range(num_images_per_label):
                    ret, frame = cap.read()
                    img_name = f"{label_dir}/{label}_{i}.jpg"  # Nur ein Label für beide Hände
                    cv2.imwrite(img_name, frame)
                    print(f"Captured {img_name}")
                    cv2.imshow('frame', frame)
                    cv2.waitKey(100)

                print(f"Fertig mit Doppelhandzeichen: {label}. Drücke 'g' für das nächste Label.")
                break

    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

capture_images(IMAGES_DIR, single_hand_labels, double_hand_labels)

Bereit für Einzelhandzeichen: Hallo. Drücke 'g', um Bilder aufzunehmen.
Capturing in 3 seconds...
Capturing in 2 seconds...
Capturing in 1 seconds...
Captured ../images/images_without_landmarks/Hallo/Hallo_right_0.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_1.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_2.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_3.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_4.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_5.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_6.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_7.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_8.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_9.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_10.jpg
Captured ../images/images_without_landmarks/Hallo/Hallo_right_11.jpg
Captured ../images/images_withou

In [6]:
# Funktion zur Bildaugmentation
def augment_image(img):
    rows, cols, _ = img.shape

    # Zufällige Drehung
    angle = np.random.uniform(-30, 30)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img = cv2.warpAffine(img, M, (cols, rows))

    # Zufällige Skalierung
    scale = np.random.uniform(0.7, 1.3)
    img = cv2.resize(img, None, fx=scale, fy=scale)

    # Zufällige Verschiebung
    max_dx = 0.1 * cols
    max_dy = 0.1 * rows
    dx = np.random.uniform(-max_dx, max_dx)
    dy = np.random.uniform(-max_dy, max_dy)
    M = np.float32([[1, 0, dx], [0, 1, dy]])
    img = cv2.warpAffine(img, M, (cols, rows))

    # Zufällige Spiegelung
    if np.random.rand() < 0.5:
        img = cv2.flip(img, 1)

    return img

def augment_and_save_images(img_dir, num_augmented_images):
    for img_name in os.listdir(img_dir):
        img_path = os.path.join(img_dir, img_name)
        if os.path.isfile(img_path):
            img = cv2.imread(img_path)
            
            # Füge augmentierte Bilder hinzu
            for i in range(num_augmented_images):
                augmented_img = augment_image(img)
                
                # Erstelle neuen Dateinamen für augmentierte Bilder
                augmented_img_name = f"{img_name.split('.')[0]}_aug_{i}.jpg"
                augmented_img_path = os.path.join(img_dir, augmented_img_name)
                
                # Speichere das augmentierte Bild
                cv2.imwrite(augmented_img_path, augmented_img)
                print(f"Augmentiertes Bild gespeichert: {augmented_img_path}")

# Augmentiere Bilder für jedes Label in single_hand_labels
for label in single_hand_labels:
    label_path = os.path.join(IMAGES_DIR, label)
    if os.path.isdir(label_path):
        augment_and_save_images(label_path, num_augmented_images=5)

# Augmentiere Bilder für jedes Label in double_hand_labels
for label in double_hand_labels:
    label_path = os.path.join(IMAGES_DIR, label)
    if os.path.isdir(label_path):
        augment_and_save_images(label_path, num_augmented_images=5)

Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_right_45_aug_0.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_right_45_aug_1.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_right_45_aug_2.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_right_45_aug_3.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_right_45_aug_4.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_left_28_aug_0.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_left_28_aug_1.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_left_28_aug_2.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_left_28_aug_3.jpg
Augmentiertes Bild gespeichert: ../images/images_without_landmarks/Hallo/Hallo_left_28_aug_4.jpg
Augmentiertes Bild gespei

In [None]:

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

if not os.path.exists(JSON_DIR):
    os.makedirs(JSON_DIR)

def save_landmarks_to_json(IMAGES_DIR, json_path):
    if not os.path.exists(IMAGES_DIR):
        print(f"Das Verzeichnis {IMAGES_DIR} existiert nicht.")
        return

    data = []
    labels_collected = []

    # Verarbeite Einzelhandzeichen
    for label in single_hand_labels:
        label_dir = os.path.join(IMAGES_DIR, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = cv2.imread(img_path)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                
                results = hands.process(img_rgb)
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                        data.append(landmarks)
                        labels_collected.append(label)  # Einzelhandlabel hinzufügen

    # Verarbeite Doppelhandzeichen
    for label in double_hand_labels:
        label_dir = os.path.join(IMAGES_DIR, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = cv2.imread(img_path)
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                results = hands.process(img_rgb)
                if results.multi_hand_landmarks and len(results.multi_hand_landmarks) == 2:  # Sicherstellen, dass beide Hände erkannt werden
                    for hand_landmarks in results.multi_hand_landmarks:
                        landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                        data.append(landmarks)
                        labels_collected.append(label)  # Doppelhandlabel hinzufügen

    # JSON-Datei erstellen
    with open(json_path, 'w') as f:
        json.dump({'data': data, 'labels': labels_collected}, f)

# Speichere die Handlandmarks in einer JSON-Datei
json_path = os.path.join(JSON_DIR, 'hand_landmarks_data.json')
save_landmarks_to_json(IMAGES_DIR, json_path)

print("Handlandmarks erfolgreich gespeichert.")


I0000 00:00:1731875452.014335  252393 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
W0000 00:00:1731875452.101369  252930 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731875452.164386  252930 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


FileNotFoundError: [Errno 2] No such file or directory: './json/hand_landmarks_data.json'

In [7]:
import cv2
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import json
import joblib

In [1]:
JSON_DATA_DIR = './json/hand_landmarks_data.json'

In [8]:
with open(JSON_DATA_DIR, 'r') as f:
    dataset = json.load(f)

data = dataset['data']
labels = dataset['labels']

X = np.array(data)
X = X.reshape(X.shape[0], -1)  # Flach machen
y = np.array(labels)

# Daten in Trainings- und Testset aufteilen
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modell trainieren
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Modell bewerten
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Genauigkeit: {accuracy * 100:.2f}%")
print(f"Präzision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1-Score: {f1 * 100:.2f}%")

joblib.dump(model, 'hand_gesture_model.joblib')


Genauigkeit: 98.68%
Präzision: 98.68%
Recall: 98.68%
F1-Score: 98.67%


['hand_gesture_model.joblib']

In [None]:
model = joblib.load('hand_gesture_model.joblib')

# Mediapipe initialisieren
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Videoaufnahme starten
cap = cv2.VideoCapture(0)

# Labels für Doppelhandzeichen
double_hand_labels = ['Freunde', 'bachelor1', 'bachelor2']
recognized_gestures = []  # Liste zur Speicherung der erkannten Handzeichen, ohne Duplikate
last_gesture = None  # Das zuletzt erkannte Handzeichen

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Grauen Chat-Kasten auf der rechten Seite erstellen
    chat_width = 300
    chat_frame = np.ones((frame.shape[0], chat_width, 3), dtype=np.uint8) * 220  # Grauer Hintergrund (RGB: 220, 220, 220)
    
    # Originales Kamerabild in RGB konvertieren
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(img_rgb)

    if results.multi_hand_landmarks:
        # Initialisiere Variablen für das grüne Rechteck und die Landmarks
        landmarks = []
        x_coords = []
        y_coords = []

        # Zeichne Handverbindungen und speichere die Landmark-Koordinaten
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            for lm in hand_landmarks.landmark:
                landmarks.append([lm.x, lm.y, lm.z])
                x_coords.append(lm.x)
                y_coords.append(lm.y)

        # Rechteck um die Hand zeichnen
        x_min = int(min(x_coords) * frame.shape[1])
        x_max = int(max(x_coords) * frame.shape[1])
        y_min = int(min(y_coords) * frame.shape[0])
        y_max = int(max(y_coords) * frame.shape[0])
        buffer = 20
        x_min = max(0, x_min - buffer)
        x_max = min(frame.shape[1], x_max + buffer)
        y_min = max(0, y_min - buffer)
        y_max = min(frame.shape[0], y_max + buffer)
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Sicherstellen, dass die Landmarken die richtige Form haben
        landmarks = np.array(landmarks).flatten()[:63].reshape(1, -1)

        # Vorhersage mit dem Modell
        gesture = model.predict(landmarks)[0]
        confidence = max(model.predict_proba(landmarks)[0])

        # Überprüfen, ob das erkannte Zeichen ein Doppelhandzeichen ist und beide Hände erfasst sind
        if gesture in double_hand_labels and len(results.multi_hand_landmarks) == 2:
            label = f'{gesture} ({confidence * 100:.2f}%)' if confidence >= 0.5 else 'Not Detected'
        elif gesture not in double_hand_labels:
            label = f'{gesture} ({confidence * 100:.2f}%)' if confidence >= 0.5 else 'Not Detected'
        else:
            label = 'Not Detected'

        # Label und Erkennungsrate anzeigen, wenn erkannt
        if confidence >= 0.5 and label != 'Not Detected':
            cv2.putText(frame, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2, cv2.LINE_AA)
            if label and (gesture != last_gesture):
                recognized_gestures.append(gesture)  # Neues Handzeichen zur Liste hinzufügen
                last_gesture = gesture
    else:
        # Falls keine Hand im Bild ist, zuletzt erkanntes Zeichen zurücksetzen
        last_gesture = None

    # Handzeichen im Chat-Kasten anzeigen
    for i, text in enumerate(recognized_gestures[-10:]):  # Nur die letzten 10 Einträge anzeigen
        y_pos = 30 + i * 30
        cv2.putText(chat_frame, text, (10, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2, cv2.LINE_AA)

    # Das Kamerabild und den Chat-Kasten nebeneinander anzeigen
    combined_frame = np.hstack((frame, chat_frame))

    # Zeige das Bild mit dem Handzeichen und der Chat-Anzeige an
    cv2.imshow('Live Hand Gesture Recognition', combined_frame)

    # Beenden mit 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

I0000 00:00:1731874159.980452  244749 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-18.8.16), renderer: Intel(R) Iris(TM) Graphics 6100
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1731874160.008611  245205 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731874160.029675  245205 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


ValueError: X has 63 features, but RandomForestClassifier is expecting 72 features as input.

: 