In [27]:
import os
import pandas as pd
import numpy as np
import zipfile
import tensorflow
from sklearn.model_selection import train_test_split

In [28]:
# load zip files into colab session, in order to extract data in session and then load files speedy for training and testing, because upload and mounting of google drive was not speedy at all
with zipfile.ZipFile('/content/data_fabi-20250112T105723Z-001.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/data_dir_1')

with zipfile.ZipFile('/content/data-gesamt-20250112T110540Z-001.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/data_dir_2')

data_dir_1 = '/content/data_dir_1'
data_dir_2 = '/content/data_dir_2'

In [29]:
# merge data
folder_mapping = {
    'data-gesamt/Telefon': ['data_fabi/telefon_nl', 'data_fabi/telefon'],
    'data-gesamt/Peace': ['data_fabi/peace_nl', 'data_fabi/peace'],
    'data-gesamt/Open-Hand': ['data_fabi/openhand_nl', 'data_fabi/openhand'],
    'data-gesamt/Faust': ['data_fabi/faust_nl', 'data_fabi/faust'],
    'data-gesamt/Thumb-Down': ['data_fabi/daumenrunter_nl', 'data_fabi/daumenrunter'],
    'data-gesamt/Thumb-Up': ['data_fabi/daumenhoch', 'data_fabi/daumenhoch_nl']
}

def read_files_to_df(folder_path):
    data = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            data.append({
                'file_name': file,
                'file_path': file_path,
                'folder': os.path.basename(root)
            })
    return pd.DataFrame(data)

merged_df = pd.DataFrame()

# read data_dir_2 and map to categories
for folder in folder_mapping.keys():
    folder_path = os.path.join(data_dir_2, folder)
    if os.path.exists(folder_path):
        df = read_files_to_df(folder_path)
        df['category'] = folder
        merged_df = pd.concat([merged_df, df])

# read data_dir_1 and map to categories
for dest_folder, src_folders in folder_mapping.items():
    for src_folder in src_folders:
        folder_path = os.path.join(data_dir_1, src_folder)
        if os.path.exists(folder_path):
            df = read_files_to_df(folder_path)
            df['category'] = dest_folder
            merged_df = pd.concat([merged_df, df])

In [30]:
merged_df

Unnamed: 0,file_name,file_path,folder,category
0,b_r_40_n_nl.npy,/content/data_dir_2/data-gesamt/Telefon/b_r_40...,Telefon,data-gesamt/Telefon
1,b_l_91_a_nl.npy,/content/data_dir_2/data-gesamt/Telefon/b_l_91...,Telefon,data-gesamt/Telefon
2,b_l_15_a_wl.npy,/content/data_dir_2/data-gesamt/Telefon/b_l_15...,Telefon,data-gesamt/Telefon
3,b_r_86_n_nl.npy,/content/data_dir_2/data-gesamt/Telefon/b_r_86...,Telefon,data-gesamt/Telefon
4,b_r_11_n_wl.npy,/content/data_dir_2/data-gesamt/Telefon/b_r_11...,Telefon,data-gesamt/Telefon
...,...,...,...,...
395,f_l_87_a_nl.npy,/content/data_dir_1/data_fabi/daumenhoch_nl/f_...,daumenhoch_nl,data-gesamt/Thumb-Up
396,f_l_77_a_nl.npy,/content/data_dir_1/data_fabi/daumenhoch_nl/f_...,daumenhoch_nl,data-gesamt/Thumb-Up
397,f_l_74_a_nl.npy,/content/data_dir_1/data_fabi/daumenhoch_nl/f_...,daumenhoch_nl,data-gesamt/Thumb-Up
398,f_r_82_c_nl.npy,/content/data_dir_1/data_fabi/daumenhoch_nl/f_...,daumenhoch_nl,data-gesamt/Thumb-Up


In [31]:
# clean category-concatination-string of category column in merged dataframe
merged_df['category'] = merged_df['category'].str.split('/').str.get(-1)

In [32]:
# check if each category is fully integrated - need 1600 samples per category
print(merged_df['category'].value_counts())

category
Telefon       1600
Peace         1600
Open-Hand     1600
Faust         1600
Thumb-Down    1600
Thumb-Up      1600
Name: count, dtype: int64


In [33]:
# now the data is in some sort of order because of the merging, and for training and testing, we need to shuffle it per category
merged_df[merged_df['category'] == 'Faust']

Unnamed: 0,file_name,file_path,folder,category
0,b_r_40_n_nl.npy,/content/data_dir_2/data-gesamt/Faust/b_r_40_n...,Faust,Faust
1,b_l_91_a_nl.npy,/content/data_dir_2/data-gesamt/Faust/b_l_91_a...,Faust,Faust
2,b_l_15_a_wl.npy,/content/data_dir_2/data-gesamt/Faust/b_l_15_a...,Faust,Faust
3,b_r_86_n_nl.npy,/content/data_dir_2/data-gesamt/Faust/b_r_86_n...,Faust,Faust
4,b_r_11_n_wl.npy,/content/data_dir_2/data-gesamt/Faust/b_r_11_n...,Faust,Faust
...,...,...,...,...
395,f_l_42_c_wl.npy,/content/data_dir_1/data_fabi/faust/f_l_42_c_w...,faust,Faust
396,f_l_96_c_wl.npy,/content/data_dir_1/data_fabi/faust/f_l_96_c_w...,faust,Faust
397,f_r_66_c_wl.npy,/content/data_dir_1/data_fabi/faust/f_r_66_c_w...,faust,Faust
398,f_r_9_c_wl.npy,/content/data_dir_1/data_fabi/faust/f_r_9_c_wl...,faust,Faust


In [34]:
# randomise entries per category
merged_df = merged_df.groupby('category').apply(lambda x: x.sample(frac=1)).reset_index(drop=True)

# check if order is now mixed
merged_df[merged_df['category'] == 'Faust']

  merged_df = merged_df.groupby('category').apply(lambda x: x.sample(frac=1)).reset_index(drop=True)


Unnamed: 0,file_name,file_path,folder,category
0,b_l_54_n_wl.npy,/content/data_dir_2/data-gesamt/Faust/b_l_54_n...,Faust,Faust
1,b_l_61_n_nl.npy,/content/data_dir_2/data-gesamt/Faust/b_l_61_n...,Faust,Faust
2,f_l_38_a_wl.npy,/content/data_dir_1/data_fabi/faust/f_l_38_a_w...,faust,Faust
3,b_l_68_a_nl.npy,/content/data_dir_2/data-gesamt/Faust/b_l_68_a...,Faust,Faust
4,f_r_92_a_nl.npy,/content/data_dir_1/data_fabi/faust_nl/f_r_92_...,faust_nl,Faust
...,...,...,...,...
1595,f_r_99_c_nl.npy,/content/data_dir_1/data_fabi/faust_nl/f_r_99_...,faust_nl,Faust
1596,f_l_45_a_wl.npy,/content/data_dir_1/data_fabi/faust/f_l_45_a_w...,faust,Faust
1597,f_r_35_a_wl.npy,/content/data_dir_1/data_fabi/faust/f_r_35_a_w...,faust,Faust
1598,f_l_32_c_nl.npy,/content/data_dir_1/data_fabi/faust_nl/f_l_32_...,faust_nl,Faust


In [51]:
# prepare data for training and testing
X = []
y = []

# map categories to numerical values
gestures = ['Faust', 'Peace', 'Thumb-Up', 'Thumb-Down', 'Open-Hand', 'Telefon']
category_to_label = {category: i for i, category in enumerate(gestures)}

for _, row in merged_df.iterrows():
    try:
        landmarks = np.load(row['file_path'])
        X.append(landmarks)
        y.append(category_to_label[row['category']])
    except Exception as e:
        print(f"Fehler beim Laden der Datei {row['file_path']}: {e}")

X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [52]:
# evalute shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# evaluate distribution
for i, gesture in enumerate(gestures):
    print(f"{gesture}: {np.sum(y_train == i)} in Trainingsdaten, {np.sum(y_test == i)} in Testdaten")

X_train shape: (6720, 63)
X_test shape: (2880, 63)
y_train shape: (6720,)
y_test shape: (2880,)
Faust: 1120 in Trainingsdaten, 480 in Testdaten
Peace: 1120 in Trainingsdaten, 480 in Testdaten
Thumb-Up: 1120 in Trainingsdaten, 480 in Testdaten
Thumb-Down: 1120 in Trainingsdaten, 480 in Testdaten
Open-Hand: 1120 in Trainingsdaten, 480 in Testdaten
Telefon: 1120 in Trainingsdaten, 480 in Testdaten


In [53]:
# for building the layers of our model, we need to know the shape (63)
print("Shape of X_train:", X_train.shape)


Shape of X_train: (6720, 63)


In [54]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.utils import to_categorical

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(63, 1)),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(6, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.3)


Epoch 1/100


  super().__init__(**kwargs)


[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.1709 - loss: 1.7922 - val_accuracy: 0.1969 - val_loss: 1.7899
Epoch 2/100
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.3297 - loss: 1.6176 - val_accuracy: 0.5149 - val_loss: 1.1937
Epoch 3/100
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5107 - loss: 1.1783 - val_accuracy: 0.5823 - val_loss: 1.0091
Epoch 4/100
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5892 - loss: 1.0325 - val_accuracy: 0.6190 - val_loss: 0.9537
Epoch 5/100
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.5853 - loss: 0.9968 - val_accuracy: 0.6438 - val_loss: 0.8999
Epoch 6/100
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.5986 - loss: 0.9416 - val_accuracy: 0.6677 - val_loss: 0.8639
Epoch 7/100
[1m147/147[0m [32

In [55]:
# save the model
model.save('/content/' + 'Versuch3_B_Model_100epochs_lstm_acc_0980.hdf5')



In [56]:
Ftest_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9762 - loss: 0.0896
Test accuracy: 0.9743055701255798


The following code is then for live-testing of the trained model (locally to be able to start opencv camera and get live feedback which gesture you show)

In [None]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.20-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.20-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.20 sounddevice-0.5.1


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow
from tensorflow.keras.models import load_model

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

model = load_model(f'path-to-model')
gestures = ['Faust', 'Peace', 'Thumb-Up', 'Thumb-Down', 'Open-Hand', 'Telefon']

cap = cv2.VideoCapture(0)
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.extend([lm.x, lm.y, lm.z])

            landmarks = np.array(landmarks).reshape(1, 63, 1)
            prediction = model.predict(landmarks)
            gesture_index = np.argmax(prediction)
            gesture = gestures[gesture_index]

            cv2.putText(frame, gesture, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.imshow('Hand Gesture Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


