In [1]:
import os
import cv2
import mediapipe as mp
import csv

data_dir = "DatasetNew/"
output_csv = "gesture_dataset.csv"

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

header = ["label", "video", "frame"]
for i in range(21):
    header.extend([f"x{i}", f"y{i}", f"z{i}"])

with open(output_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(header)

    for label in os.listdir(data_dir):
        label_path = os.path.join(data_dir, label)
        if not os.path.isdir(label_path):
            continue

        for video_file in os.listdir(label_path):
            if not video_file.endswith(".mp4"):
                continue

            cap = cv2.VideoCapture(os.path.join(label_path, video_file))
            frame_idx = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = hands.process(rgb)

                if result.multi_hand_landmarks:
                    landmarks = result.multi_hand_landmarks[0].landmark
                    row = [label, video_file, frame_idx]
                    for lm in landmarks:
                        row.extend([lm.x, lm.y, lm.z])
                    writer.writerow(row)

                frame_idx += 1

            cap.release()

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("gesture_dataset.csv")

X = df.drop(columns=["label", "video", "frame"])
y = df["label"]

le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [4]:
X_train

Unnamed: 0,x0,y0,z0,x1,y1,z1,x2,y2,z2,x3,...,z17,x18,y18,z18,x19,y19,z19,x20,y20,z20
1128,0.367902,0.889174,2.376150e-07,0.394573,0.859474,-0.007887,0.430224,0.856570,-0.014107,0.458497,...,-0.023828,0.406891,1.001689,-0.027359,0.423599,1.014430,-0.025523,0.435853,1.020190,-0.023856
2970,0.362478,0.614242,-1.516719e-07,0.385333,0.565196,-0.003040,0.411310,0.523947,-0.010696,0.426912,...,-0.024494,0.430917,0.676716,-0.021702,0.421964,0.670258,-0.013711,0.412008,0.666728,-0.009128
1697,0.361143,0.673725,-1.458987e-07,0.379858,0.625585,-0.001491,0.402138,0.583327,-0.007496,0.413146,...,-0.021748,0.428233,0.724524,-0.018303,0.419452,0.722126,-0.010905,0.410207,0.718248,-0.006956
5036,0.359400,0.922701,6.765624e-08,0.374597,0.957778,0.000133,0.375474,0.997812,0.000105,0.366659,...,-0.003771,0.322302,1.021023,-0.003903,0.322617,1.036382,-0.001018,0.322562,1.045641,0.001569
2224,0.540470,0.715040,-1.195863e-07,0.521242,0.674679,-0.002828,0.494651,0.631979,-0.010036,0.480091,...,-0.034379,0.487979,0.751235,-0.040289,0.496995,0.748082,-0.034930,0.509894,0.745384,-0.030721
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4426,0.359952,0.940275,3.605387e-07,0.380613,0.869980,-0.020910,0.372092,0.798146,-0.028689,0.337744,...,-0.025295,0.247151,0.770590,-0.047135,0.262561,0.776386,-0.051134,0.281897,0.796604,-0.049768
466,0.398662,0.630019,-1.800934e-07,0.419358,0.570804,-0.009049,0.429616,0.517464,-0.013829,0.429497,...,-0.024759,0.427577,0.637895,-0.032512,0.421920,0.633804,-0.034164,0.411446,0.625778,-0.036036
3092,0.369442,0.650472,-1.090105e-07,0.388751,0.602664,-0.002762,0.413778,0.555742,-0.009432,0.425364,...,-0.024203,0.436806,0.695107,-0.021318,0.428514,0.688866,-0.013569,0.416790,0.685228,-0.009334
3772,0.323900,0.324152,1.706665e-07,0.351810,0.283202,-0.008134,0.369240,0.227155,-0.008942,0.378589,...,-0.008655,0.286350,0.154638,-0.013792,0.282796,0.126959,-0.016817,0.280218,0.100803,-0.018863


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

df = pd.read_csv("gesture_dataset.csv")

X = df.drop(columns=["label", "video", "frame"])
y = df["label"]

le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluasi model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Simpan model dan label encoder
joblib.dump(model, "gesture_model.pkl")
joblib.dump(le, "label_encoder.pkl")

print("\n✅ Model dan label encoder berhasil disimpan!")

Accuracy: 0.9922854387656702

Classification Report:
                 precision    recall  f1-score   support

  Tangan_Jempol       0.99      1.00      1.00       624
Tangan_Melambai       1.00      0.98      0.99       250
   Tangan_Peace       0.99      0.98      0.98       163

       accuracy                           0.99      1037
      macro avg       0.99      0.99      0.99      1037
   weighted avg       0.99      0.99      0.99      1037


✅ Model dan label encoder berhasil disimpan!


In [6]:
! pip install opencv-python mediapipe scikit-learn joblib




[notice] A new release of pip is available: 24.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import cv2
import mediapipe as mp
import numpy as np
import joblib

# Load model dan label encoder
model = joblib.load("gesture_model.pkl")
le = joblib.load("label_encoder.pkl")

# Setup MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

# Buka webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip biar mirror
    frame = cv2.flip(frame, 1)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Gambar landmark di tangan
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Ambil 21 titik x, y, z
            landmark_list = []
            for lm in hand_landmarks.landmark:
                landmark_list.extend([lm.x, lm.y, lm.z])  # total 63 fitur

            if len(landmark_list) == 63:
                # Prediksi
                prediction = model.predict([landmark_list])
                label = le.inverse_transform(prediction)[0]

                # Tampilkan label
                cv2.putText(frame, f'Gesture: {label}', (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Tampilkan frame
    cv2.imshow("Real-Time Gesture Recognition", frame)

    # Tekan 'q' untuk keluar
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


