In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import mediapipe as mp
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the raw hand-landmark samples from disk
df = pd.read_csv('hand_landmarks_data.csv')

# Gesture name -> direction command. Only the gestures listed here are kept.
# NOTE: an earlier variant of this table used 'three' for 'up' and 'fist' for
# 'down' in place of 'like' / 'dislike'.
label_mapping = dict(
    one='left',
    two_up='right',
    like='up',
    dislike='down',
)

# Translate the gesture names into directions; gestures missing from the table
# become NaN, so those rows are dropped and the index is renumbered from 0.
df = (
    df.assign(label=df['label'].map(label_mapping))
    .dropna(subset=['label'])
    .reset_index(drop=True)
)

# Normalize landmarks
def normalize_landmarks(row):
    """Normalize one training sample's landmarks relative to the wrist.

    The x/y coordinates are translated so the wrist sits at the origin, then
    every coordinate (x, y and z) is divided by the 2-D distance between the
    wrist and the middle-finger tip. Scaling is skipped when that distance is
    zero.

    Args:
        row: A sequence (e.g. a pandas Series produced by ``df.apply`` with
            ``axis=1``) laid out as ``x, y, z`` per landmark, followed by the
            label as the last element. Positions 0/1 are read as the wrist
            x/y and positions 36/37 as the middle-finger-tip x/y.

    Returns:
        A 1-D object-dtype NumPy array of the same length as ``row``: the
        normalized coordinates followed by the unchanged label.
    """
    # Work on a plain NumPy view so that indexing is always positional.
    # Integer keys on a Series indexed by column names rely on a deprecated
    # positional fallback in pandas.
    values = np.asarray(row, dtype=object)
    label = values[-1]

    # astype() copies, so the caller's row is never modified, and doing the
    # arithmetic in float avoids object-dtype math.
    coords = values[:-1].astype(float)

    wrist_x, wrist_y = coords[0], coords[1]
    mid_finger_tip_x, mid_finger_tip_y = coords[36], coords[37]

    # Translate x and y so the wrist becomes the origin (z is left as-is).
    coords[0::3] -= wrist_x
    coords[1::3] -= wrist_y

    scale_factor = np.sqrt((mid_finger_tip_x - wrist_x)**2 + (mid_finger_tip_y - wrist_y)**2)
    if scale_factor > 0:
        coords /= scale_factor

    # Build the result as an object array so the numbers stay numeric and the
    # label keeps its own type (np.append on a float array would turn every
    # value into a string when the label is a string).
    result = np.empty(len(values), dtype=object)
    result[:-1] = coords
    result[-1] = label
    return result

# Run the per-row normalization over the whole dataset, then re-label the
# resulting columns with the source frame's column names.
df_normalized = df.apply(normalize_landmarks, axis=1, result_type='expand')
df_normalized.columns = df.columns

# Features are every column except the last; the last column is the label.
X, y = df_normalized.iloc[:, :-1], df_normalized.iloc[:, -1]

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Fit each classifier on the training split and score it on the test split ---

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

# SVM (RBF kernel)
svm = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)

# k-nearest neighbours (k = 5)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred_knn)

# Report the test accuracy of all three models
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print(f"KNN Accuracy: {knn_accuracy:.4f}")

# Persist a model for the real-time stage. The Random Forest is always the one
# written here; it is not selected by comparing the accuracies above.
joblib.dump(rf, 'best_model.pkl')
print("Best model saved as 'best_model.pkl'")

# Per-class precision / recall / F1 for the Random Forest
print("\nRandom Forest Report:")
print(classification_report(y_test, y_pred_rf))

# ---------------- Real-time Gesture Recognition ----------------
# Reload the persisted model from disk
model = joblib.load('best_model.pkl')

# No reverse label mapping is needed: the model was trained on the final
# direction labels, so its predictions are already directions.

# MediaPipe handles: drawing helpers and the hands solution
mp_drawing, mp_hands = mp.solutions.drawing_utils, mp.solutions.hands

# Function to normalize landmarks for real-time input
def normalize_landmarks_real(landmarks):
    """Normalize one hand's flattened landmarks for real-time prediction.

    Applies the same transform as the training-time normalization: x/y are
    translated so the wrist is at the origin, then every coordinate (x, y and
    z) is divided by the 2-D wrist-to-middle-finger-tip distance. Scaling is
    skipped when that distance is zero.

    Args:
        landmarks: Flat sequence laid out as ``x, y, z`` per landmark.
            Positions 0/1 are read as the wrist x/y and positions 36/37 as
            the middle-finger-tip x/y.

    Returns:
        A new 1-D float NumPy array with the normalized coordinates; the
        input is not modified.
    """
    # Always copy into a float array: the in-place division below raises on
    # integer arrays (e.g. pixel coordinates), and copying keeps a caller's
    # ndarray untouched.
    normalized = np.array(landmarks, dtype=float)

    # Read the reference points before translating; these are scalar copies.
    wrist_x, wrist_y = normalized[0], normalized[1]
    mid_finger_tip_x, mid_finger_tip_y = normalized[36], normalized[37]

    # Translate x and y so the wrist becomes the origin (z is left as-is).
    normalized[0::3] -= wrist_x
    normalized[1::3] -= wrist_y

    scale_factor = np.sqrt((mid_finger_tip_x - wrist_x)**2 + (mid_finger_tip_y - wrist_y)**2)
    if scale_factor > 0:
        normalized /= scale_factor

    return normalized

# Real-time loop: grab webcam frames, detect hands, classify each detected
# hand's gesture with the loaded model and overlay the prediction.
cap = cv2.VideoCapture(0)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.5)

print("Press 'q' to quit.")
try:
    while True:
        ret, image = cap.read()
        if not ret:
            # No frame could be read (camera missing or stream ended).
            break

        # Mirror the frame once and keep it as the BGR canvas for drawing;
        # MediaPipe is given an RGB copy of the same mirrored frame.
        image_bgr = cv2.flip(image, 1)
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image_bgr, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Flatten the 21 landmarks into x, y, z order, the layout the
                # normalization function indexes into.
                landmarks = []
                for i in range(21):
                    point = hand_landmarks.landmark[i]
                    landmarks.extend((point.x, point.y, point.z))

                normalized_landmarks = normalize_landmarks_real(landmarks)
                prediction = model.predict([normalized_landmarks])[0]

                # Give each detected hand its own text row so that labels do
                # not overwrite each other when several hands are visible.
                cv2.putText(image_bgr, f"Gesture: {prediction}", (10, 30 + 40 * hand_idx),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('Hand Gesture Direction Recognition', image_bgr)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and windows, including when the loop is left by
    # an exception or a kernel interrupt; then close the MediaPipe graph.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


Random Forest Accuracy: 0.9991
SVM Accuracy: 0.9991
KNN Accuracy: 0.9991
Best model saved as 'best_model.pkl'

Random Forest Report:
              precision    recall  f1-score   support

        down       1.00      1.00      1.00       274
        left       1.00      1.00      1.00       222
       right       1.00      1.00      1.00       272
          up       1.00      1.00      1.00       300

    accuracy                           1.00      1068
   macro avg       1.00      1.00      1.00      1068
weighted avg       1.00      1.00      1.00      1068

Press 'q' to quit.


