In [16]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier

# --- Step 1: Load dataset ---
# Raw hand-landmark table. The steps below read a 'label' column and the
# coordinate columns x1..x21, y1..y21, z1..z21 from it.
DATASET_PATH = "hand_landmarks_data.csv"
df = pd.read_csv(DATASET_PATH)

# --- Step 2: Map labels ---
def map_label(label):
    """Translate a raw gesture label into the direction name used for training.

    Recognised pairs: "like" -> "up", "dislike" -> "down",
    "peace" -> "right", "stop" -> "left".

    Returns:
        The direction string for a recognised label, otherwise None so the
        caller can discard the row.
    """
    gesture_to_direction = (
        ("like", "up"),
        ("dislike", "down"),
        ("peace", "right"),
        ("stop", "left"),
    )
    # Plain equality scan: any value that compares equal to none of the
    # gestures (including None / NaN) simply falls through.
    for gesture, direction in gesture_to_direction:
        if label == gesture:
            return direction
    return None  # unrecognised label

# --- Step 2 (applied): translate labels, discard gestures with no mapping ---
df['label'] = df['label'].apply(map_label)
df = df.dropna(subset=['label']).reset_index(drop=True)

# --- Step 3: Center landmarks by wrist (x1, y1, z1) ---
# The wrist columns are snapshotted first because landmark 1 is itself part of
# the columns being overwritten (it becomes all zeros after centering).
wrist_x, wrist_y, wrist_z = (df[f'{axis}1'].copy() for axis in ('x', 'y', 'z'))

for axis, wrist in (('x', wrist_x), ('y', wrist_y), ('z', wrist_z)):
    axis_cols = [f'{axis}{i}' for i in range(1, 22)]
    # Row-aligned subtraction: every landmark on this axis minus the wrist value of the same row.
    df[axis_cols] = df[axis_cols].sub(wrist, axis=0)

# --- Step 4: Normalize by max absolute value per row ---
epsilon = 1e-8  # keeps the divisor non-zero when every centered value in a row is 0
feature_cols = [name for name in df.columns if name != 'label']
max_value = df[feature_cols].abs().max(axis=1) + epsilon
df[feature_cols] = df[feature_cols].div(max_value, axis=0)

# --- Step 5: Prepare features and labels ---
X, y = df[feature_cols], df['label']

# --- Step 6: Encode labels ---
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Persist the encoder so inference can turn class indices back into label strings.
joblib.dump(label_encoder, "label_encoder.pkl")

# --- Step 7: Train/test split ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# --- Step 8: Train model ---
model = XGBClassifier(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    random_state=42,
)
model.fit(X_train, y_train)

# --- Step 9: Evaluate model on the held-out 20% ---
preds = model.predict(X_test)
acc, f1 = accuracy_score(y_test, preds), f1_score(y_test, preds, average="macro")
print(f"Accuracy: {acc:.4f}, F1-score: {f1:.4f}")

# --- Step 10: Save trained model ---
joblib.dump(model, "gesture_model.pkl")



# --- Inference function ---
def predict_gesture(landmarks: list):
    """Classify one hand pose from its raw landmark coordinates.

    The coordinates are centered on the wrist (landmark 1) and scaled by the
    largest absolute coordinate before being passed to the saved classifier.

    Args:
        landmarks: 63 floats ordered x1,y1,z1, x2,y2,z2, ..., x21,y21,z21.

    Returns:
        The label decoded by the saved label encoder
        (e.g. 'up', 'down', 'right', 'left').

    Raises:
        ValueError: if ``landmarks`` does not contain exactly 63 values.
    """
    if len(landmarks) != 63:
        raise ValueError("Input landmarks list must have 63 floats.")

    # Both artifacts are read from disk on every call.
    model = joblib.load("gesture_model.pkl")
    label_encoder = joblib.load("label_encoder.pkl")

    # Single-row frame whose columns follow the x1,y1,z1, ..., x21,y21,z21 layout.
    axes = ('x', 'y', 'z')
    cols = [f'{axis}{i}' for i in range(1, 22) for axis in axes]
    df_input = pd.DataFrame([landmarks], columns=cols)

    # Read the wrist coordinates out as scalars before any column is modified,
    # then shift every landmark so the wrist sits at the origin.
    wrist = {axis: df_input.loc[0, f'{axis}1'] for axis in axes}
    for axis, offset in wrist.items():
        axis_cols = [f'{axis}{i}' for i in range(1, 22)]
        df_input[axis_cols] = df_input[axis_cols] - offset

    # Scale by the largest absolute coordinate; the small constant avoids a zero divisor.
    max_val = df_input.abs().max(axis=1).values[0] + 1e-8
    df_input = df_input / max_val

    # Predict a class index and decode it back to its label.
    pred_encoded = model.predict(df_input)[0]
    return label_encoder.inverse_transform([pred_encoded])[0]


# --- Example usage of inference ---
if __name__ == "__main__":
    # Placeholder pose: the triple (0.1, 0.2, 0.3) repeated for each of the
    # 21 landmarks (63 floats). Replace with real data for a meaningful result.
    dummy_landmarks = [coord for _ in range(21) for coord in (0.1, 0.2, 0.3)]

    pred = predict_gesture(dummy_landmarks)
    print("Predicted gesture:", pred)


Accuracy: 0.9982, F1-score: 0.9982
Predicted gesture: down


In [17]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd

# Deserialize the trained artifacts a single time, when this cell/module runs,
# so the /predict handler reuses them instead of reloading on each request.
MODEL_PATH = "gesture_model.pkl"
ENCODER_PATH = "label_encoder.pkl"

model = joblib.load(MODEL_PATH)
label_encoder = joblib.load(ENCODER_PATH)

app = FastAPI()

# Request body schema for POST /predict.
class Landmarks(BaseModel):
    # Flat list of landmark coordinates; the /predict handler rejects any
    # length other than 63 with an HTTP 400.
    landmarks: list[float]

@app.post("/predict")
def predict(data: Landmarks):
    """Classify the hand pose supplied in the request body.

    Expects 63 floats ordered x1,y1,z1, ..., x21,y21,z21. The coordinates are
    centered on the wrist (landmark 1) and scaled by the largest absolute
    coordinate before prediction.

    Returns:
        ``{"prediction": <label>}`` with the decoded gesture label.

    Raises:
        HTTPException: status 400 when the list does not hold exactly 63 values.
    """
    coords = data.landmarks
    if len(coords) != 63:
        raise HTTPException(status_code=400, detail="Invalid number of landmarks, expected 63 floats")

    # Single-row frame whose columns follow the x1,y1,z1, ..., x21,y21,z21 layout.
    axes = ('x', 'y', 'z')
    cols = [f'{axis}{i}' for i in range(1, 22) for axis in axes]
    df_input = pd.DataFrame([coords], columns=cols)

    # Capture the wrist coordinates as scalars first, then translate every
    # landmark so the wrist becomes the origin.
    wrist = {axis: df_input.loc[0, f'{axis}1'] for axis in axes}
    for axis, offset in wrist.items():
        axis_cols = [f'{axis}{i}' for i in range(1, 22)]
        df_input[axis_cols] = df_input[axis_cols] - offset

    # Scale by the largest absolute coordinate; the small constant avoids a zero divisor.
    max_val = df_input.abs().max(axis=1).values[0] + 1e-8
    df_input = df_input / max_val

    # Uses the module-level model and encoder loaded at start-up.
    pred_index = model.predict(df_input)[0]
    gesture = label_encoder.inverse_transform([pred_index])[0]

    return {"prediction": gesture}


In [18]:
# Synthetic 63-value sequence 0.01, 0.02, ..., 0.63.
# NOTE(review): presumably a sample payload for the /predict endpoint - confirm.
landmarks = list(map(lambda step: round(step * 0.01, 2), range(1, 64)))
print(landmarks)

[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63]
