# AetherManipulator - Gesture Model Training

## 1. Setup and Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

## 2. Data Generation (Simulated)

Since we don't have a pre-collected dataset of hand landmarks mapped to gestures (translate, rotate, scale), we'll generate synthetic data for demonstration purposes. In a real scenario, you would collect data using MediaPipe, extract relevant features (e.g., distances between landmarks, angles, hand openness), and label them according to the intended gesture.

In [None]:
def generate_synthetic_data(n_samples=1000, n_features=42, random_state=None):
    """Generates synthetic data simulating hand landmark features.

    Features are drawn uniformly from [0, 1). Labels are then derived from
    the features with two fixed rules, so a classifier has a real pattern
    to learn:

    * 1 (rotate): the sum of the first 10 features is greater than 5.
    * 2 (scale): the variance of the last 10 features is greater than 0.08
      and the sample was not already labelled rotate.
    * 0 (translate): every remaining sample.

    Args:
        n_samples (int): Number of data points to generate.
        n_features (int): Number of features (e.g., 21 landmarks * 2 coords).
        random_state (int | None): Seed for a private random generator. When
            given, repeated calls with the same seed return identical data.
            When None (default), values are drawn from NumPy's global
            generator, so ``np.random.seed`` still controls the output.

    Returns:
        tuple: (X, y) - features of shape (n_samples, n_features) and
        integer labels of shape (n_samples,).
    """
    # A private RandomState keeps seeded calls independent of (and harmless
    # to) the global generator; the None path preserves the old behaviour.
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.RandomState(random_state)

    # Simulate features (e.g., normalized landmark coordinates relative to wrist)
    X = rng.rand(n_samples, n_features)

    # Simulate labels (0: translate, 1: rotate, 2: scale).
    # The rules below are deterministic functions of X.
    y = np.zeros(n_samples, dtype=int)

    # Rule 1: High sum of first 10 features -> Rotate (fist-like)
    mask_rotate = X[:, :10].sum(axis=1) > 5
    y[mask_rotate] = 1

    # Rule 2: High variance in last 10 features -> Scale (two hands / spread?)
    # Rotate takes precedence, so only non-rotate samples may become scale.
    mask_scale = (X[:, -10:].var(axis=1) > 0.08) & (~mask_rotate)
    y[mask_scale] = 2

    # Remaining samples are Translate (default 0)

    print(f"Generated {n_samples} samples.")
    # minlength=3 keeps one count per class even if a class has no samples.
    print(f"Class distribution: {np.bincount(y, minlength=3)}")

    return X, y

# Seed NumPy's global generator so the synthetic dataset is the same on every
# run. The split and the classifier already use random_state=42; without this
# the data itself (and therefore every reported metric) would still vary.
np.random.seed(42)

# Generate data
X, y = generate_synthetic_data(n_samples=2000, n_features=42) # 21 landmarks * 2 coords (x, y)

# Create a DataFrame (optional, but good practice): one column per feature
# plus a trailing 'label' column holding the gesture class.
feature_names = [f'feature_{i}' for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_names)
df['label'] = y

print("\nSample data head:")
print(df.head())
print("\nData shape:")
print(df.shape)

## 3. Data Splitting and Preprocessing

In [None]:
# Pull the label vector and the feature matrix back out of the DataFrame
y = df['label'].to_numpy()
X = df.drop(columns='label').to_numpy()

# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Testing set shape: X={X_test.shape}, y={y_test.shape}")

# Preprocessing step (standard scaling); only instantiated here, not fitted
scaler = StandardScaler()

## 4. Model Definition and Training Pipeline

In [None]:
# Random Forest classifier with a fixed seed so repeated fits match
model = RandomForestClassifier(random_state=42)

# Chain the scaler and the classifier so they are fitted and applied together
pipeline = Pipeline(steps=[('scaler', scaler), ('classifier', model)])

# Fit the untuned pipeline once on the training split
print("Training the initial model...")
pipeline.fit(X_train, y_train)
print("Initial model training complete.")

## 5. Hyperparameter Tuning (Grid Search)

In [None]:
# Search space for the RandomForestClassifier. The 'classifier__' prefix
# addresses the pipeline step named 'classifier'.
param_grid = dict(
    classifier__n_estimators=[50, 100, 200],  # Number of trees
    classifier__max_depth=[None, 10, 20],     # Maximum depth of trees
    classifier__min_samples_split=[2, 5],     # Min samples to split node
    classifier__min_samples_leaf=[1, 3],      # Min samples in leaf node
)

# Exhaustive search over the grid, scored by accuracy.
# cv=3 keeps the run fast; use cv=5 or more for better results.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=1,
)

print("Starting Hyperparameter Tuning (GridSearchCV)...")
grid_search.fit(X_train, y_train)

print("\nGridSearchCV complete.")
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_:.4f}")

# Keep the winning pipeline for evaluation and saving
best_model_pipeline = grid_search.best_estimator_

## 6. Model Evaluation

In [None]:
# Evaluate the best model on the test set
y_pred = best_model_pipeline.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Set Accuracy: {accuracy:.4f}")

# Pin the label set and its order explicitly. Without `labels=`, scikit-learn
# infers the classes from y_test/y_pred; if one gesture were missing, the
# report would fail on the target_names length mismatch and the confusion
# matrix would no longer line up with the three class names.
class_labels = [0, 1, 2]
class_names = ['Translate (0)', 'Rotate (1)', 'Scale (2)']

# Generate classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))

# Generate confusion matrix (rows: true label, columns: predicted label)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
print("Confusion Matrix:")
print(cm)

## 7. Results Visualization

In [None]:
# Confusion matrix as an annotated heatmap
fig_cm, ax_cm = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax_cm,
)
ax_cm.set_xlabel('Predicted Label')
ax_cm.set_ylabel('True Label')
ax_cm.set_title('Confusion Matrix')
plt.show()

# Learning curve: refit the tuned pipeline on 10 growing fractions of the
# data with 3-fold cross-validation and record accuracy at each size.
train_sizes, train_scores, test_scores = learning_curve(
    best_model_pipeline,
    X,
    y,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
)

# Collapse the per-fold scores (axis 1) into a mean and spread per size
train_scores_mean = train_scores.mean(axis=1)
train_scores_std = train_scores.std(axis=1)
test_scores_mean = test_scores.mean(axis=1)
test_scores_std = test_scores.std(axis=1)

fig_lc, ax_lc = plt.subplots(figsize=(10, 6))

# Shaded bands show +/- one standard deviation across folds
ax_lc.fill_between(
    train_sizes,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    alpha=0.1,
    color="r",
)
ax_lc.fill_between(
    train_sizes,
    test_scores_mean - test_scores_std,
    test_scores_mean + test_scores_std,
    alpha=0.1,
    color="g",
)

# Mean curves drawn on top of the bands
ax_lc.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
ax_lc.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")

ax_lc.set_xlabel("Training examples")
ax_lc.set_ylabel("Accuracy Score")
ax_lc.set_title("Learning Curve")
ax_lc.legend(loc="best")
ax_lc.grid()
plt.show()

## 8. Save the Trained Model

In [None]:
# Target file for the serialized model
model_filename = 'aether_manipulator_gesture_model.joblib'

# Persist the whole pipeline so the scaler travels with the classifier
joblib.dump(best_model_pipeline, model_filename)

print(f"\nModel pipeline saved to {model_filename}")

# Round-trip check: read the file back in
loaded_pipeline = joblib.load(model_filename)
print("\nModel pipeline loaded successfully.")

# Predict on the first test row; the [:1] slice keeps it 2-D (1, n_features)
sample_pred = loaded_pipeline.predict(X_test[:1])
print(f"Prediction for first test sample: {sample_pred[0]} (True label: {y_test[0]})")

## End of Training Notebook