In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
import os

# Pin NumPy's legacy global RNG so np.random draws repeat across kernel restarts
np.random.seed(seed=42)

# XGBoost for EMG-to-Finger Position Prediction

XGBoost (eXtreme Gradient Boosting) is a powerful machine learning algorithm built on the gradient boosting framework.
Unlike LSTMs, XGBoost has no built-in notion of sequence order, so we'll turn each time window of EMG samples into a single flat feature vector.

Key advantages:
- Fast training and inference
- Excellent performance on tabular data
- Built-in feature importance
- Less prone to overfitting with proper regularization

In [None]:
# Read the recorded session from its CSV export
data_path = "../../data/afras/raw/integrated_data_20251106_213536.csv"
df = pd.read_csv(data_path)

# Overview of the frame: dimensions, column labels, and a preview of the leading rows
column_labels = df.columns.tolist()
print(f"DataFrame shape: {df.shape}")
print(f"Columns: {column_labels}")
print("\nFirst 5 rows:")
print(df.head())

# Count the missing entries in every column
missing_per_column = df.isna().sum()
print(f"\nMissing values:\n{missing_per_column}")

In [None]:
def create_windowed_features(df, window_size=10, target_cols=None):
    """
    Create features from time windows of EMG data.

    For each time step i >= window_size - 1, the feature vector is the
    flattened block of EMG samples from rows i - window_size + 1 .. i
    (oldest sample first, all EMG columns of one time step kept together),
    and the target is the row-i value of every column in target_cols.

    Args:
        df: DataFrame containing the EMG columns
            ('env0', 'raw0', ..., 'env3', 'raw3') and the target columns.
        window_size: Number of consecutive time steps in each window (>= 1).
        target_cols: Columns to predict. Defaults to
            ['thumb_tip', 'thumb_base', 'index', 'middle', 'ring', 'pinky'].

    Returns:
        X: Feature matrix of shape (n_windows, window_size * 8), where
            n_windows = max(len(df) - window_size + 1, 0).
        y: Target matrix of shape (n_windows, len(target_cols)).
        feature_names: Names of the columns of X, formatted as
            "<emg_col>_t-<lag>" with lag 0 being the current sample.
        target_cols: List of the target column names that were used.

    Raises:
        ValueError: If window_size is smaller than 1.
        KeyError: If an EMG or target column is missing from df.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # EMG features (env and raw signals)
    emg_cols = ['env0', 'raw0', 'env1', 'raw1', 'env2', 'raw2', 'env3', 'raw3']

    # Build a fresh list on every call: the list is handed back to the caller,
    # so a shared mutable default would leak caller-side mutations into later calls.
    if target_cols is None:
        target_cols = ['thumb_tip', 'thumb_base', 'index', 'middle', 'ring', 'pinky']
    else:
        target_cols = list(target_cols)

    emg_values = df[emg_cols].to_numpy()
    # Column-wise selection keeps the dtype of the target columns themselves;
    # a row-wise df.iloc[i] lookup would take on the common dtype of *all*
    # columns in the frame (object as soon as one column is non-numeric).
    target_values = df[target_cols].to_numpy()

    # Number of complete windows; zero when the frame is shorter than one window.
    n_windows = max(len(df) - window_size + 1, 0)

    # Block `offset` holds, for every window, the EMG sample that sits `offset`
    # steps after the window start. Stacking the blocks side by side reproduces
    # the row-major flattening of each (window_size, n_emg_cols) window.
    X = np.hstack([emg_values[offset:offset + n_windows] for offset in range(window_size)])

    # Targets are the finger positions at the last time step of each window.
    # Copy so the result never aliases memory owned by the DataFrame.
    y = target_values[window_size - 1:].copy()

    # Create feature names (same ordering as the columns of X)
    feature_names = [
        f"{col}_t-{window_size - 1 - t}"
        for t in range(window_size)
        for col in emg_cols
    ]

    return X, y, feature_names, target_cols

# Turn the raw frame into flattened EMG windows paired with finger-position targets
window_size = 10
X, y, feature_names, target_names = create_windowed_features(df, window_size)

# Report the dimensions of what was built
n_features = len(feature_names)
n_targets = len(target_names)
print(f"\nFeature matrix shape: {X.shape}")
print(f"Target matrix shape: {y.shape}")
print(f"Number of features: {n_features}")
print(f"Number of targets: {n_targets}")
print(f"Target columns: {target_names}")