# XGBoost Model to Predict Hand Movements Based On EMG Data


## Load Dataset


In [None]:
import pandas as pd

# Read the recorded calibration data and discard its first column.
df = pd.read_csv("../calib_rec_tool/outputV2.csv")
df = df.drop(columns=df.columns[:1])

# Names of the eight sensor columns that are used as model inputs.
features_to_keep = [f"sensor{channel}" for channel in range(1, 9)]

# Feature matrix (one column per sensor) and the matching label vector.
X = df[features_to_keep].to_numpy()
Y = df["label"].to_numpy()

## Preprocessing


### Butterworth filter functions


In [None]:
from scipy.signal import butter, filtfilt


def butter_lowpass(cutoff, fs, order=4):
    """Design a digital low-pass Butterworth filter.

    Args:
        cutoff: Cutoff frequency, expressed in the same units as ``fs``.
        fs: Sampling frequency of the signal to be filtered.
        order: Order of the Butterworth filter.

    Returns:
        The ``(b, a)`` coefficient arrays returned by ``scipy.signal.butter``.
    """
    # scipy expects the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype="low", analog=False)


def lowpass_filter(data, cutoff=20, fs=1000, order=4):
    """Low-pass filter ``data`` along its first axis.

    The coefficients come from ``butter_lowpass`` and are applied with
    ``scipy.signal.filtfilt``, i.e. once forward and once backward.

    Args:
        data: Array-like signal; filtering runs along axis 0.
        cutoff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth filter.

    Returns:
        The filtered signal, with the same shape as ``data``.
    """
    coefficients = butter_lowpass(cutoff, fs, order)
    return filtfilt(*coefficients, data, axis=0)

### Segmenting the signal


In [None]:
import numpy as np


def segment_signal(data, labels, window_size, overlap):
    """Cut a signal into fixed-length, optionally overlapping windows.

    Consecutive windows start ``window_size - overlap`` samples apart. Only
    full windows are produced, including the one that ends exactly on the
    last sample. Each window is labelled with the label of its first sample.

    Args:
        data: Sequence or array indexed by sample along its first axis.
        labels: Per-sample labels, indexable with the same positions as
            ``data``.
        window_size: Number of samples in every window.
        overlap: Number of samples shared by two consecutive windows; must
            be smaller than ``window_size``.

    Returns:
        A tuple ``(segments, segment_labels)`` of NumPy arrays holding the
        windows and one label per window. Both are empty when ``data`` is
        shorter than ``window_size``.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``window_size``, which
            would make the windows never advance.
    """
    step = window_size - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than window_size ({window_size})"
        )

    # The "+ 1" keeps the last full window: a start at len(data) - window_size
    # still yields exactly window_size samples.
    starts = range(0, len(data) - window_size + 1, step)
    segments = [data[start : start + window_size] for start in starts]
    segment_labels = [labels[start] for start in starts]
    return np.array(segments), np.array(segment_labels)

### Extract features


In [None]:
def extract_features(segment):
    """Summarise one segment with per-column statistics.

    Args:
        segment: Array whose first axis runs over the samples of the window.

    Returns:
        A 1-D array made of the column-wise mean, variance, maximum and
        minimum of ``segment``, concatenated in that order.
    """
    statistics = (np.mean, np.var, np.max, np.min)
    return np.concatenate([stat(segment, axis=0) for stat in statistics])

### Preprocess Data


In [None]:
def preprocess_emg_data(
    data: pd.DataFrame, cutoff: int = 20, fs: int = 1000, window_size: int = 50
):
    """Filter, rectify and smooth a multi-channel recording.

    The steps are: low-pass filter along axis 0, take the absolute value,
    then smooth every column with a moving average of ``window_size`` samples.

    NOTE(review): the annotation says ``pd.DataFrame`` but the visible caller
    passes the result of ``DataFrame.to_numpy()`` — confirm the intended type.

    Args:
        data: 2-D signal with samples along axis 0 and one column per channel.
        cutoff: Low-pass cutoff frequency handed to ``lowpass_filter``.
        fs: Sampling frequency handed to ``lowpass_filter``.
        window_size: Length of the moving-average kernel, in samples.

    Returns:
        A 2-D array with one column per channel. The convolution uses
        ``mode="valid"``, so for inputs at least ``window_size`` samples long
        the result has ``window_size - 1`` fewer rows than ``data``.
    """
    # Uniform kernel whose weights sum to one, i.e. a plain moving average.
    kernel = np.ones(window_size) / window_size
    envelope = np.abs(lowpass_filter(data, cutoff, fs))
    smoothed_channels = [
        np.convolve(channel, kernel, mode="valid") for channel in envelope.T
    ]
    # Channels were iterated as rows; transpose back to (samples, channels).
    return np.array(smoothed_channels).T

## Model


In [None]:
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from sklearn.metrics import accuracy_score


class XGBoostClassifier:
    """Wrapper pairing an ``xgb.XGBClassifier`` with a ``LabelEncoder``.

    Targets are label-encoded before fitting, and predictions are decoded
    back, so callers work with their original class labels throughout.
    """

    def __init__(self, seed=42):
        """Create an untrained classifier.

        Args:
            seed: Seed stored on the instance, forwarded to the booster as
                ``random_state`` and used to seed NumPy's global RNG.
        """
        self.seed = seed
        # Forward the seed so the booster itself is seeded, not only NumPy.
        self.clf = xgb.XGBClassifier(random_state=seed)
        self.le = LabelEncoder()
        # Kept from the original behaviour: seeds NumPy's global generator.
        np.random.seed(seed)

    def train(self, X_train, y_train):
        """Fit the label encoder and the booster on the training data.

        Args:
            X_train: Feature matrix, one row per sample.
            y_train: Class labels for the rows of ``X_train``.
        """
        encoded_targets = self.le.fit_transform(y_train)
        self.clf.fit(X_train, encoded_targets)

    def predict(self, X):
        """Predict class labels for ``X``.

        Args:
            X: Feature matrix, one row per sample.

        Returns:
            Predicted labels in the original label space passed to ``train``.
        """
        encoded_predictions = self.clf.predict(X)
        # The booster was fitted on encoded targets; decode so predictions are
        # directly comparable with the labels callers hold.
        return self.le.inverse_transform(encoded_predictions)

    def accuracy(self, y_true, y_pred):
        """Return the fraction of predictions equal to the true labels.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels, in the same label space as ``y_true``.
        """
        return accuracy_score(y_true, y_pred)

## Hyperparameters


In [None]:
from dataclasses import dataclass, field
from typing import List


@dataclass
class Hyperparameters:
    """Search space and fixed settings for the training grid search.

    List-valued fields use ``default_factory`` because ``dataclass`` rejects
    mutable defaults, and so that each instance gets its own list.
    """

    # Random seed handed to the classifier wrapper.
    seed: int = 42
    # Candidate window lengths, in samples.
    window_sizes: List[int] = field(default_factory=lambda: [150, 200, 450])
    # Candidate overlaps between consecutive segments, in samples.
    overlaps: List[int] = field(default_factory=lambda: [50])
    # Candidate low-pass cutoff frequencies.
    cutoffs: List[int] = field(default_factory=lambda: [10, 20])
    # Candidate sampling frequencies used when designing the filter.
    sampling_frequencies: List[int] = field(default_factory=lambda: [100, 200])
    # Butterworth filter order.
    # NOTE(review): not read by the visible training loop — confirm intent.
    order: int = 4

## Training


In [None]:
from sklearn.model_selection import train_test_split

# Grid search: for every combination of window size, overlap, cutoff and
# sampling frequency, train on the main recording, report train/test accuracy,
# then score the model on each separate validation recording.
hyperparams = Hyperparameters()
model = XGBoostClassifier(seed=hyperparams.seed)
final_results = []

# Load every validation recording once instead of re-reading the CSV files on
# each grid iteration.
# NOTE(review): the class labels printed for these files (19 / 11) do not
# match the numbers in the file names (0 / 19) — confirm the pairing.
validation_sets = []
for validation_file, class_label in [
    ("../calib_rec_tool/data_files/validation_moove_0.csv", 19),
    ("../calib_rec_tool/data_files/validation_moove_19.csv", 11),
]:
    df_validation = pd.read_csv(validation_file)
    # Drop this file's own first column rather than a column named after the
    # training frame's first column.
    df_validation = df_validation.drop(columns=[df_validation.columns[0]])
    validation_sets.append(
        (
            class_label,
            df_validation[features_to_keep].to_numpy(),
            df_validation.label.to_numpy(),
        )
    )

for w_s in hyperparams.window_sizes:
    for ol in hyperparams.overlaps:
        for coff in hyperparams.cutoffs:
            for f_s in hyperparams.sampling_frequencies:
                print(
                    f"Starting:\nWindow Size: {w_s} | Overlap: {ol} | Cutoff: {coff} | fs: {f_s}"
                )

                preprocessed_emg_data = preprocess_emg_data(
                    X, cutoff=coff, fs=f_s, window_size=w_s
                )
                # The label vector loaded with the dataset is named ``Y``.
                segments, segment_labels = segment_signal(
                    preprocessed_emg_data, Y, w_s, ol
                )
                features = np.array([extract_features(segment) for segment in segments])

                X_train, X_test, y_train, y_test = train_test_split(
                    features, segment_labels, test_size=0.4, random_state=model.seed
                )
                model.train(X_train, y_train)

                # NOTE(review): these accuracies are only meaningful when
                # model.predict returns labels in the same space as ``Y``.
                y_pred_train = model.predict(X_train)
                train_accuracy = model.accuracy(y_train, y_pred_train)
                print(f"Accuracy train: {train_accuracy * 100:.2f}%")

                y_pred_test = model.predict(X_test)
                test_accuracy = model.accuracy(y_test, y_pred_test)
                print(f"Accuracy test: {test_accuracy * 100:.2f}%")

                # Validate the model
                for class_label, X_val, y_val in validation_sets:
                    # Use the same preprocessing settings as for training so
                    # the features are comparable.
                    val_emg_data = preprocess_emg_data(
                        X_val, cutoff=coff, fs=f_s, window_size=w_s
                    )
                    val_segments, val_labels = segment_signal(
                        val_emg_data, y_val, w_s, ol
                    )
                    val_features = np.array(
                        [extract_features(segment) for segment in val_segments]
                    )

                    # The model never trains on these recordings, so score all
                    # of their segments instead of a random 90% subset.
                    y_pred = model.predict(val_features)
                    val_accuracy = model.accuracy(val_labels, y_pred)
                    print(
                        f"Accuracy validation (full class {class_label}): {val_accuracy * 100:.2f}%"
                    )
                    final_results.append(
                        ([w_s, ol, coff, f_s], round(val_accuracy * 100, 2))
                    )

# Report all runs, best validation accuracy first.
final_results = sorted(final_results, key=lambda x: x[1], reverse=True)
for params, result in final_results:
    print(f"Params: {params}, Result: {result}")