In [37]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# ---------------------------
# CONFIG
# ---------------------------
# CSV file that is loaded as the training data.
CSV_FILE = "gesture_samples.csv"

# Sensor columns read from every frame, in the order they are flattened.
FEATURES = [
    "gyro_x", "gyro_y", "gyro_z",
    "acc_x", "acc_y", "acc_z",
]

# Recorded samples have a varying number of frames, so every sample is padded
# or truncated to this fixed length. The average sample had about 6 frames;
# change this as needed to trade speed against accuracy.
FRAME_LEN = 6

# ---------------------------
# STEP 1: Load Data
# ---------------------------
# The timestamp column is not used as a feature; errors="ignore" tolerates
# CSV files that do not contain it.
df = pd.read_csv(CSV_FILE).drop(columns=["timestamp"], errors="ignore")

# ---------------------------
# STEP 2: Group into sequences
# ---------------------------
X_sequences = []
y_labels = []

for seq_id, group in df.groupby("sequence_id"):
    frames = group[FEATURES].values
    n_frames, n_features = frames.shape

    if n_frames > FRAME_LEN:
        # too long: keep only the first FRAME_LEN frames
        frames = frames[:FRAME_LEN, :]
    elif n_frames < FRAME_LEN:
        # too short: append all-zero frames at the end
        filler = np.zeros((FRAME_LEN - n_frames, n_features))
        frames = np.vstack([frames, filler])

    # one flat 1D row per sequence (frame after frame)
    X_sequences.append(frames.flatten())

    # the label is taken from the first row of the sequence
    # (it is expected to be the same for the whole sequence)
    y_labels.append(group["label"].iloc[0])

X = np.array(X_sequences)
y = np.array(y_labels)

# ---------------------------
# STEP 3: Encode labels
# ---------------------------
# Fit the encoder on the raw labels, then replace them with integer ids.
# The fitted encoder is kept so predictions can be mapped back to labels.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

# ---------------------------
# STEP 4: Train-test split
# ---------------------------
# 80/20 split, stratified on the label, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scale features (gyro and acc values have different ranges). The scaler is
# fitted on the training rows only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ---------------------------
# STEP 5: Train Random Forest
# ---------------------------
clf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# ---------------------------
# STEP 6: Evaluate
# ---------------------------
# Per-class precision/recall/F1 on the held-out test split, reported with the
# original label names.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred, target_names=encoder.classes_)
print(report)

# ---------------------------
# STEP 7: Export to joblib
# ---------------------------
# The model, scaler and label encoder are each written to their own file.
for artifact, filename in (
    (clf, "beta_gesture_model.joblib"),
    (scaler, "beta_gesture_scaler.joblib"),
    (encoder, "beta_gesture_encoder.joblib"),
):
    joblib.dump(artifact, filename)

#test new gesture

#Note for anyone reading this: all of the code above will essentially be packaged into 1 line
#that is imported into the inferencePipeline file, which also controls the joystick/button inputs.
#Everything below this point will be included in the inferencePipeline file itself, and may need
#to be tweaked slightly to fit that file's if/else statement hierarchy, since the ordering there
#seems to have a noticeable effect on runtime speed.
def preprocess_gesture(frames, frame_len=FRAME_LEN):
    """
    Turn one recorded gesture into a scaled feature row for the classifier.

    frames: list of [gyro_x, gyro_y, gyro_z, acc_x, acc_y, acc_z]
    frame_len: how many frames to pad/truncate to

    Returns a 2D array with a single row of frame_len * (values per frame)
    entries, transformed by the module-level `scaler`.

    Raises ValueError if `frames` is empty or is not a 2D
    (frames x values) sequence.
    """
    data = np.asarray(frames, dtype=float)

    # An empty buffer or a single flat frame has no second axis; fail with a
    # clear message instead of an IndexError on data.shape[1].
    if data.ndim != 2 or data.size == 0:
        raise ValueError(
            "frames must be a non-empty 2D sequence of per-frame sensor values, "
            f"got array with shape {data.shape}"
        )

    # pad/truncate in one step: start from an all-zero buffer of the target
    # length and copy in as many leading frames as fit
    padded = np.zeros((frame_len, data.shape[1]))
    n_used = min(data.shape[0], frame_len)
    padded[:n_used] = data[:n_used]

    #flatten to a single row
    flat = padded.reshape(1, -1)
    return scaler.transform(flat)   # same scaler as training

#For testing a new gesture, pre-recorded samples are used here.
#I used samples from the recording session: two samples of 5 frames, two of 6 frames, and two of
#7 frames, to see how the algorithm handles padding/truncating.

#Obviously these test samples exist solely for debugging and testing the strength of the model.
#Real user inputs will be made using a buffer in the inferencePipeline file.

# 7 frames (longer than FRAME_LEN = 6, so the last frame is truncated)
test_gesture_z = [
    [39.862595, -26.358778, 7.664122, -0.080185, -0.029011, 0.125],
    [-30.832062, 93.984734, -203.259537, 0.02552, -0.008849, 0.072066],
    [-7.274809, 123.954201, -250.137405, 0.108979, 0.043353, 0.10519],
    [-0.267176, 64.160309, 21.450382, -0.037454, 0.056587, 0.121617],
    [66.045799, -79.946564, 250.129776, -0.148841, 0.010229, 0.079427],
    [7.160306, -5.503817, 125.343513, -0.073961, 0.045221, 0.075693],
    [63.213741, -46.786259, -250.137405, 0.007009, 0.086328, 0.152901]

] 
# 6 frames (exactly FRAME_LEN, so no padding or truncation)
test_gesture_x = [
    [-24.274809, -1.587786, 20.862595, -0.011177, 0.00414, 0.068278],
    [75.35878, -13.206107, 45.916031, -0.054557, 0.026034, 0.125],
    [58.572517, 66.938934, -11.854961, -0.015317, 0.036047, 0.151142],
    [18.061069, -21.229008, -98.435112, 0.056018, 0.032962, 0.130006],
    [-66.251907, -35.61832, -56.694656, 0.025195, 0.006441, 0.057317],
    [-85.145035, -11.488549, 44.38168, -0.040647, -0.036155, 0.066113]

] 
# 5 frames (shorter than FRAME_LEN, so one zero frame is padded on)
test_gesture_c = [
    [-11.541985, 10.122137, -40.305344, 0.058616, 0.001245, 0.066546],
    [99.618324, 30.038168, -26.648855, 0.004168, 0.034098, 0.154984],
    [50.091602, -20.083969, 85.39695, -0.080401, 0.03648, 0.145513],
    [-68.832062, -10.526717, 73.328247, -0.024302, 0.018862, 0.073311],
    [-99.610687, -9.297709, -27.167938, 0.019864, -0.0082, 0.066302]

] 
# 5 frames (one zero frame is padded on)
test_gesture_v = [
    [-0.465649, -3.442748, -2.992366, -0.004465, -0.011664, 0.010879],
    [59.083969, 13, -70.96183, -0.003139, 0.047142, 0.106083],
    [163.71756, -2.648855, -103.351143, -0.018673, 0.089792, 0.192249],
    [56.145039, -38.206108, -76.687019, -0.013693, 0.076315, 0.160045],
    [-165.656494, -49.786259, -80.083969, 0.009282, 0.011528, 0.039727]

] 
# 7 frames (the last frame is truncated)
test_gesture_b = [
    [38.473282, -4.396946, -16.824427, -0.01161, 0.028983, 0.099697],
    [92.175575, 32.870228, -28, -0.010121, 0.048468, 0.207728],
    [80.427483, 11.450381, -129.236649, 0.05729, 0.031852, 0.149437],
    [-113.839691, 16.183207, -74.458015, 0.048955, 0.00046, 0.044788],
    [-71.702293, -31.900763, 11.625955, 0.007117, -0.015858, 0.032853],
    [33.786259, -68.244278, 93.145035, -0.017428, 0.012963, 0.106029],
    [56.137405, 14.122137, 60.717556, -0.045112, 0.016237, 0.168894]

] 
# 6 frames (no padding or truncation)
test_gesture_n = [
    [15.664123, 15.908397, 9.580153, -0.012151, 0.011718, 0.146162],
    [-99.694656, 6.931298, -10.167939, 0.017211, 0.018483, 0.050146],
    [-70.870232, -29.358778, -44.839695, -0.001894, -0.02809, 0.052392],
    [-32.053436, 31.022902, -34.786259, -0.014045, -0.026683, 0.022218],
    [19.366413, 48.847328, -37.282444, -0.009553, 0.021758, 0.066383],
    [153.816788, 21.541985, -28.732824, -0.032231, 0.051174, 0.221686]
] 
# Constant-valued 3-frame inputs (all 0, all 1, all -1); each is padded with
# three zero frames. NOTE(review): presumably meant to check that non-gesture
# input is rejected by the confidence threshold - confirm.
test_gesture_bad0 = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
test_gesture_badp1 = [
    [1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 1, 1],
]
test_gesture_badn1 = [
    [-1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1],
]
#input any test_gesture_* list here to see results
X_new = preprocess_gesture(test_gesture_n)

# Get class probabilities. The columns of predict_proba follow the order of
# clf.classes_, so the winning column index is looked up there rather than
# being used directly as the encoded label.
probs = clf.predict_proba(X_new)[0]
best_idx = int(np.argmax(probs))
max_prob = probs[best_idx]
pred_class = clf.classes_[best_idx]

# Threshold for "failed" gesture: predictions whose top probability is below
# this value are reported as FAILED instead of as a gesture label.
THRESHOLD = 0.6

if max_prob < THRESHOLD:
    gesture = "FAILED"
else:
    # map the encoded class id back to the original label
    gesture = encoder.inverse_transform([pred_class])[0]

print("Predicted gesture:", gesture, "| Confidence:", max_prob)

              precision    recall  f1-score   support

           b       1.00      0.95      0.97        39
           c       1.00      1.00      1.00        20
           n       0.96      1.00      0.98        26
           v       1.00      0.96      0.98        23
           x       1.00      1.00      1.00        20
           z       0.92      1.00      0.96        22

    accuracy                           0.98       150
   macro avg       0.98      0.98      0.98       150
weighted avg       0.98      0.98      0.98       150

Predicted gesture: n | Confidence: 0.995
