In [65]:
import pandas as pd
import numpy as np
import joblib
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

# ---------------------------
# CONFIG
# ---------------------------
CSV_FILE = "final_gestures_dataset.csv"
FEATURES = ["gyro_x", "gyro_y", "gyro_z", "acc_x", "acc_y", "acc_z"]

# Recorded samples have a varying number of frames, so every sample is forced
# to exactly FRAME_LEN frames. The average recording was about 6 frames; this
# value can be changed as needed for time or accuracy purposes.
FRAME_LEN = 7

# ---------------------------
# STEP 1: Load Data
# ---------------------------
# timestamp is not needed; errors="ignore" tolerates a CSV without that column
df = pd.read_csv(CSV_FILE).drop(columns=["timestamp"], errors="ignore")

# ---------------------------
# STEP 2: Group into sequences
# ---------------------------
X_sequences = []
y_labels = []

for seq_id, group in df.groupby("sequence_id"):
    data = group[FEATURES].values
    n_frames, n_features = data.shape

    if n_frames > FRAME_LEN:
        # too long: keep only the first FRAME_LEN frames
        data = data[:FRAME_LEN, :]
    elif n_frames < FRAME_LEN:
        # too short: append all-zero frames at the end
        filler = np.zeros((FRAME_LEN - n_frames, n_features))
        data = np.vstack([data, filler])

    # one flat feature vector per sequence, labelled with the sequence's
    # first label (the label is the same for the whole sequence)
    X_sequences.append(data.flatten())
    y_labels.append(group["label"].iloc[0])

X = np.array(X_sequences)
y = np.array(y_labels)

# ---------------------------
# STEP 3: Encode labels
# ---------------------------
# Map the gesture labels to integer class ids; the fitted encoder is kept so
# predictions can be mapped back to the original labels.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

# ---------------------------
# STEP 4: Train-test split
# ---------------------------
# 80/20 split, stratified so each gesture keeps its share in both parts
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise the features (gyro and acc values have different ranges).
# The scaler is fitted on the training part only, then applied to both parts.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ---------------------------
# STEP 5: Train Random Forest
# ---------------------------
clf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# ---------------------------
# STEP 6: Evaluate
# ---------------------------
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred, target_names=encoder.classes_)
print(report)

# ---------------------------
# STEP 7: Export to joblib
# ---------------------------
# model, scaler and encoder are saved as three separate files
for artifact, filename in (
    (clf, "final_gesture_model.joblib"),
    (scaler, "final_gesture_scaler.joblib"),
    (encoder, "final_gesture_encoder.joblib"),
):
    joblib.dump(artifact, filename)

#test new gesture

#Note for readers: everything above this point will essentially be packaged so that the inferencePipeline
#file (which also controls joystick/button inputs) can pull it in with a single import line.
#Everything below this point will be included in the inferencePipeline file itself, and may need slight tweaks
#there to fit the if/else statement hierarchy, since that seems to have a noticeable effect on runtime speed.
def preprocess_gesture(frames, frame_len=FRAME_LEN):
    """
    Convert one recorded gesture into a single scaled feature row.

    frames: list of [gyro_x, gyro_y, gyro_z, acc_x, acc_y, acc_z];
            a single bare frame (1D) is treated as a one-frame gesture
    frame_len: how many frames to pad/truncate to

    returns: 2D array with one row of frame_len * n_features values,
             transformed by the module-level `scaler`
    raises: ValueError if frames is empty
    """
    # dtype=float keeps integer inputs consistent with the float zero padding;
    # atleast_2d lets a single bare frame through instead of failing on shape[1]
    data = np.atleast_2d(np.asarray(frames, dtype=float))

    # an empty buffer has no feature width to pad to, so fail with a clear message
    if data.size == 0:
        raise ValueError("preprocess_gesture needs at least one frame")

    #pad/truncate (zeros at the end, same as the training sequences)
    n_frames, n_features = data.shape
    if n_frames < frame_len:
        data = np.vstack([data, np.zeros((frame_len - n_frames, n_features))])
    elif n_frames > frame_len:
        data = data[:frame_len, :]

    #flatten
    flat = data.flatten().reshape(1, -1)
    flat = scaler.transform(flat)   # same scaler as training
    return flat

#Pre-recorded gestures used to test the model on a new gesture.
#These are samples from the recording session, chosen to see how the algo handles padding/truncating.
#NOTE(review): an earlier version of this comment described 2 samples each of 5, 6 and 7 frames;
#the samples listed below actually have between 6 and 10 frames (the count is noted on each one).
#With the default FRAME_LEN of 7: 6-frame samples get one zero frame appended, 7-frame samples
#are used as-is, and longer samples keep only their first 7 frames.

#These test samples exist solely for debugging and testing the strength of the model.
#Real user inputs will be made using a buffer in the inferencePipeline file.

#7 frames
test_gesture_x = [
    [3.603053, 21.076336, 52.786259, -0.057994, 0.030526, 0.080023],
    [-134.045807, 54.259541, 35.763359, 0.075774, -0.015074, 0.053745],
    [-119.801529, -13.053435, -98.091606, 0.049848, -0.010933, 0.202614],
    [5.396946, -27.80916, -126.374046, -0.020811, -0.04771, 0.20237],
    [118.96183, -13.206107, -29.755726, -0.082188, -0.041297, 0.196308],
    [120.923668, 47.122139, 84.854965, -0.027793, -0.015317, 0.09856],
    [4.305344, 39.190838, 66.71756, 0.014911, 0.032556, 0.066681]
]

#8 frames
test_gesture_c = [
    [35.198475, 53.64122, -4.122138, 0.036317, 0.007875, 0.159531],
    [24.908398, -105.343513, -135.824432, 0.037102, 0.009959, 0.097694],
    [-110.305344, -118.106873, -56.030533, -0.021081, -0.006116, 0.051743],
    [-199.083969, 52.38168, 173.328247, -0.010311, -0.115609, 0.221692],
    [62.030533, 18.366413, 244.183212, 0.040187, -0.121806, 0.199366],
    [147.07634, -34.70229, 69.740456, 0.086328, -0.063352, 0.162508],
    [137.229004, -29.267176, -125.763359, 0.026467, -0.036967, 0.111415],
    [4.038168, -54.580154, -142.2061, -0.116556, 0.03058, 0.080672]
]

#6 frames
test_gesture_v = [
    [-32.435116, -3.679389, 20.763359, 0.007307, 0.013342, 0.089792],
    [-105.412216, 2.145038, -13.679389, -0.013071, -0.01529, 0.109006],
    [-83.931297, 18.946566, -31.564886, 0.025628, -0.020053, 0.188812],
    [13.259542, 26.786259, -43.839695, 0.009309, -0.015967, 0.18984],
    [114.473282, 34.236641, -54.175571, 0.001651, -0.020297, 0.156743],
    [83.511452, -9.893129, -23.580153, -0.014532, -0.002219, 0.101889]
]

#6 frames
test_gesture_n = [
    [-36.129772, -95.435112, 8.274809, -0.063975, -0.005196, 0.15972],
    [170.122131, -22.099237, 5.19084, -0.045383, -0.027847, 0.178393],
    [146.961838, -5.51145, 17.213741, -0.038753, 0.002273, 0.01916],
    [-18.312977, -57.694656, 3.099237, 0.014668, 0.042975, 0.006387],
    [-189.038162, -110.450378, -105.374046, 0.013369, -0.043651, 0.031067],
    [-179.122131, 43.35878, -114.664124, 0.001867, -0.102322, 0.221692]
]

#10 frames
test_gesture_z = [
    [-7.70229, -11.511451, -1.381679, 0.041216, 0.034883, 0.139234],
    [-11.870229, -13.572519, -91.969467, 0.037833, 0.019106, 0.123863],
    [2.961832, 5.885496, -112.549622, -0.050741, 0.00847, 0.113525],
    [-6.694656, 0.541985, 18.335878, -0.089197, 0.023896, 0.091362],
    [-41.335876, 17.778625, 102.969467, -0.050227, -0.002111, 0.085273],
    [-32.076336, 35.763359, 68.526718, 0.049686, -0.005277, 0.159585],
    [-2.396947, -14.946565, 6.610687, 0.065247, -0.006035, 0.147705],
    [0.618321, -0.580153, -97.786263, 0.028767, -0.021298, 0.131413],
    [-19.816793, -22.465649, -88.274811, -0.042677, -0.02165, 0.131062],
    [16.816793, 12.671756, -4.358778, -0.035587, -0.003708, 0.134038]
]

#7 frames
test_gesture_m = [
    [-15.259542, 34.557251, 2.938931, -0.070876, 0.036209, 0.058156],
    [-85.022903, -40.839695, 58.603054, -0.062973, 0.003951, 0.109358],
    [-49.206108, 40.34351, 12.015267, 0.067087, 0.022597, 0.216334],
    [10.083969, -32.122139, -59.244274, 0.048306, -0.001732, 0.14941],
    [18.267176, -25.687023, -60.053436, -0.086869, -0.014478, 0.128301],
    [-22.694656, -17.229008, 35.396946, -0.081457, -0.017509, 0.07369],
    [-91.030533, -29.496183, 74.122139, 0.023652, -0.027495, 0.206673]
]

#9 frames
test_gesture_b = [
    [2.282443, 60.213741, -8.770992, 0.002571, 0.012882, 0.068305],
    [-92.290077, -42.152672, -37.458015, 0.02763, -0.01326, 0.172142],
    [-3.129771, 3.687023, -48.251907, -0.026927, -0.010717, 0.159829],
    [61.442749, -7.198473, -11.564885, -0.07894, -0.004817, 0.143537],
    [53.748093, 17.80916, 25.87023, -0.064624, 0.009228, 0.10105],
    [-8.641221, 8.106871, 23.541985, -0.038293, 0.015534, 0.089873],
    [-52.67939, 31.534351, 12.465649, -0.029416, 0.006901, 0.108871],
    [-79.480919, 12.206107, 1.664122, -0.01548, 0.007036, 0.172981],
    [38.519085, 14.549619, -27.87023, -0.011934, 0.004249, 0.149626]
]

#10 frames
test_gesture_f = [
    [-16.236641, 18.396946, -15.778625, -0.00322, 0.016427, 0.074312],
    [-71.389313, 9.740458, 22.167938, -0.045383, 0.001705, 0.127598],
    [-43.564884, 31.015266, 42.312977, -0.020459, -0.0128, 0.176932],
    [65.229004, -0.664122, 23.221375, 0.03353, -0.013829, 0.163644],
    [62.061069, -28.748091, 17.427481, 0.024924, -0.005791, 0.113201],
    [36.465649, -3.396947, -37.259541, 0.015723, 0.015534, 0.09128],
    [-16.900763, -6.442748, -54.885498, 0.005656, 0.011095, 0.078453],
    [-83.274811, 8.549619, -8.480916, 0.010879, 0.007144, 0.121373],
    [-17.80916, 12.19084, -26.396946, -0.048008, 0.002138, 0.162508],
    [8.908397, -9.465649, -5.206107, -0.016724, -0.007848, 0.13891]
]

#10 frames
test_gesture_g = [
    [7.755725, 22.358778, 33.511452, -0.092606, 0.020838, 0.103918],
    [-15.503817, 79.885498, 131.824432, -0.016833, 0.004168, 0.180125],
    [-8.80916, -7.251908, 50.366413, 0.063515, 0.013423, 0.095637],
    [-72.648857, 1.152672, 4.534351, -0.041026, -0.027035, 0.004979],
    [-101.267174, 40.564884, -9.496183, -0.020973, -0.060294, 0.221692],
    [25.114504, 4.19084, -82.015266, 0.047277, -0.055369, 0.102159],
    [-14.801527, -3.664122, -152.664124, -0.107815, -0.080753, 0.122754],
    [10.641221, 53.206108, 34.954197, -0.035397, -0.041676, 0.221692],
    [194.091599, 12.175572, 57.61832, -0.008254, -0.045654, 0.067439],
    [-23.038168, -24.549618, 0.984733, -0.026575, 0.023571, 0.058752]
]

#10 frames
test_gesture_h = [
    [-10.603053, -2.099237, -2.908397, 0.040377, 0.026954, 0.109412],
    [-12.946565, -32.213741, -52.274811, 0.045843, 0.017509, 0.132198],
    [18.633587, -99.854965, 8.679389, -0.04974, 0.015642, 0.126245],
    [-45.671757, 18.496183, 6.778626, 0.006278, 0.001894, 0.084758],
    [-56.564884, -26.534351, 35.137405, 0.039835, 0.006035, 0.184239],
    [22.763359, 18.106871, 27.229008, -0.006495, -0.008579, 0.165918],
    [10.679389, 15.679389, 53.351147, 0.039565, -0.019458, 0.098289],
    [-10.435115, -0.641221, -4.21374, 0.058644, 0.003599, 0.136285],
    [74.870232, 11.679389, -26.541985, -0.009851, -0.00387, 0.141562],
    [33.595421, -24.358778, -3.801527, 0.001597, 0.011961, 0.062405]
]

#9 frames
test_gesture_j = [
    [19.534351, -10.511451, 18.343512, -0.0397, -0.01391, 0.160478],
    [80.473282, 50.854961, 33.870228, 0.009824, -0.018565, 0.173278],
    [74.274811, -21.152672, 35.763359, 0.033936, -0.007713, 0.092525],
    [-13.091603, -11.297709, -15.610687, 0.077127, 0.016345, 0.071742],
    [-25.893129, -1.931298, -115.259544, 0.037292, 0.007361, 0.158124],
    [-2.977099, -17.183207, -33.114502, -0.054124, 0.013693, 0.124107],
    [-15.59542, -15.267176, 58.190838, -0.049632, -0.000135, 0.107328],
    [-65.099236, -1.687023, 82.847328, 0.020973, -0.008389, 0.142374],
    [-5.198473, -4.160306, 32.564884, 0.062053, -0.006441, 0.162102]
]

#9 frames
test_gesture_l = [
    [-0.076336, -8.343512, 27.328245, 0.055667, 0.040431, 0.119641],
    [-53.015266, -22.450382, -52.511452, 0.074421, -0.016806, 0.053366],
    [-90.908394, 5.259542, -85.526718, -0.017482, 0.002842, 0.190382],
    [-39.572517, -11.396947, 21.221375, -0.087762, 0.015885, 0.117125],
    [-70.091606, 16.625954, 78.274811, -0.007604, -0.034261, 0.112226],
    [6.793893, -0.870229, 25.053434, 0.08235, -0.020161, 0.19574],
    [86.290077, -44.458015, -82.648857, 0.004601, -0.02828, 0.160045],
    [97.503815, 53.587788, -64.221375, -0.050065, -0.002869, 0.043895],
    [-3.908397, -32.488548, -9.503817, -0.05315, 0.041892, 0.103377]
]


#Synthetic constant-valued inputs, 3 frames each, so preprocess_gesture appends 4 zero frames
#when padding to the default FRAME_LEN of 7.
#Presumably meant to check that non-gesture input falls under the confidence threshold and is
#reported as "FAILED" -- TODO confirm.

#every channel 0
test_gesture_bad0 = [
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
]
#every channel +1
test_gesture_badp1 = [
    [1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 1, 1],
    [1, 1, 1, 1, 1, 1],
]
#every channel -1
test_gesture_badn1 = [
    [-1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1],
]
#gyro channels 50, acc channels 0.5
test_gesture_bad50 = [
    [50, 50, 50, 0.5, 0.5, 0.5],
    [50, 50, 50, 0.5, 0.5, 0.5],
    [50, 50, 50, 0.5, 0.5, 0.5],
]

#input any test_gesture here to see results
#X_new = preprocess_gesture(test_gesture_badn1)
#pred = clf.predict(X_new)[0]
#gesture = encoder.inverse_transform([pred])[0]
#print("Predicted gesture:", gesture)

#test
# Confidence below this value is reported as a "FAILED" gesture.
THRESHOLD = 0.6

X_new = preprocess_gesture(test_gesture_l)

# Time a single predict_proba call (preprocessing is not part of the timing).
start_time = time.perf_counter()
probs = clf.predict_proba(X_new)[0]
end_time = time.perf_counter()

elapsed_ms = (end_time - start_time) * 1000
print(f"Prediction time: {elapsed_ms:.3f} ms")

# Reuse the probabilities from the timed call instead of predicting a second time.
best_idx = np.argmax(probs)
max_prob = probs[best_idx]
# predict_proba columns are ordered by clf.classes_, so map the column index
# through it rather than assuming the index equals the encoded label.
pred_class = clf.classes_[best_idx]

if max_prob < THRESHOLD:
    gesture = "FAILED"
else:
    gesture = encoder.inverse_transform([pred_class])[0]

print("Predicted gesture:", gesture, "| Confidence:", max_prob)

              precision    recall  f1-score   support

           b       1.00      1.00      1.00        18
           c       0.89      0.94      0.92        18
           f       0.95      1.00      0.97        18
           g       1.00      0.95      0.97        19
           h       1.00      0.89      0.94        19
           j       1.00      1.00      1.00        18
           l       1.00      0.95      0.97        19
           m       0.95      1.00      0.97        18
           n       0.95      1.00      0.97        19
           v       1.00      1.00      1.00        18
           x       1.00      1.00      1.00        15
           z       0.94      0.94      0.94        18

    accuracy                           0.97       217
   macro avg       0.97      0.97      0.97       217
weighted avg       0.97      0.97      0.97       217

Prediction time: 9.366 ms
Predicted gesture: l | Confidence: 0.945
