In [None]:
import pickle
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import joblib
import os

In [None]:
# load squat data
with open("../data/data_3D.pickle", "rb") as f:
    data = pickle.load(f)

    labels = data["labels"]
    poses = data["poses"]

print(f"Loaded labels: {labels.shape}, dtype={labels.dtype}")
print(f"Loaded poses:  {poses.shape}, dtype={poses.dtype}")

Loaded labels: (29789, 5), dtype=<U6
Loaded poses:  (29789, 3, 25), dtype=float64
[['SQUAT' 'Hugues' '1' '1' '123']
 ['SQUAT' 'Hugues' '1' '1' '124']
 ['SQUAT' 'Hugues' '1' '1' '125']
 ...
 ['Plank' 'Isinsu' '8' '10' '59724']
 ['Plank' 'Isinsu' '8' '10' '59725']
 ['Plank' 'Isinsu' '8' '10' '59726']]


In [None]:
action_col = 0
subject_col = 1
instruction_col = 2
seq_col = 3

# filter to squat only
mask_squat = labels[:, action_col] == "SQUAT"
labels_squat = labels[mask_squat]

print(f"\nTotal frames in dataset: {labels.shape[0]}")
print(f"Total SQUAT frames: {labels_squat.shape[0]}")

codes = np.unique(labels_squat[:, instruction_col])
print("Unique squat instruction codes:", codes)

code_seq_counts: dict[str, int] = {}
for c in codes:
    mask_c = labels_squat[:, instruction_col] == c
    subj = labels_squat[mask_c, subject_col]
    seq_id = labels_squat[mask_c, seq_col]

    subj_seq = [f"{s}_{q}" for s, q in zip(subj, seq_id)]
    unique_subj_seq = set(subj_seq)


Total frames in dataset: 29789
Total SQUAT frames: 11109
Unique squat instruction codes: ['1' '10' '2' '3' '4' '5']


In [None]:
# drop instruction code 10
action_col = 0
instruction_col = 2

mask_squat = labels[:, action_col] == "SQUAT"
labels_squat = labels[mask_squat]
poses_squat = poses[mask_squat]

mask_valid = np.isin(labels_squat[:, instruction_col], ['1', '2', '3', '4', '5'])

labels_squat = labels_squat[mask_valid]
poses_squat  = poses_squat[mask_valid]

print("Remaining codes:", np.unique(labels_squat[:, instruction_col]))

N_sq, C, J = poses_squat.shape
print(f"Squat poses shape: N={N_sq}, C={C}, J={J}")

root_idx = 0
root_sq = poses_squat[:, :, root_idx][:, :, None]
poses_sq_centered = poses_squat - root_sq

X_squat = poses_sq_centered.reshape(N_sq, -1)

instr_codes_sq = labels_squat[:, instruction_col]
unique_codes = np.unique(instr_codes_sq)
print("Unique squat instruction codes:", unique_codes)

Remaining codes: ['1' '2' '3' '4' '5']
Squat poses shape: N=10283, C=3, J=25
Unique squat instruction codes: ['1' '2' '3' '4' '5']


In [48]:
# train/val/test split
model_name = "ec3d_squat_instruction_mlp.pkl"
code_map_name = "ec3d_squat_instruction_code_map.pkl"

codes_sq = sorted(list(set(instr_codes_sq)))
code_to_id_sq = {c: i for i, c in enumerate(codes_sq)}
id_to_code_sq = {i: c for c, i in code_to_id_sq.items()}

y_squat = np.array([code_to_id_sq[c] for c in instr_codes_sq], dtype=int)

X_train, X_temp, y_train, y_temp = train_test_split(
    X_squat, y_squat,
    test_size=0.3,
    stratify=y_squat,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)
print(f"Train: {X_train.shape[0]} frames")
print(f"Val: {X_val.shape[0]} frames")
print(f"Test: {X_test.shape[0]} frames")


Train: 7198 frames
Val: 1542 frames
Test: 1543 frames


In [49]:
# MLP classifier: multi-class
clf_squat = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        hidden_layer_sizes=(128, 64),
        activation="relu",
        alpha=1e-4,
        max_iter=400,
        early_stopping=True,
        n_iter_no_change=20,
        random_state=42,
    ),
)

clf_squat.fit(X_train, y_train)

0,1,2
,steps,"[('standardscaler', ...), ('mlpclassifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,hidden_layer_sizes,"(128, ...)"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,400
,shuffle,True


In [51]:
# evaluation
target_names_sq = [f"instr_{id_to_code_sq[i]}" for i in sorted(id_to_code_sq.keys())]

print("squat: validation performance")
y_val_pred = clf_squat.predict(X_val)
print(classification_report(y_val, y_val_pred, target_names=target_names_sq))

print("squat: test performance")
y_test_pred = clf_squat.predict(X_test)
print(classification_report(y_test, y_test_pred, target_names=target_names_sq))

# save model + id<->code mapping
models_dir = "../models/squat"
os.makedirs(models_dir, exist_ok=True)
model_path = os.path.join(models_dir, model_name)
code_map_path = os.path.join(models_dir, code_map_name)

joblib.dump(clf_squat, model_path)
joblib.dump(id_to_code_sq, code_map_path)

print("models saved")

squat: validation performance
              precision    recall  f1-score   support

     instr_1       1.00      1.00      1.00       484
     instr_2       1.00      1.00      1.00       245
     instr_3       1.00      1.00      1.00       278
     instr_4       0.97      1.00      0.98       229
     instr_5       1.00      0.97      0.99       306

    accuracy                           0.99      1542
   macro avg       0.99      0.99      0.99      1542
weighted avg       0.99      0.99      0.99      1542

squat: test performance
              precision    recall  f1-score   support

     instr_1       1.00      1.00      1.00       484
     instr_2       1.00      1.00      1.00       246
     instr_3       1.00      1.00      1.00       278
     instr_4       0.99      1.00      1.00       229
     instr_5       1.00      1.00      1.00       306

    accuracy                           1.00      1543
   macro avg       1.00      1.00      1.00      1543
weighted avg       1.00