# Training Notebook — AI Motion & AI Tapper (scikit-learn + joblib)

Notebook ini melatih model untuk:
- **AI Motion**: `fatigue_level`, `stroke_type`, `movement_quality` (klasifikasi)
- **AI Tapper**: `safe_class` (klasifikasi) dan `optimal_threshold_cm` (regresi)

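> Dataset: `ai_motion_historic_v2.csv` dan `ai_tapper_historic_v2.csv` (masing-masing 5000 baris). Catatan: sel kode di bawah membaca `ai_motion.csv` dan `ai_tapper.csv`, jadi ubah nama file atau sesuaikan path sebelum menjalankan notebook.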

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import joblib
from pathlib import Path

# Both CSVs are looked up relative to the notebook's working directory.
motion_path, tapper_path = Path("ai_motion.csv"), Path("ai_tapper.csv")

# Read the motion and tapper tables into DataFrames.
dfm = pd.read_csv(filepath_or_buffer=motion_path)
dft = pd.read_csv(filepath_or_buffer=tapper_path)

# Last expression of the cell: a tuple with the first five rows of each frame.
(dfm.head(n=5), dft.head(n=5))

(  athlete_id session_id sensor_id  timestamp_s  body_roll_deg  pitch_deg  \
 0       A007       S001    SENS03            1           4.44      17.90   
 1       A007       S001    SENS03            2           3.23      11.83   
 2       A007       S001    SENS03            3          -3.11      18.94   
 3       A007       S001    SENS03            4          -1.31      14.08   
 4       A007       S001    SENS03            5          -6.33      14.92   
 
    yaw_deg  stroke_rate_spm  lap_speed_mps   stroke_type fatigue_level  \
 0     6.09            34.89          1.180  Breaststroke           Low   
 1     6.92            37.71          1.059  Breaststroke           Low   
 2     7.22            32.88          1.034  Breaststroke           Low   
 3     3.31            37.36          0.947  Breaststroke           Low   
 4     6.29            34.13          1.172  Breaststroke           Low   
 
   movement_quality  
 0           Medium  
 1              Bad  
 2              Ba

## 1) AI Motion — Train models (fatigue_level, stroke_type, movement_quality)

In [None]:
# Dataset (adjust the path to match the CSV file you uploaded to Colab)
dfm = pd.read_csv(filepath_or_buffer="ai_motion.csv")

# Input columns fed to every motion classifier trained in this section.
# The order matters: it fixes the column order of the feature matrix.
motion_features = [
    "body_roll_deg",
    "pitch_deg",
    "yaw_deg",
    "stroke_rate_spm",
    "lap_speed_mps",
]

def train_motion_classifier(target_col, model_name):
    """Train, evaluate and save a random-forest classifier for one motion label.

    Reads the module-level ``dfm`` DataFrame and ``motion_features`` column
    list, holds out a stratified 20% test split, fits a
    ``StandardScaler`` + ``RandomForestClassifier`` pipeline on the remaining
    rows, prints the test accuracy and the classification report, and dumps
    the fitted pipeline to ``<model_name>.pkl``.

    Args:
        target_col: Name of the label column in ``dfm`` to predict.
        model_name: Tag shown in the printed report and used as the stem of
            the output ``.pkl`` file.

    Returns:
        The accuracy of the fitted pipeline on the held-out test split.
    """
    X = dfm[motion_features].values
    y = dfm[target_col].values
    # stratify=y keeps the class proportions the same in train and test.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("clf", RandomForestClassifier(n_estimators=250, random_state=42))
    ])

    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    print(f"[{model_name}] accuracy = {acc:.3f}")
    print(classification_report(y_test, y_pred))

    # Save the fitted pipeline as a .pkl file; the relative path resolves to
    # the current working directory (/content/ when run on Colab).
    joblib.dump(pipe, f"{model_name}.pkl", compress=9)

    # Callers bind the result to acc_* names, so the accuracy must be
    # returned; previously this was commented out and they received None.
    return acc

# Train one classifier per motion label; each call prints its report, writes
# "<model_name>.pkl", and its result is bound to the matching acc_* name.
acc_fatigue = train_motion_classifier(
    target_col="fatigue_level",
    model_name="fatigue_model",
)
acc_stroke = train_motion_classifier(
    target_col="stroke_type",
    model_name="stroke_model",
)
acc_move = train_motion_classifier(
    target_col="movement_quality",
    model_name="movement_quality_model",
)

[fatigue_model] accuracy = 0.509
              precision    recall  f1-score   support

        High       0.58      0.56      0.57       301
         Low       0.51      0.50      0.51       300
      Medium       0.46      0.48      0.47       399

    accuracy                           0.51      1000
   macro avg       0.52      0.51      0.51      1000
weighted avg       0.51      0.51      0.51      1000

[stroke_model] accuracy = 0.993
              precision    recall  f1-score   support

  Backstroke       1.00      0.99      1.00       282
Breaststroke       1.00      0.99      0.99       254
   Butterfly       0.99      0.99      0.99       233
   Freestyle       0.99      1.00      0.99       231

    accuracy                           0.99      1000
   macro avg       0.99      0.99      0.99      1000
weighted avg       0.99      0.99      0.99      1000

[movement_quality_model] accuracy = 0.782
              precision    recall  f1-score   support

         Bad       0.8

## 2) AI Tapper — Train models (safe_class, optimal_threshold_cm)

In [None]:
# Import library di cell awal Colab
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import joblib

# Load the Tapper dataset (adjust the path to the file's location in Colab)
dft = pd.read_csv(filepath_or_buffer="ai_tapper.csv")

# Feature columns for the safe_class classifier.
tapper_cls_features = [
    "distance_cm",
    "time_to_wall_s",
    "speed_mps",
    "pace_s_per_25m",
]

# Feature columns for the optimal_threshold_cm regressor.
tapper_reg_features = [
    "distance_cm",
    "speed_mps",
    "pace_s_per_25m",
    "th_early_cm",
    "th_urgent_cm",
    "th_safety_cm",
    "time_to_wall_s",
]

# -----------------------------
# Safe class (classification)
# -----------------------------
# Feature matrix and label vector taken straight from the tapper frame.
Xc = dft[tapper_cls_features].values
yc = dft["safe_class"].values

# Stratified 80/20 hold-out: both splits keep the label proportions.
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    Xc,
    yc,
    test_size=0.2,
    random_state=42,
    stratify=yc,
)

pipe_safe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", RandomForestClassifier(n_estimators=250, random_state=42)),
    ]
)

# fit() returns the pipeline itself, so prediction can be chained onto it.
yc_pred = pipe_safe.fit(Xc_train, yc_train).predict(Xc_test)

# NOTE(review): the recorded run shows accuracy = 1.000 here; confirm that
# safe_class is not simply a deterministic function of these features.
acc_safe = accuracy_score(yc_test, yc_pred)
print(f"[safe_model] accuracy = {acc_safe:.3f}")
print(classification_report(yc_test, yc_pred))

# Save the fitted pipeline; the relative path resolves to the current
# working directory (/content/ when run on Colab).
joblib.dump(pipe_safe, "safe_model.pkl", compress=9)

# -----------------------------
# Threshold (regression)
# -----------------------------
# Feature matrix and continuous target taken from the tapper frame.
Xr = dft[tapper_reg_features].values
yr = dft["optimal_threshold_cm"].values

# Plain (unstratified) 80/20 hold-out for the regression target.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    Xr,
    yr,
    test_size=0.2,
    random_state=42,
)

pipe_th = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("reg", RandomForestRegressor(n_estimators=350, random_state=42)),
    ]
)

# fit() returns the pipeline itself, so prediction can be chained onto it.
yr_pred = pipe_th.fit(Xr_train, yr_train).predict(Xr_test)

# Report mean absolute error and R^2 on the held-out rows.
mae = mean_absolute_error(yr_test, yr_pred)
r2 = r2_score(yr_test, yr_pred)
print(f"[threshold_model] MAE = {mae:.2f}, R2 = {r2:.3f}")

# Save (export) the fitted regression pipeline. This stays the last
# expression of the cell, so the list of written file names is displayed.
joblib.dump(pipe_th, "threshold_model.pkl", compress=9)


[safe_model] accuracy = 1.000
              precision    recall  f1-score   support

        Safe       1.00      1.00      1.00       922
      Urgent       1.00      1.00      1.00        41

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000

[threshold_model] MAE = 1.53, R2 = 0.910


['threshold_model.pkl']