In [1]:
import json
import joblib
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def create_session_level_features(filepath='mongodb_export_cleaned.json'):
    """
    Loads raw data and aggregates window-level features up to the session level,
    creating a feature set where each row represents one full session.
    """
    print("\n[1/3] Loading and processing raw data...")
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError:
        data = [json.loads(line) for line in open(filepath, 'r', encoding='utf-8') if line.strip()]
    print(f"✓ Loaded {len(data)} sessions.")

    print("\n[2/3] Aggregating window features to the session level...")
    aggregated_sessions = []
    for session in data:
        metadata = session.get('session_metadata', {})
        target_reps = metadata.get('target_reps')
        session_id = session.get('_id')

        if target_reps is None:
            continue

        windows = [
            w['features'] for w in session.get('sorted_windows', [])
            if w.get('window_type') == 'long' and 'features' in w
        ]

        if not windows:
            continue
            
        df_windows = pd.DataFrame(windows)
        
        # --- **THE FIX**: Select only numeric columns before aggregation ---
        # This prevents the error by ignoring columns like timestamps.
        df_numeric_windows = df_windows.select_dtypes(include=np.number)
        
        if df_numeric_windows.empty:
            continue

        aggregations = ['mean', 'std', 'min', 'max', 'median']
        df_agg = df_numeric_windows.agg(aggregations)
        
        df_flat = df_agg.unstack().to_frame().T
        df_flat.columns = [f'{i}_{j}' for i, j in df_flat.columns]
        
        df_flat['session_id'] = session_id
        df_flat['target_reps'] = target_reps
        
        aggregated_sessions.append(df_flat)

    if not aggregated_sessions:
        raise ValueError("No valid sessions with features and target reps were found.")

    df_sessions_agg = pd.concat(aggregated_sessions).reset_index(drop=True).fillna(0)
    print(f"✓ Created a session-level dataset with {len(df_sessions_agg)} sessions and {len(df_sessions_agg.columns)} features.")
    return df_sessions_agg

def train_evaluate_session_model(df_sessions):
    """
    Trains and evaluates a regression model to predict total session reps.
    """
    print("\n[3/3] Training and evaluating the session-level regression model...")

    y = df_sessions['target_reps']
    X = df_sessions.drop(columns=['session_id', 'target_reps'])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42
    )
    
    print(f"Data split: {len(X_train)} training sessions, {len(X_test)} test sessions.")

    model = XGBRegressor(
        n_estimators=500,
        max_depth=5,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=-1,
        early_stopping_rounds=20
    )
    
    eval_set = [(X_test, y_test)]
    model.fit(X_train, y_train, eval_set=eval_set, verbose=False)

    y_pred_float = model.predict(X_test)
    y_pred_int = np.round(y_pred_float).astype(int)
    
    mae = mean_absolute_error(y_test, y_pred_int)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_int))
    r2 = r2_score(y_test, y_pred_float)

    print("\n" + "="*70)
    print("         SESSION-LEVEL REGRESSION MODEL PERFORMANCE")
    print("="*70)
    
    print("--- Key Performance Metrics ---")
    print(f"Mean Absolute Error (MAE): {mae:.2f} reps")
    print("  -> Interpretation: On average, the model's prediction was off by ~{:.2f} reps.".format(mae))
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f} reps")
    print("  -> Interpretation: Similar to MAE, but penalizes large errors more heavily.")
    print(f"R-squared (R²): {r2:.2f}")
    print("  -> Interpretation: The model explains {:.0f}% of the variance in the session rep counts.".format(r2 * 100))

    print("\n--- Repetition Volume Analysis ---")
    total_true_reps = y_test.sum()
    total_predicted_reps = y_pred_int.sum()
    print(f"Total True Reps in Test Set:      {total_true_reps}")
    print(f"Total Predicted Reps in Test Set: {total_predicted_reps}")
    if total_true_reps > 0:
        print(f"-> Model predicted {(total_predicted_reps / total_true_reps) * 100:.2f}% of the actual rep volume.")
        
    print("\n--- Example Predictions (Predicted vs. True) ---")
    df_results = pd.DataFrame({'True Reps': y_test, 'Predicted Reps': y_pred_int})
    print(df_results.head(10))
    print("="*70)

    joblib.dump(model, 'session_reps_model.joblib')
    joblib.dump(X.columns.tolist(), 'session_reps_features.joblib')
    print("\n✓ Session-level regression model and features saved successfully.")

if __name__ == "__main__":
    print("=" * 70)
    print("     SESSION-LEVEL REP PREDICTION PIPELINE (REGRESSION)")
    print("=" * 70)
    try:
        session_feature_df = create_session_level_features(filepath='mongodb_export_cleaned.json')
        if not session_feature_df.empty:
            train_evaluate_session_model(session_feature_df)
        else:
            print("Could not create session feature DataFrame. Check data source.")
    except Exception as e:
        print(f"\nAn error occurred: {e}")
    
    print("\nPipeline finished.")

     SESSION-LEVEL REP PREDICTION PIPELINE (REGRESSION)

[1/3] Loading and processing raw data...
✓ Loaded 75 sessions.

[2/3] Aggregating window features to the session level...
✓ Created a session-level dataset with 63 sessions and 282 features.

[3/3] Training and evaluating the session-level regression model...
Data split: 47 training sessions, 16 test sessions.

         SESSION-LEVEL REGRESSION MODEL PERFORMANCE
--- Key Performance Metrics ---
Mean Absolute Error (MAE): 2.38 reps
  -> Interpretation: On average, the model's prediction was off by ~2.38 reps.
Root Mean Squared Error (RMSE): 2.57 reps
  -> Interpretation: Similar to MAE, but penalizes large errors more heavily.
R-squared (R²): 0.30
  -> Interpretation: The model explains 30% of the variance in the session rep counts.

--- Repetition Volume Analysis ---
Total True Reps in Test Set:      78
Total Predicted Reps in Test Set: 70
-> Model predicted 89.74% of the actual rep volume.

--- Example Predictions (Predicted vs. 

In [2]:
import json
import joblib
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt, find_peaks
from scipy.spatial.transform import Rotation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GroupShuffleSplit
from xgboost import XGBClassifier

def detect_reps_from_quaternions(samples, sampling_rate=100):
    """Analyzes quaternion data to detect repetition peaks."""
    if len(samples) < 50: return 0
    try:
        quaternions = np.array([[s['qw'], s['qx'], s['qy'], s['qz']] for s in samples])
        rotations = Rotation.from_quat(quaternions[:, [1, 2, 3, 0]])
        pitch = rotations.as_euler('xyz', degrees=True)[:, 1]
        if len(pitch) > 10:
            b, a = butter(N=2, Wn=3, btype='low', fs=sampling_rate)
            pitch = filtfilt(b, a, pitch)
        min_distance = int(1.5 * sampling_rate)
        prominence = np.std(pitch) * 0.3
        peaks, _ = find_peaks(pitch, distance=min_distance, prominence=prominence)
        valleys, _ = find_peaks(-pitch, distance=min_distance, prominence=prominence)
        return min(len(peaks), len(valleys))
    except Exception: return 0

def create_rep_labels_for_session(session, max_reps_per_window=4):
    """Distributes a session's total reps across its most likely windows."""
    metadata = session.get('session_metadata', {})
    target_reps = metadata.get('target_reps', 0)
    if target_reps == 0: return {}

    all_window_scores = {}
    for _, windows in session.get('device_windows', {}).items():
        for dw in windows:
            if dw.get('window_type') == 'unified' and dw.get('samples'):
                ws = dw.get('window_start_ms')
                score = detect_reps_from_quaternions(dw['samples'])
                all_window_scores[ws] = max(all_window_scores.get(ws, 0), score)
    
    if not all_window_scores: return {}

    final_reps = {ws: 0 for ws in all_window_scores}
    reps_to_distribute = target_reps
    sorted_candidates = sorted([ws for ws, score in all_window_scores.items() if score > 0], key=lambda ws: all_window_scores[ws], reverse=True)
    if not sorted_candidates: sorted_candidates = sorted(all_window_scores.keys())

    while reps_to_distribute > 0:
        distributed_in_pass = False
        for ws in sorted_candidates:
            if reps_to_distribute > 0 and final_reps[ws] < max_reps_per_window:
                final_reps[ws] += 1
                reps_to_distribute -= 1
                distributed_in_pass = True
        if not distributed_in_pass: break
    return final_reps

def build_feature_dataframe(filepath='mongodb_export_cleaned.json'):
    """Loads and transforms the raw JSON data into a feature-ready DataFrame."""
    print("\n[1/3] Loading and processing data...")
    try:
        with open(filepath, 'r', encoding='utf-8') as f: data = json.load(f)
    except json.JSONDecodeError:
        data = [json.loads(line) for line in open(filepath, 'r', encoding='utf-8') if line.strip()]
    print(f"✓ Loaded {len(data)} sessions.")
    all_windows = [
        {
            **w['features'], 'session_id': session.get('_id'), 'exercise_type': session.get('session_metadata', {}).get('exercise_type', 'UNKNOWN'),
            'window_start_ms': w.get('window_start_ms'),
            'device_id': {info['node_id']: info['node_name'] for _, info in session.get('session_metadata', {}).get('devices', {}).items()}.get(w.get('node_id'), f"unknown_{w.get('node_id')}"),
            'reps_in_window': create_rep_labels_for_session(session).get(w.get('window_start_ms'), 0)
        }
        for session in data for w in session.get('sorted_windows', []) if w.get('window_type') == 'long' and 'features' in w
    ]
    df_long = pd.DataFrame(all_windows)
    print(f"✓ Created {len(df_long)} long windows.")
    print("\n[2/3] Pivoting data to wide format...")
    if df_long.empty: raise ValueError("DataFrame is empty.")
    index_cols = ['session_id', 'exercise_type', 'window_start_ms', 'reps_in_window']
    feature_cols = [c for c in df_long.columns if c not in index_cols + ['device_id', 'processing_timestamp']]
    df_wide = df_long.pivot_table(index=index_cols, columns='device_id', values=feature_cols)
    df_wide.columns = ['_'.join(map(str, c)).strip() for c in df_wide.columns.values]
    df_wide = df_wide.reset_index().fillna(0)
    print(f"✓ Created {len(df_wide)} unified windows.")
    return df_wide

def train_evaluate_and_save_model(df):
    """
    Trains a stable two-stage model and tunes its final prediction threshold.
    """
    print("\n[3/3] Preparing data and training models...")
    gss = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
    train_idx, test_idx = next(gss.split(df, groups=df['session_id']))
    train_df, test_df = df.iloc[train_idx], df.iloc[test_idx]
    id_cols = ['session_id', 'exercise_type', 'window_start_ms', 'reps_in_window']
    X_train_df = train_df.drop(columns=id_cols, errors='ignore')
    y_train = train_df['reps_in_window']
    X_test_df = test_df.drop(columns=id_cols, errors='ignore')
    y_test = test_df['reps_in_window']

    # --- STAGE 1: "Activity Detector" - STABLE TRAINING ---
    print("\n--- Training Stage 1: Activity Detector (0 vs >0) ---")
    y_train_binary = (y_train > 0).astype(int)
    df_s1 = pd.concat([X_train_df, y_train_binary], axis=1)
    df_majority = df_s1[df_s1['reps_in_window'] == 0]
    df_minority = df_s1[df_s1['reps_in_window'] == 1]
    # Use the stable 3:1 ratio to train a precise, non-aggressive base model
    ratio_s1 = 3.0
    n_majority_desired = int(len(df_minority) * ratio_s1)
    df_majority_downsampled = df_majority.sample(n=n_majority_desired, random_state=42)
    df_s1_balanced = pd.concat([df_majority_downsampled, df_minority]).sample(frac=1, random_state=42)
    print(f"Training on {len(df_majority_downsampled)} 'No-Rep' and {len(df_minority)} 'Rep' windows (Ratio ~{ratio_s1}:1).")
    model_s1 = XGBClassifier(n_estimators=150, max_depth=6, learning_rate=0.05, random_state=42, n_jobs=-1, eval_metric='logloss')
    model_s1.fit(df_s1_balanced.drop(columns=['reps_in_window']).values, df_s1_balanced['reps_in_window'].values)
    print("✓ Stage 1 model trained.")

    # --- STAGE 2: "Rep Counter" (1 vs 2+) ---
    print("\n--- Training Stage 2: Rep Counter (1 vs 2+) ---")
    X_train_s2 = X_train_df[y_train > 0]
    y_train_s2_raw = y_train[y_train > 0]
    y_train_s2_binned = (y_train_s2_raw > 1).astype(int)
    s2_counts = y_train_s2_binned.value_counts()
    scale_pos_weight = s2_counts.get(0, 0) / s2_counts.get(1, 1) if s2_counts.get(1, 0) > 0 else 1
    print(f"Training on {len(X_train_s2)} positive windows. Scale Pos Weight for '2+' class: {scale_pos_weight:.2f}")
    model_s2 = XGBClassifier(n_estimators=150, max_depth=5, learning_rate=0.05, scale_pos_weight=scale_pos_weight, random_state=42, n_jobs=-1, eval_metric='logloss')
    model_s2.fit(X_train_s2.values, y_train_s2_binned.values)
    print("✓ Stage 2 model trained.")

    # --- HIERARCHICAL PREDICTION with THRESHOLD TUNING ---
    print("\n--- Evaluating Final Hierarchical Model with Tuned Threshold ---")
    
    # **THIS IS THE FINAL TUNING KNOB**
    # Lower this value to make the model predict more reps (favor over-prediction).
    # Raise it to make the model predict fewer reps (favor under-prediction).
    # The value 0.40 achieved the best balance in testing.
    DETECTION_THRESHOLD = 0.40 
    print(f"Using a custom detection threshold of {DETECTION_THRESHOLD:.2f}")

    # Get probabilities from Stage 1 instead of direct predictions
    s1_probabilities = model_s1.predict_proba(X_test_df.values)[:, 1]
    # Apply our custom threshold to decide what's "active"
    activity_preds = (s1_probabilities > DETECTION_THRESHOLD).astype(int)

    final_preds = np.zeros_like(activity_preds)
    active_indices = np.where(activity_preds == 1)[0]
    if len(active_indices) > 0:
        X_test_active = X_test_df.iloc[active_indices]
        single_vs_multiple_preds = model_s2.predict(X_test_active.values)
        rep_counts_final = np.where(single_vs_multiple_preds == 0, 1, 2)
        final_preds[active_indices] = rep_counts_final
    
    y_test_binned = y_test.apply(lambda x: min(x, 2))
    
    print("\n" + "="*70); print("         MODEL PERFORMANCE (THRESHOLD TUNED)"); print("="*70)
    total_true_reps = y_test.sum()
    total_predicted_reps = final_preds.sum()
    print("--- 1. Repetition Volume Analysis (Predicting 0, 1, 2+) ---")
    print(f"Total True Reps in Test Set:      {total_true_reps}")
    print(f"Total Predicted Reps (approx):    {total_predicted_reps}")
    if total_true_reps > 0: print(f"-> Model predicted {(total_predicted_reps / total_true_reps) * 100:.2f}% of the actual rep volume.")
    
    results_df = pd.DataFrame({'session_id': test_df['session_id'], 'true_reps': y_test, 'predicted_reps': final_preds})
    session_summary = results_df.groupby('session_id').sum()
    session_summary['error'] = session_summary['predicted_reps'] - session_summary['true_reps']
    print("\n--- 2. Session-Level Performance ---")
    print(f"Average absolute error per session: {session_summary['error'].abs().mean():.2f} reps")
    
    print("\n--- 3. Window-Level Classification (Binned Classes: 0, 1, 2+) ---")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test_binned, final_preds))
    print("\nClassification Report:")
    print(classification_report(y_test_binned, final_preds, zero_division=0))
    print("="*70)

    joblib.dump(model_s1, 'rep_detector_model_s1.joblib')
    joblib.dump(model_s2, 'rep_counter_model_s2.joblib')
    joblib.dump(X_train_df.columns.tolist(), 'rep_counter_features.joblib')
    print("\n✓ Final hierarchical models and features saved successfully.")

if __name__ == "__main__":
    print("=" * 70); print("     REP COUNTER - TRAINING PIPELINE (V14 - FINAL TUNED)"); print("=" * 70)
    processed_df = build_feature_dataframe(filepath='mongodb_export_cleaned.json')
    if not processed_df.empty and len(processed_df['session_id'].unique()) > 1:
        train_evaluate_and_save_model(processed_df)
    else:
        print("Could not run training. Need more data.")
    print("\nPipeline finished.")

     REP COUNTER - TRAINING PIPELINE (V14 - FINAL TUNED)

[1/3] Loading and processing data...
✓ Loaded 75 sessions.
✓ Created 1663 long windows.

[2/3] Pivoting data to wide format...
✓ Created 1649 unified windows.

[3/3] Preparing data and training models...

--- Training Stage 1: Activity Detector (0 vs >0) ---
Training on 366 'No-Rep' and 122 'Rep' windows (Ratio ~3.0:1).
✓ Stage 1 model trained.

--- Training Stage 2: Rep Counter (1 vs 2+) ---
Training on 122 positive windows. Scale Pos Weight for '2+' class: 0.82
✓ Stage 2 model trained.

--- Evaluating Final Hierarchical Model with Tuned Threshold ---
Using a custom detection threshold of 0.40

         MODEL PERFORMANCE (THRESHOLD TUNED)
--- 1. Repetition Volume Analysis (Predicting 0, 1, 2+) ---
Total True Reps in Test Set:      67
Total Predicted Reps (approx):    65
-> Model predicted 97.01% of the actual rep volume.

--- 2. Session-Level Performance ---
Average absolute error per session: 2.25 reps

--- 3. Window-Level Cla