# Experiment 1: Pose MLP - Enhanced Feature Engineering

Robust comparison of pose-based models using **30-run aggregated statistics** with config-based training.

## Configurations

| Config | Features | Description |
|--------|----------|-------------|
| **A** (Baseline) | 19 | 13 joint angles + 6 distances |
| **B** (Specialized) | 37 | Base + 18 specialized discrimination features |

**Key Features:**
- Enhanced pose features with specialized discrimination for confusion clusters
- Temporal sequences (50 timesteps × N features)
- Subject-wise stratified train/val/test splits (no subject leakage)
- 30 runs per configuration with different random seeds
- Comprehensive statistical analysis and visualization

In [None]:
from pathlib import Path
import sys
import numpy as np

# PROJECT_ROOT is two directory levels above the current working directory:
# Path('..').resolve() is the parent of the CWD, and .parent goes up once more.
# It is appended to sys.path (only if absent), presumably so the `src.*`
# imports below can be resolved -- confirm against the repository layout.
PROJECT_ROOT = Path('..').resolve().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Force reload modules to pick up latest changes
# Each module is imported first so that importlib.reload() receives an
# already-loaded module object; re-running this cell re-executes their code.
import importlib
import src.data.data_loader
import src.data.dataset_builder
import src.scripts.experiment_1
importlib.reload(src.data.data_loader)
importlib.reload(src.data.dataset_builder)
importlib.reload(src.scripts.experiment_1)

# Bind the two entry points used by the cells below (taken after the reload).
from src.data.data_loader import load_pose_enhanced_data
from src.scripts.experiment_1 import train_experiment_1_multi_run


print("✅ Modules loaded successfully")

In [None]:
# Load (and refresh) the plotting helpers used by the analysis cells below.
#
# The module is imported explicitly before it is reloaded: `src.utils` only
# exists as an attribute of `src` once something has imported it, so calling
# importlib.reload(src.utils.visualization) without this import can raise
# AttributeError. `importlib` is re-imported so the cell also works on its own
# once `src` is importable.
import importlib
import src.utils.visualization
importlib.reload(src.utils.visualization)

from src.utils.visualization import (
    plot_confusion_matrix_from_metrics,
    plot_per_class_f1_scores,
    sort_labels_by_numeric_prefix,
    display_multi_run_summary,
    plot_multi_run_distributions,
    plot_best_worst_comparison,
    plot_aggregated_confusion_matrix,
    plot_dual_training_history,
    compare_multi_run_stats,
)
import matplotlib.pyplot as plt
import pandas as pd

print("✅ Visualization functions loaded")

---

## Configuration Selection

Select which configuration to run. Available options:

- **`A`** - Baseline: 19 features (13 angles + 6 distances)
- **`B`** - Base + Specialized: 37 features (19 + 18 specialized discrimination features)

In [None]:
# ============================================================
# 🔧 USER SETTINGS - EDIT THE TWO VALUES BELOW
# ============================================================

SELECTED_CONFIG = 'B'   # which feature configuration to run: 'A' or 'B'

SELECTED_VIEW = 'side'  # which camera view to use: 'front' or 'side'

# ============================================================
# Lookup table for each configuration (DO NOT MODIFY)
# ============================================================
# Every entry carries one feature-type name and one YAML config file per view,
# stored under keys suffixed with the view name ('_front' / '_side').

CONFIG_MAP = {
    'A': dict(
        name='Baseline',
        feature_type_front='all',
        feature_type_side='all',
        num_features=19,
        config_front='experiment_1_baseline_front.yaml',
        config_side='experiment_1_baseline_side.yaml',
        description='13 joint angles + 6 distances',
    ),
    'B': dict(
        name='Base + Specialized',
        feature_type_front='front_all_extended',
        feature_type_side='side_all_extended',
        num_features=37,
        config_front='experiment_1_specialized_front.yaml',
        config_side='experiment_1_specialized_side.yaml',
        description='19 base + 18 specialized discrimination features',
    ),
}

# Resolve the view-specific settings for the chosen configuration.
config_info = CONFIG_MAP[SELECTED_CONFIG]
config_file = config_info[f'config_{SELECTED_VIEW}']
feature_type = config_info[f'feature_type_{SELECTED_VIEW}']

# Echo the resolved selection as a single banner.
rule = "=" * 60
print(
    rule,
    f"📋 SELECTED CONFIGURATION: {SELECTED_CONFIG} - {config_info['name']}",
    rule,
    f"  View: {SELECTED_VIEW.upper()}",
    f"  Feature type: {feature_type}",
    f"  Number of features: {config_info['num_features']}",
    f"  Description: {config_info['description']}",
    f"  Config file: {config_file}",
    rule,
    sep="\n",
)

---

## Data Loading

Load and summarize the enhanced pose feature dataset for the selected view.

In [None]:
# Enhanced-pose NPZ archives, one per camera view, all in the same folder
pose_dir = PROJECT_ROOT / 'datasets' / 'Mediapipe pose estimates'
npz_paths = {
    'front': pose_dir / 'pose_data_front_19_features.npz',
    'side': pose_dir / 'pose_data_side_19_features.npz',
}

# Pick the archive for the selected view and load it with the selected feature type
npz_path = npz_paths[SELECTED_VIEW]
dataset, summary = load_pose_enhanced_data(str(npz_path), feature_type=feature_type)

# Report the basic dataset statistics returned in `summary`
temporal_shape = summary['temporal_shape']
feature_names = summary['feature_names']

print(f"\n📊 {SELECTED_VIEW.upper()} VIEW - {config_info['name']} Features:")
print(f"  Samples: {summary['count']}")
print(f"  Subjects: {summary['unique_subjects']}")
print(f"  Classes: {summary['unique_classes']}")
print(f"  Temporal shape: {temporal_shape} (timesteps × features)")
print(f"  Flattened to: {temporal_shape[0] * temporal_shape[1]} features per sample")

# List at most the first ten feature names, then say how many were left out
print(f"\n  Feature names ({len(feature_names)}):")
for position, name in enumerate(feature_names[:10], start=1):
    print(f"    {position}. {name}")
remaining = len(feature_names) - 10
if remaining > 0:
    print(f"    ... and {remaining} more")

---

## Multi-Run Training (30 Runs)

Execute 30 training runs with different random seeds using the selected configuration.

In [None]:
# Run the multi-run training for the selected configuration and view
rule = "=" * 80
print(rule)
print(f"MULTI-RUN TRAINING: CONFIG {SELECTED_CONFIG} ({config_info['name']}) - {SELECTED_VIEW.upper()} VIEW")
print(rule)

# The YAML file chosen in the configuration cell lives under <PROJECT_ROOT>/config
config_path = PROJECT_ROOT / 'config' / config_file

multi_run_results, aggregated_stats = train_experiment_1_multi_run(
    npz_path=str(npz_path),
    config_path=str(config_path),
)

# Headline numbers: mean ± std of test accuracy and macro F1 from the aggregated stats
acc_stats = aggregated_stats['test_accuracy']
f1_stats = aggregated_stats['test_macro_f1']

print(f"\n✅ Config {SELECTED_CONFIG} ({SELECTED_VIEW} view) multi-run training complete!")
print(f"Mean Test Accuracy: {acc_stats['mean']:.4f} ± {acc_stats['std']:.4f}")
print(f"Mean Test Macro F1: {f1_stats['mean']:.4f} ± {f1_stats['std']:.4f}")

---

## Multi-Run Analysis

Detailed analysis of 30 runs for the selected configuration.

In [None]:
# Banner, then the per-run summary table produced by the visualization helper
rule = "=" * 80
header = f"CONFIG {SELECTED_CONFIG} ({config_info['name']}) - {SELECTED_VIEW.upper()} VIEW: SUMMARY OF 30 RUNS"
print(rule, header, rule, sep="\n")
summary_df = display_multi_run_summary(multi_run_results, aggregated_stats)

### Distribution Plots

In [None]:
# Plot distributions for runs
# Takes the per-run results and the aggregated statistics produced by the training cell.
plot_multi_run_distributions(multi_run_results, aggregated_stats)

In [None]:
# Class labels come from the first run's label->int mapping.
# `label_names` is alphabetical; `ordered_labels` is the display order
# returned by sort_labels_by_numeric_prefix.
# NOTE(review): later cells pass `label_names` as `current_class_order` for
# confusion matrices -- confirm that alphabetical order matches the integer
# indices stored in `label_to_int`.
label_names = sorted(multi_run_results[0]['label_to_int'])
ordered_labels = sort_labels_by_numeric_prefix(label_names)

# Banner (preceded by a blank line), then the best-vs-worst comparison
rule = "=" * 80
print(f"\n{rule}")
print(f"CONFIG {SELECTED_CONFIG}: BEST vs WORST RUN COMPARISON")
print(rule)
comparison_df = plot_best_worst_comparison(multi_run_results, ordered_labels)

### Aggregated Confusion Matrix & Best Run Details

In [None]:
# Aggregated confusion matrix for the multi-run results, normalized, with
# `ordered_labels` used both as the label names and as the display order.
plot_aggregated_confusion_matrix(
    multi_run_results,
    normalize=True,
    label_names=ordered_labels,
    desired_class_order=ordered_labels,
)

In [None]:
def _macro_f1(run):
    """Return the test macro F1 stored in a single run's result dict."""
    return run['test_metrics']['macro_f1']


# The best run is the one with the highest test macro F1
# (max() returns the first such run when several tie).
best_run = max(multi_run_results, key=_macro_f1)
best_metrics = best_run['test_metrics']

print(f"🏆 Best Run Performance (Config {SELECTED_CONFIG} - {SELECTED_VIEW}):")
print(f"   Accuracy: {best_metrics['accuracy']:.4f}")
print(f"   Macro F1: {best_metrics['macro_f1']:.4f}")

In [None]:
# Training curves of the best run: loss on the left panel, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
history = best_run['history']

# One row per panel: (metric name, train key, validation key, train legend, validation legend)
panels = (
    ('Loss', 'loss', 'val_loss', 'Train Loss', 'Val Loss'),
    ('Accuracy', 'accuracy', 'val_accuracy', 'Train Acc', 'Val Acc'),
)

for axis, (metric_name, train_key, val_key, train_label, val_label) in zip(axes, panels):
    axis.plot(history[train_key], label=train_label, linewidth=2)
    axis.plot(history[val_key], label=val_label, linewidth=2)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(metric_name)
    axis.set_title(f'Config {SELECTED_CONFIG} Best Run: {metric_name} Curves')
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### Per-Class F1 Scores

In [None]:
# Index -> label lookup for the best run; keys are cast to int
# (presumably they are strings after a JSON round-trip -- verify).
idx_to_label = {}
for raw_index, label in best_run['int_to_label'].items():
    idx_to_label[int(raw_index)] = label

# Bar chart of the best run's per-class F1 scores, drawn on our own axes
fig, ax = plt.subplots(figsize=(12, 6))
per_class_f1 = best_run['test_metrics']['per_class_f1']
plot_per_class_f1_scores(
    per_class_f1,
    idx_to_label,
    desired_class_order=ordered_labels,
    ax=ax,
)
ax.set_title(
    f'Config {SELECTED_CONFIG} Best Run: Per-Class F1 Scores',
    fontsize=14,
    fontweight='bold',
)
plt.tight_layout()
plt.show()

In [None]:
# Normalized confusion matrix of the best run, reordered from `label_names`
# order into `ordered_labels` order by the plotting helper.
conf_matrix = np.array(best_run['test_metrics']['confusion_matrix'])
cm_title = f'Config {SELECTED_CONFIG} Best Run: Normalized Confusion Matrix'

# NOTE(review): `ax` is created here but never handed to the helper -- confirm
# the helper draws on the current figure rather than opening a new one.
fig, ax = plt.subplots(figsize=(14, 12))
plot_confusion_matrix_from_metrics(
    conf_matrix,
    title=cm_title,
    normalize=True,
    current_class_order=label_names,
    desired_class_order=ordered_labels,
)
plt.tight_layout()
plt.show()

## Final Summary

In [None]:
# Final recap: selected configuration, aggregated multi-run statistics and the best run.
print("=" * 80)
print(f"FINAL SUMMARY: CONFIG {SELECTED_CONFIG} ({config_info['name']}) - {SELECTED_VIEW.upper()} VIEW")
print("=" * 80)
print("\nConfiguration Details:")
# CONFIG_MAP entries have no plain 'feature_type' key; the feature type is
# stored per view as 'feature_type_front' / 'feature_type_side'.
print(f"  Feature type: {config_info[f'feature_type_{SELECTED_VIEW}']}")
print(f"  Number of features: {config_info['num_features']}")
print(f"  Description: {config_info['description']}")

# Mean ± std and min/max of the test metrics, as stored in aggregated_stats
print("\nResults (30 runs):")
print(f"  Test Accuracy: {aggregated_stats['test_accuracy']['mean']:.4f} ± {aggregated_stats['test_accuracy']['std']:.4f}")
print(f"  Test Macro F1: {aggregated_stats['test_macro_f1']['mean']:.4f} ± {aggregated_stats['test_macro_f1']['std']:.4f}")
print(f"  Accuracy Range: [{aggregated_stats['test_accuracy']['min']:.4f}, {aggregated_stats['test_accuracy']['max']:.4f}]")
print(f"  F1 Range: [{aggregated_stats['test_macro_f1']['min']:.4f}, {aggregated_stats['test_macro_f1']['max']:.4f}]")

# `best_run` is the run with the highest test macro F1 (selected in an earlier cell)
print("\nBest Run:")
print(f"  Accuracy: {best_run['test_metrics']['accuracy']:.4f}")
print(f"  Macro F1: {best_run['test_metrics']['macro_f1']:.4f}")
print("=" * 80)

---

## (Optional) Load & Compare Multiple Configurations

Run this section after running multiple configurations to compare their results.

### Side-View Specialized Features (Config B - Side)

The side-view specialized features experiment includes **18 additional features** designed to address confusion patterns specific to the side-view camera angle:

**Feature Groups:**
1. **Vertical Displacement (4 features)**: Targets Shrugs vs Calf Raises
   - `shoulder_elevation_y`, `heel_ground_clearance`, `shoulder_hip_y_ratio`, `ear_shoulder_compression`

2. **Overhead Arm Position (4 features)**: Targets Overhead Triceps Extension
   - `elbow_above_shoulder`, `wrist_above_elbow`, `upper_arm_vertical_angle_side`, `forearm_vertical_angle_side`

3. **Sagittal Arm Trajectory (4 features)**: Targets Curl variants and Pressing movements
   - `wrist_forward_of_shoulder`, `elbow_forward_of_hip`, `arm_reach_forward`, `elbow_tuck_side`

4. **Hip Hinge Profile (4 features)**: Targets Deadlift/Rows/Kickbacks
   - `torso_angle_from_vertical`, `hip_behind_ankle`, `shoulder_forward_of_hip`, `knee_hip_alignment_z`

5. **Postural Stability (2 features)**: General body position context
   - `stance_width_normalized`, `center_of_mass_y`

**Total Features**: 19 base + 18 specialized = **37 features** × 50 timesteps = 1850 input dimensions

**Expected Results**: These features are designed to improve F1 scores for exercises that underperformed with the baseline features (baseline F1 — Shrugs: 0.59, Overhead Triceps Extension: 0.65).

In [None]:
# Load results from saved multi-run folders (after running multiple configs)
import json
from pathlib import Path

def load_multi_run_stats(results_dir: str) -> "dict | None":
    """Load the aggregated statistics of a multi-run results folder.

    Args:
        results_dir: Folder expected to contain ``aggregated_stats.json``.

    Returns:
        The parsed JSON content, or ``None`` when the folder holds no regular
        file named ``aggregated_stats.json``.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    stats_path = Path(results_dir) / 'aggregated_stats.json'
    # is_file() also rejects a directory that happens to carry this name,
    # which exists() would let through to open().
    if not stats_path.is_file():
        return None
    # Explicit encoding: the default is platform dependent, JSON is UTF-8.
    with open(stats_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Saved multi-run folders for every (view, config) pair.
# Update the multi_run folder numbers to match your actual results.
output_root = PROJECT_ROOT / 'output/exer_recog'

result_dirs_front = {
    'A': output_root / 'exp_01_pose_mlp_baseline/front/multi_run_001',
    'B': output_root / 'exp_01_pose_mlp_specialized/front/multi_run_001',
}

result_dirs_side = {
    'A': output_root / 'exp_01_pose_mlp_baseline/side/multi_run_003',
    'B': output_root / 'exp_01_pose_mlp_specialized/side/multi_run_001',
}

# Anything other than 'front' falls back to the side-view folders
if SELECTED_VIEW == 'front':
    result_dirs = result_dirs_front
else:
    result_dirs = result_dirs_side

# Collect the aggregated stats of every config whose folder and stats file are present
all_stats = {}
for config_name, multi_run_folder in result_dirs.items():
    if not multi_run_folder.exists():
        print(f"⚠️  Config {config_name}: Directory not found - {multi_run_folder}")
        continue

    stats = load_multi_run_stats(str(multi_run_folder))
    if not stats:
        print(f"⚠️  Config {config_name}: No aggregated_stats.json found in {multi_run_folder}")
        continue

    all_stats[config_name] = stats
    print(f"✅ Loaded Config {config_name} ({SELECTED_VIEW}) from {multi_run_folder.name}")

print(f"\nLoaded {len(all_stats)} configurations for comparison ({SELECTED_VIEW} view)")

In [None]:
# Compare all loaded configurations: one table row per config, the A -> B
# improvement when both are present, and the config with the best mean macro F1.

# Fallback metadata for a results key that has no entry in CONFIG_MAP. It is
# used for both the table and the "best configuration" line, so an unmapped
# key is reported as 'Unknown' instead of raising KeyError at the very end.
UNKNOWN_CONFIG = {'name': 'Unknown', 'num_features': '?'}

if len(all_stats) >= 2:
    print("=" * 80)
    print(f"CONFIGURATION COMPARISON - {SELECTED_VIEW.upper()} VIEW")
    print("=" * 80)

    comparison_data = []
    for config_name, stats in sorted(all_stats.items()):
        config_detail = CONFIG_MAP.get(config_name, UNKNOWN_CONFIG)
        acc, f1 = stats['test_accuracy'], stats['test_macro_f1']
        comparison_data.append({
            'Config': f"{config_name} ({config_detail['name']})",
            'Features': config_detail['num_features'],
            'Accuracy': f"{acc['mean']:.4f} ± {acc['std']:.4f}",
            'Macro F1': f"{f1['mean']:.4f} ± {f1['std']:.4f}",
            'Acc Range': f"[{acc['min']:.4f}, {acc['max']:.4f}]",
            'F1 Range': f"[{f1['min']:.4f}, {f1['max']:.4f}]",
        })

    comparison_df = pd.DataFrame(comparison_data)
    print("\n")
    print(comparison_df.to_string(index=False))

    # Calculate improvement from Config A to Config B.
    # The bracketed value is the absolute difference of the means times 100.
    if 'A' in all_stats and 'B' in all_stats:
        acc_improvement = all_stats['B']['test_accuracy']['mean'] - all_stats['A']['test_accuracy']['mean']
        f1_improvement = all_stats['B']['test_macro_f1']['mean'] - all_stats['A']['test_macro_f1']['mean']
        print("\n📈 Improvement from Baseline (A) to Specialized (B):")
        print(f"   Accuracy: {acc_improvement:+.4f} ({acc_improvement*100:+.2f}%)")
        print(f"   Macro F1: {f1_improvement:+.4f} ({f1_improvement*100:+.2f}%)")

    # Find best config by mean test macro F1
    best_config = max(all_stats, key=lambda name: all_stats[name]['test_macro_f1']['mean'])
    best_f1 = all_stats[best_config]['test_macro_f1']['mean']
    best_name = CONFIG_MAP.get(best_config, UNKNOWN_CONFIG)['name']
    print(f"\n🏆 Best Configuration: {best_config} ({best_name}) with Macro F1 = {best_f1:.4f}")
    print("=" * 80)
else:
    print("⚠️  Need at least 2 configurations to compare. Run more configs first.")

---

## (Optional) Load & Plot Aggregated Confusion Matrix from Saved Results

Load a saved multi-run result and plot its aggregated confusion matrix across all 30 runs.

In [None]:
# Load all runs from a saved multi-run folder and plot the confusion matrix
# averaged over those runs.
import json
from pathlib import Path

# Specify the multi-run folder to load (update path as needed)
# NOTE(review): this path is fixed to the 'front' specialized results and does
# not follow SELECTED_VIEW / SELECTED_CONFIG -- confirm that is intended.
MULTI_RUN_PATH = PROJECT_ROOT / 'output/exer_recog/exp_01_pose_mlp_specialized/front/multi_run_010'

# Load all runs data
all_runs_path = MULTI_RUN_PATH / 'all_runs.json'
if not all_runs_path.exists():
    print(f"⚠️  File not found: {all_runs_path}")
    print("   Make sure to run training first or update MULTI_RUN_PATH")
else:
    # Explicit encoding: the default is platform dependent, JSON is UTF-8.
    with open(all_runs_path, 'r', encoding='utf-8') as f:
        loaded_runs = json.load(f)

    if not loaded_runs:
        # An empty run list would otherwise fail on loaded_runs[0] below.
        print(f"⚠️  No runs stored in {all_runs_path}; nothing to aggregate")
    else:
        print(f"✅ Loaded {len(loaded_runs)} runs from {MULTI_RUN_PATH.name}")

        # Extract label names and create ordered list.
        # These rebind the notebook-level `label_names` / `ordered_labels`.
        first_run = loaded_runs[0]
        label_names = sorted(first_run['label_to_int'].keys())
        ordered_labels = sort_labels_by_numeric_prefix(label_names)
        num_classes = len(label_names)

        # Sum the per-run confusion matrices, then divide by the run count
        aggregated_cm = np.zeros((num_classes, num_classes), dtype=np.float32)
        for run in loaded_runs:
            aggregated_cm += np.array(run['test_metrics']['confusion_matrix'])
        aggregated_cm /= len(loaded_runs)

        # Plot aggregated confusion matrix
        plot_confusion_matrix_from_metrics(
            aggregated_cm,
            current_class_order=label_names,
            desired_class_order=ordered_labels,
            normalize=True,
            title=f'Aggregated Confusion Matrix (Average of {len(loaded_runs)} runs) - Normalized'
        )
        plt.tight_layout()
        plt.show()

        print("\n📊 Confusion Matrix Statistics:")
        print(f"  Total runs averaged: {len(loaded_runs)}")
        print(f"  Classes: {num_classes}")