In [1]:
import pandas as pd
import json
import glob
import os
import numpy as np
from datetime import datetime
import sys
import hashlib

# --- Setup Paths ---
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)

# ============================================================
# Safe Parsing Helpers (per specification)
# ============================================================
def safe_get(d, *keys, default='N/A'):
    """Safe nested dictionary access with default fallback"""
    for key in keys:
        if isinstance(d, dict):
            d = d.get(key, {})
        else:
            return default
    return d if (d != {} and d is not None) else default

def safe_float(x, default=0.0):
    """Convert to float safely, strip %, handle None/N/A"""
    if x is None or x == 'N/A':
        return default
    try:
        if isinstance(x, str):
            x = x.replace('%', '').strip()
        return float(x)
    except:
        return default

def compute_file_hash(filepath):
    """
    Compute SHA-256 hash of a file for data integrity verification.
    Returns hash string or 'FILE_NOT_FOUND' if file doesn't exist.
    """
    if not os.path.exists(filepath):
        return 'FILE_NOT_FOUND'
    
    sha256_hash = hashlib.sha256()
    try:
        with open(filepath, "rb") as f:
            # Read file in chunks to handle large files efficiently
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()
    except Exception as e:
        return f'ERROR: {str(e)}'

# ============================================================
# File Discovery (exact suffix matching per specification)
# ============================================================
DERIV_ROOT = os.path.join(PROJECT_ROOT, "derivatives")

# Required suffixes for exact matching (updated to match actual file names)
REQUIRED_SUFFIXES = [
    "__step01_loader_report.json",  # step_01 (actual naming)
    "__preprocess_summary.json",    # step_02  
    "__filtering_summary.json",     # step_04
    "__reference_summary.json",     # step_05
    "__kinematics_summary.json"     # step_06
]

# Scan recursively for *.json files
json_files = glob.glob(os.path.join(DERIV_ROOT, "**", "*.json"), recursive=True)

# Filter files with exact suffix matching
valid_files = []
for json_path in json_files:
    filename = os.path.basename(json_path)
    if any(filename.endswith(suffix) for suffix in REQUIRED_SUFFIXES):
        valid_files.append(json_path)

print(f"üìÅ Found {len(valid_files)} valid JSON files out of {len(json_files)} total")

# ============================================================
# Load + Group by Run_ID (per specification)
# ============================================================
from collections import defaultdict
runs = defaultdict(dict)

for json_path in valid_files:
    filename = os.path.basename(json_path)
    run_id = filename.split("__")[0]
    
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"‚ö†Ô∏è  Failed to load {filename}: {e}")
        continue
    
    # Categorize by exact suffix matching
    if filename.endswith("__step01_loader_report.json"):
        runs[run_id]["step_01"] = data
    elif filename.endswith("__preprocess_summary.json"):
        runs[run_id]["step_02"] = data
    elif filename.endswith("__filtering_summary.json"):
        runs[run_id]["step_04"] = data
    elif filename.endswith("__reference_summary.json"):
        runs[run_id]["step_05"] = data
    elif filename.endswith("__kinematics_summary.json"):
        runs[run_id]["step_06"] = data

# Skip runs missing critical data (require step_01 and step_06)
complete_runs = {rid: steps for rid, steps in runs.items() 
                if steps.get('step_01') and steps.get('step_06')}

print(f"üìä Found data for {len(complete_runs)} complete run(s)")
print(f"üìÅ Steps loaded per run:")
for rid, steps in complete_runs.items():
    print(f"  {rid}: {list(steps.keys())}")
print()

üìÅ Found 20 valid JSON files out of 48 total
üìä Found data for 3 complete run(s)
üìÅ Steps loaded per run:
  763_T2_P2_R2_Take_2025-12-25 10.51.23 AM_005: ['step_02', 'step_04', 'step_06', 'step_05', 'step_01']
  734_T1_P1_R1_Take 2025-12-01 02.18.27 PM: ['step_01', 'step_02', 'step_04', 'step_05', 'step_06']
  734_T1_P2_R1_Take 2025-12-01 02.28.24 PM: ['step_01', 'step_02', 'step_04', 'step_05', 'step_06']



# üìã Master Audit & Results Report

---

## Section 0: Data Lineage & Provenance
**Purpose:** Ensure recording traceability from raw file to final result (Cereatti et al., 2024)

In [None]:
# ============================================================
# SECTION 0: Data Lineage & Provenance
# ============================================================

# Get Git commit hash for pipeline version
try:
    import subprocess
    git_hash = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], 
                                       cwd=PROJECT_ROOT).decode('ascii').strip()
    pipeline_version_git = f"git-{git_hash}"
except Exception:
    pipeline_version_git = "unknown"

# Build provenance table (dictionary-first approach)
provenance_data = []

for run_id, steps in complete_runs.items():
    s01 = steps.get('step_01', {})
    
    # Extract identity information
    processing_date = safe_get(s01, 'identity', 'processing_timestamp')
    pipeline_version_reported = safe_get(s01, 'identity', 'pipeline_version')
    csv_source = safe_get(s01, 'identity', 'csv_source')
    optitrack_version = safe_get(s01, 'raw_data_quality', 'optitrack_version')
    
    # Parse Subject/Session/Take from run_id
    # Expected format: SubjectID_SessionID_P#_R#_Take...
    # Example: "734_T1_P1_R1_Take 2025-12-01 02.18.27 PM"
    parts = run_id.split('_')
    subject_id = parts[0] if len(parts) > 0 else 'N/A'
    session_id = parts[1] if len(parts) > 1 else 'N/A'
    
    # Extract P# and R# (Phrase and Repetition)
    phrase_num = 'N/A'
    rep_num = 'N/A'
    for part in parts:
        if part.startswith('P') and len(part) > 1 and part[1].isdigit():
            phrase_num = part
        elif part.startswith('R') and len(part) > 1 and part[1].isdigit():
            rep_num = part
    
    # Compute SHA-256 hashes
    csv_hash = compute_file_hash(csv_source)
    
    # Find the final processed derivative (step_06 kinematics parquet)
    final_derivative_path = os.path.join(PROJECT_ROOT, "derivatives", "step_06_kinematics", 
                                          f"{run_id}__kinematics.parquet")
    derivative_hash = compute_file_hash(final_derivative_path)
    
    # Integrity check: Verify data integrity
    # In future, step01 should store the hash for comparison verification
    if csv_hash.startswith('ERROR'):
        integrity_status = f"‚ö†Ô∏è {csv_hash}"
    elif csv_hash == 'FILE_NOT_FOUND':
        integrity_status = "‚ùå FILE_MISSING"
    else:
        integrity_status = "‚úÖ OK"
    
    provenance_data.append({
        'Run_ID': run_id,
        'Subject_ID': subject_id,
        'Session_ID': session_id,
        'Phrase': phrase_num,
        'Repetition': rep_num,
        'Processing_Date': processing_date,
        'OptiTrack_Version': optitrack_version,
        'Pipeline_Version_Reported': pipeline_version_reported,
        'Pipeline_Version_Git': pipeline_version_git,
        'Raw_CSV_Path': csv_source,
        'Raw_CSV_SHA256': csv_hash[:16] + '...' if len(csv_hash) > 16 else csv_hash,  # Truncate for display
        'Final_Derivative_SHA256': derivative_hash[:16] + '...' if len(derivative_hash) > 16 else derivative_hash,
        'Integrity_Status': integrity_status
    })

# Create DataFrame from provenance data (dictionary-first approach)
df_provenance = pd.DataFrame(provenance_data)

print("="*80)
print("SECTION 0: DATA LINEAGE & PROVENANCE")
print("="*80)
print(f"Total Runs: {len(df_provenance)}")
print(f"Pipeline Version (Git): {pipeline_version_git}")
print(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*80)
print()

# Display provenance table
display(df_provenance)

---

## Section 1: The R√°cz Calibration Layer
**Purpose:** Verify the "Ground Truth" of the skeleton setup (R√°cz et al., 2025)

In [None]:
# ============================================================
# SECTION 1: The R√°cz Calibration Layer
# ============================================================

# Define calibration thresholds (per R√°cz et al., 2025)
POINTER_ERROR_THRESHOLD_MM = 2.0  # Anatomical landmark precision threshold
WAND_ERROR_THRESHOLD_MM = 1.0     # Global calibration quality threshold
STATIC_OFFSET_THRESHOLD_DEG = 15.0  # Significant joint misalignment threshold

# Build calibration quality table (dictionary-first approach)
calibration_data = []

for run_id, steps in complete_runs.items():
    s01 = steps.get('step_01', {})
    s05 = steps.get('step_05', {})
    
    # Extract OptiTrack calibration metrics (from step01)
    # Note: Currently these are not in the JSON, marked as N/A for future implementation
    pointer_error_mm = safe_float(safe_get(s01, 'calibration', 'pointer_tip_rms_error_mm'), default=None)
    wand_error_mm = safe_float(safe_get(s01, 'calibration', 'wand_error_mm'), default=None)
    
    # If not available in step01, mark as N/A (to be added to pipeline)
    if pointer_error_mm is None or pointer_error_mm == 0.0:
        pointer_error_mm = 'N/A'
        pointer_status = 'N/A'
    else:
        pointer_status = "‚ö†Ô∏è REVIEW" if pointer_error_mm > POINTER_ERROR_THRESHOLD_MM else "‚úÖ OK"
    
    if wand_error_mm is None or wand_error_mm == 0.0:
        wand_error_mm = 'N/A'
        wand_status = 'N/A'
    else:
        wand_status = "‚ö†Ô∏è REVIEW" if wand_error_mm > WAND_ERROR_THRESHOLD_MM else "‚úÖ OK"
    
    # Extract static offset corrections (from step05 reference detection)
    static_offsets = safe_get(s05, 'static_offset_audit', default={})
    
    left_offset_deg = safe_float(safe_get(static_offsets, 'Left', 'measured_angle_deg'), default=0.0)
    right_offset_deg = safe_float(safe_get(static_offsets, 'Right', 'measured_angle_deg'), default=0.0)
    max_static_offset = max(abs(left_offset_deg), abs(right_offset_deg))
    
    # Extract reference pose stability (from step05)
    ref_stability_mm = safe_float(safe_get(s05, 'reference_metrics', 'ref_stability_mm'), default=0.0)
    ref_status = safe_get(s05, 'reference_metrics', 'ref_quality_status', default='UNKNOWN')
    
    # Determine overall calibration status
    calibration_issues = []
    
    # Check pointer error (if available)
    if pointer_status == "‚ö†Ô∏è REVIEW":
        calibration_issues.append(f"Pointer Error > {POINTER_ERROR_THRESHOLD_MM}mm")
    
    # Check wand error (if available)
    if wand_status == "‚ö†Ô∏è REVIEW":
        calibration_issues.append(f"Wand Error > {WAND_ERROR_THRESHOLD_MM}mm")
    
    # Check static offset corrections
    if max_static_offset > STATIC_OFFSET_THRESHOLD_DEG:
        calibration_issues.append(f"Static Offset {max_static_offset:.1f}¬∞ > {STATIC_OFFSET_THRESHOLD_DEG}¬∞")
    
    # Check reference pose stability
    if ref_status != 'PASS':
        calibration_issues.append(f"Reference Status: {ref_status}")
    
    # Overall calibration status
    if len(calibration_issues) > 0:
        overall_status = "‚ö†Ô∏è REVIEW"
        issues_text = "; ".join(calibration_issues)
    else:
        overall_status = "‚úÖ OK"
        issues_text = "None"
    
    calibration_data.append({
        'Run_ID': run_id,
        'Pointer_Tip_RMS_Error_mm': pointer_error_mm,
        'Pointer_Status': pointer_status,
        'Wand_Error_mm': wand_error_mm,
        'Wand_Status': wand_status,
        'Left_Shoulder_Offset_deg': round(left_offset_deg, 2),
        'Right_Shoulder_Offset_deg': round(right_offset_deg, 2),
        'Max_Static_Offset_deg': round(max_static_offset, 2),
        'Ref_Stability_mm': round(ref_stability_mm, 2),
        'Ref_Status': ref_status,
        'Calibration_Status': overall_status,
        'Issues': issues_text
    })

# Create DataFrame from calibration data (dictionary-first approach)
df_calibration = pd.DataFrame(calibration_data)

print("="*80)
print("SECTION 1: R√ÅCZ CALIBRATION LAYER")
print("="*80)
print(f"Total Runs: {len(df_calibration)}")
print(f"Thresholds: Pointer ‚â§ {POINTER_ERROR_THRESHOLD_MM}mm, Wand ‚â§ {WAND_ERROR_THRESHOLD_MM}mm, Static Offset ‚â§ {STATIC_OFFSET_THRESHOLD_DEG}¬∞")
print("="*80)
print()

# Summary statistics
ok_count = (df_calibration['Calibration_Status'] == '‚úÖ OK').sum()
review_count = (df_calibration['Calibration_Status'] == '‚ö†Ô∏è REVIEW').sum()

print(f"Calibration Summary:")
print(f"  ‚úÖ OK: {ok_count}/{len(df_calibration)}")
print(f"  ‚ö†Ô∏è REVIEW: {review_count}/{len(df_calibration)}")
print()

# Display calibration table
display(df_calibration)

# Display detailed issues for REVIEW cases
if review_count > 0:
    print()
    print("="*80)
    print("CALIBRATION ISSUES REQUIRING REVIEW:")
    print("="*80)
    review_runs = df_calibration[df_calibration['Calibration_Status'] == '‚ö†Ô∏è REVIEW']
    for idx, row in review_runs.iterrows():
        print(f"\n{row['Run_ID']}:")
        print(f"  Issues: {row['Issues']}")
        print(f"  Note: Virtual joint centers may be shifted. Verify anatomical landmarks.")

---

## Section 2: Rigid-Body & Temporal Audit
**Purpose:** Prove the skeleton didn't "stretch" or "break" during the dynamic dance

In [None]:
# ============================================================
# SECTION 2: Rigid-Body & Temporal Audit
# ============================================================

# Define thresholds for rigid-body integrity
BONE_LENGTH_VARIANCE_THRESHOLD_PERCENT = 2.0  # Maximum acceptable bone length CV%
TEMPORAL_JITTER_THRESHOLD_MS = 0.5  # Maximum acceptable temporal jitter (std of dt)

# Build rigid-body and temporal audit table (dictionary-first approach)
rigid_body_data = []

for run_id, steps in complete_runs.items():
    s01 = steps.get('step_01', {})
    s02 = steps.get('step_02', {})
    s03 = steps.get('step_03', {})
    
    # ============================================================
    # RIGID-BODY INTEGRITY: Bone Length Stability
    # ============================================================
    # Extract bone length coefficient of variation (CV%) from step02
    # CV% measures how much bone lengths varied during the dynamic trial
    # CV = (std_dev / mean) * 100
    bone_cv_percent = safe_float(safe_get(s02, 'bone_qc_mean_cv'), default=0.0)
    bone_qc_status = safe_get(s02, 'bone_qc_status', default='UNKNOWN')
    worst_bone = safe_get(s02, 'worst_bone', default='N/A')
    bone_alerts = safe_get(s02, 'bone_qc_alerts', default=[])
    
    # Count number of problematic bones
    if isinstance(bone_alerts, list):
        num_bone_alerts = len(bone_alerts)
    else:
        num_bone_alerts = int(bone_alerts) if bone_alerts != 'N/A' else 0
    
    # Determine rigid-body status
    if bone_cv_percent > BONE_LENGTH_VARIANCE_THRESHOLD_PERCENT:
        rigid_body_status = "‚ö†Ô∏è REVIEW"
        rigid_body_issue = f"Bone CV {bone_cv_percent:.2f}% > {BONE_LENGTH_VARIANCE_THRESHOLD_PERCENT}%"
    elif num_bone_alerts > 0:
        rigid_body_status = "‚ö†Ô∏è REVIEW"
        rigid_body_issue = f"{num_bone_alerts} bone(s) with high variance"
    else:
        rigid_body_status = "‚úÖ OK"
        rigid_body_issue = "None"
    
    # ============================================================
    # TEMPORAL INTEGRITY: Sample Time Jitter
    # ============================================================
    # Extract temporal jitter from step03 (resampling)
    # time_grid_std_dt measures the standard deviation of inter-frame intervals
    # If frames were dropped, this will be non-zero
    time_jitter_sec = safe_float(safe_get(s03, 'time_grid_std_dt'), default=0.0)
    time_jitter_ms = round(time_jitter_sec * 1000, 3)  # Convert to ms
    temporal_status = safe_get(s03, 'temporal_status', default='UNKNOWN')
    
    # Extract sampling rate information
    sampling_rate_hz = safe_float(safe_get(s01, 'raw_data_quality', 'sampling_rate_actual'), default=120.0)
    total_frames = safe_get(s01, 'raw_data_quality', 'total_frames', default=0)
    
    # Determine temporal status
    if time_jitter_ms > TEMPORAL_JITTER_THRESHOLD_MS:
        temporal_audit_status = "‚ö†Ô∏è REVIEW"
        temporal_issue = f"Jitter {time_jitter_ms}ms > {TEMPORAL_JITTER_THRESHOLD_MS}ms"
    elif temporal_status != 'PERFECT':
        temporal_audit_status = "‚ö†Ô∏è REVIEW"
        temporal_issue = f"Temporal Status: {temporal_status}"
    else:
        temporal_audit_status = "‚úÖ OK"
        temporal_issue = "None"
    
    # ============================================================
    # OVERALL SECTION 2 STATUS
    # ============================================================
    section2_issues = []
    if rigid_body_status == "‚ö†Ô∏è REVIEW":
        section2_issues.append(rigid_body_issue)
    if temporal_audit_status == "‚ö†Ô∏è REVIEW":
        section2_issues.append(temporal_issue)
    
    if len(section2_issues) > 0:
        overall_section2_status = "‚ö†Ô∏è REVIEW"
        section2_issues_text = "; ".join(section2_issues)
    else:
        overall_section2_status = "‚úÖ OK"
        section2_issues_text = "None"
    
    rigid_body_data.append({
        'Run_ID': run_id,
        'Bone_Length_CV_%': round(bone_cv_percent, 3),
        'Bone_QC_Status': bone_qc_status,
        'Worst_Bone': worst_bone,
        'Num_Bone_Alerts': num_bone_alerts,
        'Rigid_Body_Status': rigid_body_status,
        'Sampling_Rate_Hz': round(sampling_rate_hz, 2),
        'Total_Frames': total_frames,
        'Time_Jitter_ms': time_jitter_ms,
        'Temporal_Status': temporal_status,
        'Temporal_Audit_Status': temporal_audit_status,
        'Section2_Status': overall_section2_status,
        'Issues': section2_issues_text
    })

# Create DataFrame from rigid-body data (dictionary-first approach)
df_rigid_body = pd.DataFrame(rigid_body_data)

print("="*80)
print("SECTION 2: RIGID-BODY & TEMPORAL AUDIT")
print("="*80)
print(f"Total Runs: {len(df_rigid_body)}")
print(f"Thresholds: Bone CV ‚â§ {BONE_LENGTH_VARIANCE_THRESHOLD_PERCENT}%, Time Jitter ‚â§ {TEMPORAL_JITTER_THRESHOLD_MS}ms")
print("="*80)
print()

# Summary statistics
ok_count = (df_rigid_body['Section2_Status'] == '‚úÖ OK').sum()
review_count = (df_rigid_body['Section2_Status'] == '‚ö†Ô∏è REVIEW').sum()

print(f"Rigid-Body & Temporal Summary:")
print(f"  ‚úÖ OK: {ok_count}/{len(df_rigid_body)}")
print(f"  ‚ö†Ô∏è REVIEW: {review_count}/{len(df_rigid_body)}")
print()

# Display rigid-body table
display(df_rigid_body)

# Display detailed issues for REVIEW cases
if review_count > 0:
    print()
    print("="*80)
    print("RIGID-BODY & TEMPORAL ISSUES REQUIRING REVIEW:")
    print("="*80)
    review_runs = df_rigid_body[df_rigid_body['Section2_Status'] == '‚ö†Ô∏è REVIEW']
    for idx, row in review_runs.iterrows():
        print(f"\n{row['Run_ID']}:")
        print(f"  Issues: {row['Issues']}")
        if row['Rigid_Body_Status'] == '‚ö†Ô∏è REVIEW':
            print(f"  Note: Skeleton may have 'stretched' or 'broken' during dynamic trial.")
            print(f"        Worst bone: {row['Worst_Bone']} (CV: {row['Bone_Length_CV_%']:.3f}%)")
        if row['Temporal_Audit_Status'] == '‚ö†Ô∏è REVIEW':
            print(f"  Note: Temporal inconsistency detected - frames may have been dropped.")
            print(f"        Time jitter: {row['Time_Jitter_ms']}ms")

---

## Section 3: Gap & Interpolation Transparency
**Purpose:** "No Silent Fixes" (Winter, 2009) - Full disclosure of data reconstruction

In [None]:
# ============================================================
# SECTION 3: Gap & Interpolation Transparency
# ============================================================

# Define thresholds per Winter (2009) - "No Silent Fixes"
MAX_ACCEPTABLE_MISSING_PERCENT = 5.0  # Reject if > 5% data reconstructed
LINEAR_FALLBACK_WARNING = True  # Flag linear interpolation as scientific compromise

# Build interpolation transparency table (dictionary-first approach)
interpolation_data = []

for run_id, steps in complete_runs.items():
    s01 = steps.get('step_01', {})
    s02 = steps.get('step_02', {})
    
    # ============================================================
    # GLOBAL INTERPOLATION METRICS
    # ============================================================
    # Extract overall missing data percentages
    raw_missing_percent = safe_float(safe_get(s02, 'raw_missing_percent'), default=0.0)
    post_missing_percent = safe_float(safe_get(s02, 'post_missing_percent'), default=0.0)
    frames_fixed_percent = raw_missing_percent  # Percentage of frames that required interpolation
    
    # Extract interpolation method and gap info
    interpolation_method = safe_get(s02, 'interpolation_method', default='unknown')
    max_gap_frames = safe_get(s02, 'max_interpolation_gap', default=0)
    
    # Sampling rate for gap duration calculation
    sampling_rate_hz = safe_float(safe_get(s01, 'raw_data_quality', 'sampling_rate_actual'), default=120.0)
    max_gap_ms = round((safe_float(max_gap_frames) / sampling_rate_hz) * 1000, 2)
    
    # ============================================================
    # INTERPOLATION METHOD CLASSIFICATION
    # ============================================================
    # Detect if linear fallback was used (scientific compromise)
    # Spline/CubicSpline = preferred (smooth, preserves acceleration)
    # Linear = fallback (flattens acceleration, loses high-frequency content)
    
    method_lower = interpolation_method.lower()
    
    if 'linear' in method_lower and 'quaternion' not in method_lower:
        # Pure linear interpolation - ORANGE WARNING
        method_category = "üü† Linear Fallback"
        method_note = "Linear interpolation flattens acceleration (scientific compromise)"
    elif 'spline' in method_lower or 'cubic' in method_lower:
        # Spline-based - PREFERRED
        method_category = "‚úÖ Spline/Cubic"
        method_note = "Smooth interpolation preserving acceleration"
    elif 'slerp' in method_lower or 'quaternion' in method_lower:
        # Quaternion interpolation - GOOD for rotations
        method_category = "‚úÖ Quaternion (SLERP)"
        method_note = "Spherical interpolation for rotations"
    else:
        # Unknown method
        method_category = "‚ö†Ô∏è Unknown"
        method_note = "Interpolation method not documented"
    
    # ============================================================
    # TRANSPARENCY STATUS (Winter 2009: "No Silent Fixes")
    # ============================================================
    transparency_issues = []
    
    # Check 1: Excessive missing data
    if frames_fixed_percent > MAX_ACCEPTABLE_MISSING_PERCENT:
        transparency_issues.append(f"Missing {frames_fixed_percent:.2f}% > {MAX_ACCEPTABLE_MISSING_PERCENT}%")
    
    # Check 2: Linear fallback used (flag as compromise)
    if "Linear Fallback" in method_category and frames_fixed_percent > 0:
        transparency_issues.append(f"Linear fallback used (acceleration flattened)")
    
    # Check 3: Large gaps that required reconstruction
    if max_gap_frames > 50:  # ~0.4s at 120Hz
        transparency_issues.append(f"Large gap: {max_gap_frames} frames ({max_gap_ms}ms)")
    
    # Determine overall transparency status
    if len(transparency_issues) > 0:
        transparency_status = "‚ö†Ô∏è REVIEW"
        transparency_notes = "; ".join(transparency_issues)
    else:
        if frames_fixed_percent == 0.0:
            transparency_status = "‚úÖ PRISTINE"
            transparency_notes = "No interpolation required (pristine data)"
        else:
            transparency_status = "‚úÖ OK"
            transparency_notes = "Minor gaps filled with appropriate method"
    
    interpolation_data.append({
        'Run_ID': run_id,
        'Raw_Missing_%': round(raw_missing_percent, 2),
        'Frames_Fixed_%': round(frames_fixed_percent, 2),
        'Max_Gap_Frames': max_gap_frames,
        'Max_Gap_ms': max_gap_ms,
        'Interpolation_Method': interpolation_method,
        'Method_Category': method_category,
        'Method_Note': method_note,
        'Transparency_Status': transparency_status,
        'Notes': transparency_notes
    })

# Create DataFrame from interpolation data (dictionary-first approach)
df_interpolation = pd.DataFrame(interpolation_data)

print("="*80)
print("SECTION 3: GAP & INTERPOLATION TRANSPARENCY (Winter, 2009)")
print("="*80)
print(f"Total Runs: {len(df_interpolation)}")
print(f"Principle: 'No Silent Fixes' - Full disclosure of data reconstruction")
print(f"Threshold: Missing data ‚â§ {MAX_ACCEPTABLE_MISSING_PERCENT}%")
print("="*80)
print()

# Summary statistics
pristine_count = (df_interpolation['Transparency_Status'] == '‚úÖ PRISTINE').sum()
ok_count = (df_interpolation['Transparency_Status'] == '‚úÖ OK').sum()
review_count = (df_interpolation['Transparency_Status'] == '‚ö†Ô∏è REVIEW').sum()
linear_fallback_count = (df_interpolation['Method_Category'].str.contains('Linear Fallback')).sum()

print(f"Interpolation Summary:")
print(f"  ‚úÖ PRISTINE (no gaps): {pristine_count}/{len(df_interpolation)}")
print(f"  ‚úÖ OK (minor gaps): {ok_count}/{len(df_interpolation)}")
print(f"  ‚ö†Ô∏è REVIEW (issues): {review_count}/{len(df_interpolation)}")
print(f"  üü† Linear Fallback used: {linear_fallback_count}/{len(df_interpolation)}")
print()

# Display interpolation table
display(df_interpolation)

# Display detailed notes for all runs (transparency principle)
print()
print("="*80)
print("INTERPOLATION TRANSPARENCY NOTES (Per Winter 2009):")
print("="*80)
for idx, row in df_interpolation.iterrows():
    print(f"\n{row['Run_ID']}:")
    print(f"  Method: {row['Method_Category']}")
    print(f"  Frames Fixed: {row['Frames_Fixed_%']}%")
    print(f"  Max Gap: {row['Max_Gap_Frames']} frames ({row['Max_Gap_ms']}ms)")
    print(f"  Status: {row['Transparency_Status']}")
    print(f"  Note: {row['Notes']}")
    
    # Additional warning for linear fallback
    if "Linear Fallback" in row['Method_Category']:
        print(f"  ‚ö†Ô∏è  CRITICAL: {row['Method_Note']}")
        print(f"  ‚ö†Ô∏è  Impact: High-frequency acceleration data may be attenuated")

print()
print("="*80)
print("NOTE: Per Winter (2009), all data reconstruction must be fully disclosed.")
print("="*80)

# ============================================================
# PER-JOINT INTERPOLATION DETAILS (Enhancement 2)
# ============================================================
print()
print("="*80)
print("PER-JOINT INTERPOLATION DETAILS (Enhancement 2):")
print("="*80)

# Build per-joint table if available in any run
all_joint_details = []
for run_id, steps in complete_runs.items():
    s02 = steps.get('step_02', {})
    per_joint_data = safe_get(s02, 'interpolation_per_joint', default={})
    
    if per_joint_data and isinstance(per_joint_data, dict) and len(per_joint_data) > 0:
        for joint_name, details in per_joint_data.items():
            method = safe_get(details, 'method', default='N/A')
            method_category = safe_get(details, 'method_category', default=method)
            frames_fixed_pct = safe_float(safe_get(details, 'frames_fixed_percent'), default=0.0)
            max_gap = safe_get(details, 'max_gap_frames', default=0)
            
            # Determine method display with color coding
            if 'linear_fallback' in method_category.lower():
                method_display = "üü† Linear Fallback"
                note = "Acceleration flattened (scientific compromise)"
            elif 'none' in method or frames_fixed_pct == 0.0:
                method_display = "‚úÖ Pristine"
                note = "No interpolation required"
            elif 'quaternion' in method.lower() or 'slerp' in method.lower():
                method_display = "‚úÖ Quaternion/SLERP"
                note = "Smooth spherical interpolation"
            elif 'spline' in method.lower() or 'cubic' in method.lower():
                method_display = "‚úÖ Spline/Cubic"
                note = "Smooth interpolation"
            else:
                method_display = f"‚úÖ {method}"
                note = "Standard interpolation"
            
            all_joint_details.append({
                'Run_ID': run_id,
                'Joint': joint_name,
                'Method': method_display,
                'Frames_Fixed_%': round(frames_fixed_pct, 2),
                'Max_Gap_Frames': max_gap,
                'Note': note
            })

if len(all_joint_details) > 0:
    df_joint_interp = pd.DataFrame(all_joint_details)
    
    # Filter to show only joints that needed interpolation or had issues
    df_joint_interp_filtered = df_joint_interp[df_joint_interp['Frames_Fixed_%'] > 0]
    
    if len(df_joint_interp_filtered) > 0:
        print(f"\nJoints requiring interpolation: {len(df_joint_interp_filtered)}")
        print(f"üü† Linear Fallback cases: {(df_joint_interp['Method'].str.contains('Linear Fallback')).sum()}")
        print()
        display(df_joint_interp_filtered.head(20))  # Show top 20 joints
        
        # Highlight linear fallback cases
        linear_fallback_joints = df_joint_interp_filtered[
            df_joint_interp_filtered['Method'].str.contains('Linear Fallback')
        ]
        if len(linear_fallback_joints) > 0:
            print()
            print("="*80)
            print("üü† LINEAR FALLBACK JOINTS (Acceleration Compromised):")
            print("="*80)
            for idx, row in linear_fallback_joints.iterrows():
                print(f"{row['Run_ID']} | {row['Joint']}: {row['Frames_Fixed_%']}% fixed, Max gap: {row['Max_Gap_Frames']} frames")
            print("="*80)
    else:
        print("\n‚úÖ All joints have pristine data (no interpolation required)")
else:
    print("\n‚ö†Ô∏è  Per-joint interpolation details not available in current pipeline")
    print("   Run Enhancement 2 on notebooks/02_preprocess.ipynb to populate this data")
    print("   See: PIPELINE_ENHANCEMENTS_SUMMARY.md for instructions")

print()
print("="*80)

---

## Section 4: Winter's Residual Validation
**Purpose:** Justify the filtering frequency (Winter, 2009) - Signal vs. Noise separation

In [None]:
# ============================================================
# SECTION 4: Winter's Residual Validation
# ============================================================

# Import matplotlib for plotting
import matplotlib.pyplot as plt
from IPython.display import Image, display as ipy_display

# Define thresholds per Winter (2009)
ARBITRARY_FILTERING_FLAG = True  # Flag if no knee point found
MIN_CUTOFF_HZ = 4.0  # Minimum reasonable cutoff for dance (too low = over-smoothing)
MAX_CUTOFF_HZ = 12.0  # Maximum reasonable cutoff (too high = noise retained)

# Build Winter residual validation table (dictionary-first approach)
winter_data = []

for run_id, steps in complete_runs.items():
    s04 = steps.get('step_04', {})
    
    # ============================================================
    # FILTER PARAMETERS & WINTER ANALYSIS STATUS
    # ============================================================
    # Extract filter parameters
    filter_params = safe_get(s04, 'filter_params', default={})
    
    filter_type = safe_get(filter_params, 'filter_type', default='unknown')
    filter_method = safe_get(filter_params, 'filter_method', default='N/A')
    cutoff_hz = safe_float(safe_get(filter_params, 'filter_cutoff_hz'), default=0.0)
    filter_order = safe_get(filter_params, 'filter_order', default='N/A')
    filter_range = safe_get(filter_params, 'filter_range_hz', default=[1, 12])
    
    # Check if Winter Residual Analysis was performed
    winter_analysis_failed = safe_get(filter_params, 'winter_analysis_failed', default=None)
    expected_dance_range = safe_get(filter_params, 'expected_dance_range', default='N/A')
    
    # Representative column used for analysis
    representative_col = safe_get(filter_params, 'representative_column', default='N/A')
    
    # ============================================================
    # WINTER ANALYSIS STATUS CLASSIFICATION
    # ============================================================
    # Determine if Winter's method was used and if it succeeded
    
    if winter_analysis_failed is None:
        # Winter analysis not attempted (older pipeline or different method)
        winter_status = "‚ö†Ô∏è Not Performed"
        winter_note = "Winter Residual Analysis not attempted - cutoff may be arbitrary"
        knee_point_found = False
    elif winter_analysis_failed == False:
        # Winter analysis succeeded - knee point found
        winter_status = "‚úÖ Validated"
        winter_note = f"Knee point found at {cutoff_hz}Hz (signal/noise separation justified)"
        knee_point_found = True
    else:
        # Winter analysis failed - signal too noisy, no clear knee point
        winter_status = "üî¥ ARBITRARY"
        winter_note = "Knee point NOT found - signal too noisy, filtering is arbitrary"
        knee_point_found = False
    
    # ============================================================
    # CUTOFF FREQUENCY VALIDATION
    # ============================================================
    # Check if cutoff is within reasonable biomechanical range
    cutoff_issues = []
    
    if cutoff_hz < MIN_CUTOFF_HZ:
        cutoff_issues.append(f"Cutoff {cutoff_hz}Hz < {MIN_CUTOFF_HZ}Hz (over-smoothing risk)")
    
    if cutoff_hz > MAX_CUTOFF_HZ:
        cutoff_issues.append(f"Cutoff {cutoff_hz}Hz > {MAX_CUTOFF_HZ}Hz (noise retention risk)")
    
    if not knee_point_found:
        cutoff_issues.append("No knee point found (arbitrary cutoff)")
    
    # Overall Section 4 status
    if len(cutoff_issues) > 0:
        section4_status = "‚ö†Ô∏è REVIEW"
        section4_notes = "; ".join(cutoff_issues)
    else:
        section4_status = "‚úÖ OK"
        section4_notes = "Cutoff justified by Winter Residual Analysis"
    
    # ============================================================
    # CHECK FOR RESIDUAL PLOT EXISTENCE
    # ============================================================
    # Check if Winter residual plot exists for this run
    filter_plot_path = os.path.join(PROJECT_ROOT, "derivatives", "step_04_filtering",
                                    f"{run_id}__filter_check.png")
    psd_plot_path = os.path.join(PROJECT_ROOT, "derivatives", "step_04_filtering",
                                 f"{run_id}__filter_psd_validation.png")
    
    has_filter_plot = os.path.exists(filter_plot_path)
    has_psd_plot = os.path.exists(psd_plot_path)
    
    winter_data.append({
        'Run_ID': run_id,
        'Filter_Type': filter_type,
        'Filter_Method': filter_method,
        'Cutoff_Hz': cutoff_hz,
        'Filter_Order': filter_order,
        'Filter_Range_Hz': f"{filter_range[0]}-{filter_range[1]}" if isinstance(filter_range, list) else str(filter_range),
        'Expected_Range': expected_dance_range,
        'Winter_Status': winter_status,
        'Knee_Point_Found': '‚úÖ Yes' if knee_point_found else '‚ùå No',
        'Representative_Signal': representative_col,
        'Has_Residual_Plot': '‚úÖ Yes' if has_filter_plot else '‚ùå No',
        'Section4_Status': section4_status,
        'Notes': section4_notes,
        'Filter_Plot_Path': filter_plot_path if has_filter_plot else 'N/A',
        'PSD_Plot_Path': psd_plot_path if has_psd_plot else 'N/A'
    })

# Create DataFrame from Winter data (dictionary-first approach)
df_winter = pd.DataFrame(winter_data)

print("="*80)
print("SECTION 4: WINTER'S RESIDUAL VALIDATION (Winter, 2009)")
print("="*80)
print(f"Total Runs: {len(df_winter)}")
print(f"Principle: Filter cutoff must be justified by RMS Residual Analysis")
print(f"Acceptable Range: {MIN_CUTOFF_HZ}-{MAX_CUTOFF_HZ} Hz for dance movements")
print("="*80)
print()

# Summary statistics
validated_count = (df_winter['Winter_Status'] == '‚úÖ Validated').sum()
arbitrary_count = (df_winter['Winter_Status'] == 'üî¥ ARBITRARY').sum()
not_performed_count = (df_winter['Winter_Status'] == '‚ö†Ô∏è Not Performed').sum()
knee_found_count = (df_winter['Knee_Point_Found'] == '‚úÖ Yes').sum()

print(f"Winter Residual Analysis Summary:")
print(f"  ‚úÖ Validated (knee point found): {validated_count}/{len(df_winter)}")
print(f"  üî¥ ARBITRARY (no knee point): {arbitrary_count}/{len(df_winter)}")
print(f"  ‚ö†Ô∏è Not Performed: {not_performed_count}/{len(df_winter)}")
print(f"  Knee points detected: {knee_found_count}/{len(df_winter)}")
print()

# Display Winter validation table
display(df_winter[[
    'Run_ID', 'Filter_Method', 'Cutoff_Hz', 'Filter_Range_Hz',
    'Winter_Status', 'Knee_Point_Found', 'Section4_Status', 'Notes'
]])

# Display detailed analysis for each run
print()
print("="*80)
print("WINTER RESIDUAL ANALYSIS DETAILS (Per Run):")
print("="*80)

for idx, row in df_winter.iterrows():
    print(f"\n{row['Run_ID']}:")
    print(f"  Filter Method: {row['Filter_Method']}")
    print(f"  Selected Cutoff: {row['Cutoff_Hz']} Hz")
    print(f"  Search Range: {row['Filter_Range_Hz']} Hz")
    print(f"  Expected (Dance): {row['Expected_Range']}")
    print(f"  Winter Status: {row['Winter_Status']}")
    print(f"  Knee Point Found: {row['Knee_Point_Found']}")
    print(f"  Representative Signal: {row['Representative_Signal']}")
    print(f"  Status: {row['Section4_Status']}")
    print(f"  Note: {row['Notes']}")
    
    # Flag arbitrary filtering cases
    if row['Winter_Status'] == 'üî¥ ARBITRARY':
        print(f"\n  üî¥ CRITICAL WARNING: Filtering is ARBITRARY")
        print(f"     - No clear signal/noise separation found")
        print(f"     - RMS residual curve has no distinct knee point")
        print(f"     - Cutoff frequency cannot be objectively justified")
        print(f"     - Consider: signal too noisy, markers poorly tracked, or movement too random")
    
    # Display residual plot if available
    if row['Has_Residual_Plot'] == '‚úÖ Yes':
        print(f"\n  üìä Winter Residual Plot Available:")
        print(f"     Path: {row['Filter_Plot_Path']}")
        
        # Optionally display the plot inline
        try:
            ipy_display(Image(filename=row['Filter_Plot_Path']))
        except Exception as e:
            print(f"     (Plot display failed: {e})")

print()
print("="*80)
print("WINTER'S METHOD INTERPRETATION:")
print("="*80)
print("‚úÖ Knee Point Found: Signal and noise clearly separated")
print("   ‚Üí Cutoff frequency is JUSTIFIED and OBJECTIVE")
print()
print("üî¥ No Knee Point (ARBITRARY): Signal too noisy or poorly tracked")
print("   ‚Üí Cutoff frequency is SUBJECTIVE and may over-smooth or under-filter")
print("   ‚Üí Data quality should be reviewed before accepting results")
print()
print("‚ö†Ô∏è Not Performed: Winter analysis not run (legacy or fixed cutoff)")
print("   ‚Üí Cutoff may be arbitrary - verify if appropriate for movement type")
print("="*80)

---

## Building Master Summary Table
**Internal Processing:** Aggregating all JSON summaries into the Truth Table

In [2]:
# ============================================================
# Master Row Schema (fixed columns per specification)
# ============================================================
all_summaries = []

for run_id, steps in complete_runs.items():
    s01 = steps.get('step_01', {})
    s02 = steps.get('step_02', {})
    s04 = steps.get('step_04', {})
    s05 = steps.get('step_05', {})
    s06 = steps.get('step_06', {})
    
    # ============================================================
    # Canonical Fields (normalize differences per specification)
    # ============================================================
    
    # Sampling rate: fps = first_available with fallback to 120.0
    fps = safe_float(
        safe_get(s01, 'raw_data_quality', 'sampling_rate_actual',
                'fs_actual_hz', 'sampling_rate_hz'),
        default=120.0
    )
    
    # Reference status: normalize to uppercase
    ref_status_raw = safe_get(s05, 'reference_metrics', 'ref_quality_status', default='MISSING')
    ref_status = str(ref_status_raw).upper()
    
    # ============================================================
    # Gap Units Logic (use ms directly when available)
    # ============================================================
    max_gap_frames = safe_get(s02, 'max_interpolation_gap', default=0)
    max_gap_ms_raw = safe_get(s02, 'max_gap_ms')  # Check if ms is directly available
    
    if max_gap_ms_raw != 'N/A' and max_gap_ms_raw is not None:
        # Use ms directly when available
        max_gap_ms = safe_float(max_gap_ms_raw)
    else:
        # Compute ms from frames only if ms is missing
        max_gap_ms = round((safe_float(max_gap_frames) / fps) * 1000, 2)
    
    # ============================================================
    # Build Master Row
    # ============================================================
    row = {
        # --- Identity ---
        "Run_ID": run_id,
        "Processing_Date": safe_get(s01, 'identity', 'processing_timestamp'),
        
        # --- Step01 Fields ---
        "OptiTrack_Error_mm": safe_float(safe_get(s01, 'raw_data_quality', 'optitrack_mean_error_mm')),
        "Total_Frames": safe_get(s01, 'raw_data_quality', 'total_frames', default=0),
        
        # --- Step02 Fields ---
        "Missing_Raw_%": safe_float(safe_get(s02, 'raw_missing_percent')),
        "Max_Gap_Frames": max_gap_frames,
        "Max_Gap_MS": max_gap_ms,
        "Bone_Stability_CV": safe_float(safe_get(s02, 'bone_qc_mean_cv')),
        "Skeletal_Alerts": safe_get(s02, 'bone_qc_alerts', default=0),
        "Worst_Bone": safe_get(s02, 'worst_bone'),
        
        # --- Step05 Fields ---
        "Ref_Stability_mm": safe_float(safe_get(s05, 'reference_metrics', 'ref_stability_mm')),
        "Ref_Status": ref_status,
        
        # --- Step06 Signal Quality ---
        "Signal_Noise_RMS": safe_float(safe_get(s06, 'signal_quality', 'avg_vel_residual_rms')),
        "Dom_Freq_Hz": safe_float(safe_get(s06, 'signal_quality', 'avg_dominant_freq_hz')),
        "Quat_Norm_Error": safe_float(safe_get(s06, 'signal_quality', 'max_quat_norm_error')),
        
        # --- Step06 Kinematics ---
        "Max_Ang_Vel": safe_float(safe_get(s06, 'metrics', 'angular_velocity', 'max')),
        "Mean_Ang_Vel": safe_float(safe_get(s06, 'metrics', 'angular_velocity', 'mean')),
        "Max_Lin_Acc": safe_float(safe_get(s06, 'metrics', 'linear_accel', 'max')),
        "Outlier_Frames": safe_get(s06, 'effort_metrics', 'outlier_frame_count', default=0),
        
        # --- Step06 Effort Metrics ---
        "Path_Length_M": round(safe_float(safe_get(s06, 'effort_metrics', 'total_path_length_mm')) / 1000, 2),
        "Intensity_Index": safe_float(safe_get(s06, 'effort_metrics', 'intensity_index')),
        
        # --- Overall Status ---
        "Pipeline_Status": safe_get(s06, 'overall_status'),
    }
    
    # ============================================================
    # Quality Scoring (labeled heuristic per specification)
    # ============================================================
    score = 100.0
    
    # Penalties for data quality
    score -= safe_float(row["Missing_Raw_%"]) * 5
    score -= (safe_float(row["Max_Gap_MS"]) / 10)  # Penalty for large gaps
    
    # Penalties for skeletal stability
    score -= safe_float(row["Bone_Stability_CV"]) * 10 
    score -= safe_float(row["Skeletal_Alerts"]) * 5
    
    # Penalty for reference stability
    ref_stab = safe_float(row["Ref_Stability_mm"])
    if ref_stab > 4.0: 
        score -= 15
    
    # Penalty for signal quality issues
    if safe_float(row["Quat_Norm_Error"]) > 0.1:
        score -= 10
    
    row["Quality_Score"] = round(max(0, min(100, score)), 2)
    row["Quality_Score_Method"] = "heuristic_v1"  # Required label per specification
    
    # ============================================================
    # Research Decision Rule (deterministic per specification)
    # ============================================================
    if (row["Pipeline_Status"] == "PASS" and 
        row["Quality_Score"] >= 75 and 
        row["Ref_Status"] == "PASS" and 
        safe_float(row["Bone_Stability_CV"]) < 1.5):
        row["Research_Decision"] = "ACCEPT"
    elif row["Pipeline_Status"] == "PASS" and row["Quality_Score"] >= 50:
        row["Research_Decision"] = "REVIEW"
    else:
        row["Research_Decision"] = "REJECT"
    
    all_summaries.append(row)

print(f"üìä Processed {len(all_summaries)} complete runs")

üìä Processed 3 complete runs


In [3]:
# ============================================================
# Export to Excel (per specification)
# ============================================================
if not all_summaries:
    print("‚ùå No complete runs found to aggregate!")
else:
    df_master = pd.DataFrame(all_summaries)
    df_master = df_master.sort_values('Quality_Score', ascending=False).reset_index(drop=True)
    
    REPORTS_DIR = os.path.join(PROJECT_ROOT, "reports")
    os.makedirs(REPORTS_DIR, exist_ok=True)
    excel_path = os.path.join(REPORTS_DIR, f"Master_Audit_Log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx")
    
    with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
        df_master.to_excel(writer, index=False, sheet_name='Audit_Log')
        workbook = writer.book
        worksheet = writer.sheets['Audit_Log']
        
        # Header styling
        header_fmt = workbook.add_format({
            'bold': True, 
            'bg_color': '#4472C4', 
            'font_color': 'white',
            'text_wrap': True
        })
        for col_num, value in enumerate(df_master.columns.values):
            worksheet.write(0, col_num, value, header_fmt)
        
        # Conditional formatting for Research_Decision (green/yellow/red)
        red_fmt = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
        yellow_fmt = workbook.add_format({'bg_color': '#FFEB9C', 'font_color': '#9C6500'})
        green_fmt = workbook.add_format({'bg_color': '#C6EFCE', 'font_color': '#006100'})
        
        col_idx = df_master.columns.get_loc("Research_Decision")
        for row_num in range(1, len(df_master) + 1):
            decision = df_master.iloc[row_num-1]['Research_Decision']
            if decision == 'ACCEPT':
                worksheet.write(row_num, col_idx, decision, green_fmt)
            elif decision == 'REVIEW':
                worksheet.write(row_num, col_idx, decision, yellow_fmt)
            else:
                worksheet.write(row_num, col_idx, decision, red_fmt)
        
        # Auto-fit column widths capped at 40 characters
        for i, col in enumerate(df_master.columns):
            max_len = max(
                df_master[col].astype(str).str.len().max(),
                len(str(col))
            )
            worksheet.set_column(i, i, min(max_len + 2, 40))
    
    print(f"\n{'='*70}")
    print(f"üéâ Master Audit Log Created")
    print(f"{'='*70}")
    print(f"üìä Total Runs: {len(all_summaries)}")
    print(f"üíæ File: {excel_path}")
    print(f"{'='*70}\n")
    
    print("Decision Summary:")
    print(df_master['Research_Decision'].value_counts())
    
    print("\nQuality Score Stats:")
    print(f"  Mean: {df_master['Quality_Score'].mean():.2f}")
    print(f"  Min:  {df_master['Quality_Score'].min():.2f}")
    print(f"  Max:  {df_master['Quality_Score'].max():.2f}")
    
    print("\nQuality Score Method:")
    print(f"  {df_master['Quality_Score_Method'].iloc[0] if len(df_master) > 0 else 'N/A'}")
    
    print("\nPreview:")
    display(df_master.head())


üéâ Master Audit Log Created
üìä Total Runs: 3
üíæ File: c:\Users\drorh\OneDrive - Mobileye\Desktop\gaga\reports\Master_Audit_Log_20260119_190230.xlsx

Decision Summary:
Research_Decision
ACCEPT    2
REVIEW    1
Name: count, dtype: int64

Quality Score Stats:
  Mean: 87.79
  Min:  87.59
  Max:  87.91

Quality Score Method:
  heuristic_v1

Preview:


Unnamed: 0,Run_ID,Processing_Date,OptiTrack_Error_mm,Total_Frames,Missing_Raw_%,Max_Gap_Frames,Max_Gap_MS,Bone_Stability_CV,Skeletal_Alerts,Worst_Bone,...,Max_Ang_Vel,Mean_Ang_Vel,Max_Lin_Acc,Outlier_Frames,Path_Length_M,Intensity_Index,Pipeline_Status,Quality_Score,Quality_Score_Method,Research_Decision
0,763_T2_P2_R2_Take_2025-12-25 10.51.23 AM_005,2026-01-14 13:51,0.0,17263,0.0,10,83.33,0.376,0,Hips->Spine,...,1359.12,113.83,44376.02,40,61.28,0.291,PASS,87.91,heuristic_v1,ACCEPT
1,734_T1_P2_R1_Take 2025-12-01 02.28.24 PM,2026-01-13 20:35,0.0,19617,0.0,10,83.33,0.379,0,Hips->Spine,...,900.17,44.92,19427.84,0,35.93,0.386,PASS,87.88,heuristic_v1,ACCEPT
2,734_T1_P1_R1_Take 2025-12-01 02.18.27 PM,2026-01-19 18:35,0.0,30798,0.0,10,83.33,0.408,"[Hips->Spine, Neck->Head]",Hips->Spine,...,1026.98,31.98,38536.2,0,25.67,0.084,PASS,87.59,heuristic_v1,REVIEW


---

## Section 5: ISB Compliance & Synchronized Visualization
**Purpose:** Visual Proof - ISB-compliant Euler sequences + Interactive time-synced stick figure with LCS

In [None]:
# ============================================================
# SECTION 5: ISB Compliance & Synchronized Visualization
# ============================================================

# Import visualization module
from interactive_viz import (
    verify_isb_compliance,
    create_interactive_synchronized_viz,
    create_static_lcs_snapshot
)
import plotly.io as pio

# Define visualization parameters
SHOW_LCS_FOR = ['LeftShoulder', 'RightShoulder', 'Hips', 'Spine1']  # Key joints
LCS_AXIS_LENGTH = 100.0  # mm
SAMPLE_FRAMES = 300  # For performance (full dataset can be slow)

print("="*80)
print("SECTION 5: ISB COMPLIANCE & SYNCHRONIZED VISUALIZATION")
print("="*80)
print("Purpose: Visual Proof - Verify ISB standards + Interactive time-synced anatomy")
print("="*80)
print()

# ============================================================
# PART 1: ISB EULER SEQUENCE VERIFICATION
# ============================================================

print("PART 1: ISB Euler Sequence Verification")
print("-" * 80)

isb_compliance_data = []

for run_id, steps in complete_runs.items():
    print(f"\n{run_id}:")
    
    # Path to Euler validation JSON (from notebook 06)
    euler_validation_path = os.path.join(
        PROJECT_ROOT, "derivatives", "step_06_rotvec",
        f"{run_id}__euler_validation.json"
    )
    
    # Verify ISB compliance
    df_compliance, summary = verify_isb_compliance(euler_validation_path)
    
    if df_compliance is not None:
        # Display summary
        print(f"  Total Joints: {summary['total_joints']}")
        print(f"  ‚úÖ Compliant: {summary['compliant_joints']}")
        print(f"  ‚ö†Ô∏è ROM Violations: {summary['violation_joints']}")
        
        if summary['violation_joints'] > 0:
            print(f"  Violated Joints: {', '.join(summary['violated_joints'][:5])}")
            if len(summary['violated_joints']) > 5:
                print(f"    ... and {len(summary['violated_joints']) - 5} more")
        
        overall_status = "‚úÖ PASS" if summary['overall_status'] == 'PASS' else "‚ö†Ô∏è REVIEW"
        print(f"  Overall Status: {overall_status}")
        
        # Store for summary table
        isb_compliance_data.append({
            'Run_ID': run_id,
            'Total_Joints': summary['total_joints'],
            'Compliant': summary['compliant_joints'],
            'ROM_Violations': summary['violation_joints'],
            'Overall_Status': overall_status,
            'Notes': f"{summary['violation_joints']} joints exceed anatomical ROM (with Gaga 15% tolerance)"
                     if summary['violation_joints'] > 0 
                     else "All joints within ISB-defined ROM limits"
        })
        
        # Display detailed compliance table (first 10 joints)
        print("\n  ISB Sequence Verification (sample):")
        display(df_compliance.head(10))
        
    else:
        print(f"  ‚ùå ERROR: {summary.get('error', 'Unknown error')}")
        isb_compliance_data.append({
            'Run_ID': run_id,
            'Total_Joints': 0,
            'Compliant': 0,
            'ROM_Violations': 0,
            'Overall_Status': '‚ùå NO_DATA',
            'Notes': 'Euler validation not available - run notebook 06 first'
        })

# Create ISB compliance summary table
df_isb = pd.DataFrame(isb_compliance_data)

print("\n" + "="*80)
print("ISB COMPLIANCE SUMMARY")
print("="*80)
display(df_isb)

print("\n" + "="*80)
print("INTERPRETATION:")
print("="*80)
print("‚úÖ PASS: All joints use correct ISB sequences and stay within anatomical ROM")
print("‚ö†Ô∏è REVIEW: Some joints exceed anatomical ROM (may be valid for Gaga expressive dance)")
print("‚ùå NO_DATA: Euler validation not performed - integrate notebook 06 ISB conversion")
print()

# ============================================================
# PART 2: INTERACTIVE SYNCHRONIZED VISUALIZATION
# ============================================================

print("\n" + "="*80)
print("PART 2: Interactive Synchronized Visualization")
print("="*80)
print("Creating time-synced stick figure with LCS + kinematic plots...")
print()

# Select a run to visualize (use first run with data)
visualization_runs = [rid for rid, steps in complete_runs.items() 
                     if 'step_06' in steps]

if len(visualization_runs) > 0:
    viz_run_id = visualization_runs[0]
    print(f"Visualizing: {viz_run_id}")
    print()
    
    # Load kinematic data (from step 06)
    kinematics_path = os.path.join(
        PROJECT_ROOT, "derivatives", "step_06_rotvec",
        f"{viz_run_id}__kinematics_full.parquet"
    )
    
    if os.path.exists(kinematics_path):
        print(f"Loading kinematics: {kinematics_path}")
        df_kin = pd.read_parquet(kinematics_path)
        
        # Load skeleton hierarchy
        hierarchy_path = os.path.join(PROJECT_ROOT, "config", "skeleton_hierarchy.json")
        with open(hierarchy_path) as f:
            hierarchy_data = json.load(f)
        
        bone_hierarchy = [(b['parent'], b['child']) for b in hierarchy_data.get('bones', [])]
        joint_names = list(set([b['parent'] for b in hierarchy_data.get('bones', [])] + 
                               [b['child'] for b in hierarchy_data.get('bones', [])]))
        
        print(f"Loaded {len(df_kin)} frames, {len(joint_names)} joints")
        print()
        
        # ============================================================
        # STATIC SNAPSHOT (for documentation/reports)
        # ============================================================
        print("Creating static LCS snapshot (mid-performance frame)...")
        mid_frame = len(df_kin) // 2
        
        fig_static = create_static_lcs_snapshot(
            df=df_kin,
            joint_names=joint_names,
            bone_hierarchy=bone_hierarchy,
            frame_idx=mid_frame,
            show_lcs_for=SHOW_LCS_FOR,
            axis_length=LCS_AXIS_LENGTH
        )
        
        # Save static figure
        static_path = os.path.join(PROJECT_ROOT, "reports", 
                                  f"{viz_run_id}_lcs_static.html")
        pio.write_html(fig_static, static_path)
        print(f"‚úÖ Static snapshot saved: {static_path}")
        
        # Display static figure
        fig_static.show()
        
        print()
        print("-" * 80)
        
        # ============================================================
        # INTERACTIVE SYNCHRONIZED VISUALIZATION (THE BIG ONE)
        # ============================================================
        print("Creating interactive synchronized visualization...")
        print("  This includes:")
        print("    - 3D skeleton with LCS axes (X/Y/Z arrows)")
        print("    - Position plot (X, Y, Z components)")
        print("    - Velocity plot (speed magnitude)")
        print("    - Shared slider for time synchronization")
        print()
        
        fig_interactive = create_interactive_synchronized_viz(
            df=df_kin,
            joint_names=joint_names,
            bone_hierarchy=bone_hierarchy,
            show_lcs_for=SHOW_LCS_FOR,
            axis_length=LCS_AXIS_LENGTH,
            sample_frames=SAMPLE_FRAMES
        )
        
        # Save interactive figure
        interactive_path = os.path.join(PROJECT_ROOT, "reports",
                                       f"{viz_run_id}_interactive_synced.html")
        pio.write_html(fig_interactive, interactive_path)
        print(f"‚úÖ Interactive visualization saved: {interactive_path}")
        print()
        
        # Display interactive figure
        print("üìä INTERACTIVE VISUALIZATION:")
        print("   ‚Üí Use the slider to move through time")
        print("   ‚Üí All three plots update simultaneously")
        print("   ‚Üí Verify LCS axes remain stable (no spinning)")
        print("   ‚Üí Press ‚ñ∂ Play to animate")
        print()
        
        fig_interactive.show()
        
    else:
        print(f"‚ùå ERROR: Kinematics file not found: {kinematics_path}")
        print("   Run notebook 06 to generate kinematic derivatives")

else:
    print("‚ùå No runs with step_06 data available for visualization")
    print("   Run notebook 06 (Euler/Omega) first to generate kinematic data")

print()
print("="*80)
print("SECTION 5 COMPLETE")
print("="*80)
print("‚úÖ ISB Compliance: Verified joint-specific Euler sequences")
print("‚úÖ Visual Proof: Interactive synchronized stick figure with LCS")
print("‚úÖ Time-Sync: Slider updates skeleton + kinematic plots simultaneously")
print()
print("SUPERVISOR INSTRUCTIONS:")
print("  1. Check ISB Compliance table - all joints should show correct sequences")
print("  2. Use slider to move through performance")
print("  3. Verify LCS axes (X/Y/Z arrows) remain stable - no erratic spinning")
print("  4. Confirm kinematic plots sync with skeleton movement")
print("  5. Look for anomalies: marker swaps, gimbal lock, unnatural motion")
print("="*80)

---

## Section 6: Gaga-Aware Biomechanics
**Purpose:** Distinguish "Intense Dance" from "System Error" (Longo et al., 2022) - Intelligent outlier detection

In [None]:
# ============================================================
# SECTION 6: Gaga-Aware Biomechanics
# ============================================================

print("="*80)
print("SECTION 6: GAGA-AWARE BIOMECHANICS")
print("="*80)
print("Purpose: Distinguish 'Intense Dance' from 'System Error' (Longo et al., 2022)")
print("="*80)
print()

# ============================================================
# BIOMECHANICAL BENCHMARKS & THRESHOLDS
# ============================================================

# Literature-based normal gait ranges (Wu et al., 2002; Longo et al., 2022)
NORMAL_GAIT_BENCHMARKS = {
    # Joint: (mean_rom_deg, std_rom_deg, max_angular_vel_deg_s)
    'Shoulder': (120, 30, 300),      # Flexion/Extension
    'Elbow': (140, 15, 400),          # Flexion
    'Hip': (100, 20, 250),            # Flexion/Extension
    'Knee': (130, 20, 400),           # Flexion
    'Ankle': (40, 10, 300),           # Dorsi/Plantar flexion
    'Spine': (60, 15, 150),           # Flexion/Extension
}

# Gaga dance tolerance multipliers (from Longo et al., 2022 - expressive dance)
GAGA_ROM_MULTIPLIER = 1.5        # Allow 50% more ROM than normal gait
GAGA_VELOCITY_MULTIPLIER = 2.0   # Allow 2x angular velocity for dance

# Physically impossible thresholds (HARD LIMITS - marker swap indicators)
PHYSICALLY_IMPOSSIBLE = {
    'Shoulder': {'rom': 200, 'velocity': 1000},   # > 200¬∞ ROM or > 1000¬∞/s = swap
    'Elbow': {'rom': 160, 'velocity': 1200},      # Elbow hyper-extension > 160¬∞ impossible
    'Hip': {'rom': 180, 'velocity': 800},
    'Knee': {'rom': 160, 'velocity': 1000},       # Knee backward flexion impossible
    'Ankle': {'rom': 100, 'velocity': 800},
    'Spine': {'rom': 120, 'velocity': 500},
}

# Standard deviation threshold for normal gait outliers
NORMAL_GAIT_SD_THRESHOLD = 3.0  # > 3 SD = outlier in normal gait
GAGA_SD_THRESHOLD = 5.0         # > 5 SD = outlier in Gaga (more permissive)

print("Biomechanical Benchmarks Loaded:")
print(f"  Normal Gait: {len(NORMAL_GAIT_BENCHMARKS)} joint types")
print(f"  Gaga Tolerance: ROM x{GAGA_ROM_MULTIPLIER}, Velocity x{GAGA_VELOCITY_MULTIPLIER}")
print(f"  Physically Impossible Limits: {len(PHYSICALLY_IMPOSSIBLE)} joint types")
print()

# ============================================================
# BUILD GAGA-AWARE BIOMECHANICS TABLE
# ============================================================

gaga_biomechanics_data = []

for run_id, steps in complete_runs.items():
    print(f"\n{run_id}:")
    
    # Load step_06 data (angular velocities, ROM)
    if 'step_06' not in steps:
        print("  ‚ö†Ô∏è No kinematic data (step_06) - skipping biomechanical check")
        gaga_biomechanics_data.append({
            'Run_ID': run_id,
            'Total_Joints_Checked': 0,
            'Normal_Gait_Outliers': 0,
            'Gaga_Outliers': 0,
            'Physically_Impossible': 0,
            'Overall_Status': '‚ùå NO_DATA',
            'Notes': 'Kinematic data not available'
        })
        continue
    
    step_06 = steps['step_06']
    
    # Extract angular velocity and ROM statistics
    # These should be in the kinematics_summary.json
    joint_stats = safe_get(step_06, 'joint_statistics', default={})
    
    if not joint_stats:
        print("  ‚ö†Ô∏è No joint statistics in step_06 - kinematic analysis may be incomplete")
        gaga_biomechanics_data.append({
            'Run_ID': run_id,
            'Total_Joints_Checked': 0,
            'Normal_Gait_Outliers': 0,
            'Gaga_Outliers': 0,
            'Physically_Impossible': 0,
            'Overall_Status': '‚ö†Ô∏è INCOMPLETE',
            'Notes': 'Joint statistics not computed in pipeline'
        })
        continue
    
    # Analyze each joint
    normal_gait_outliers = []
    gaga_outliers = []
    physically_impossible = []
    total_joints = 0
    
    for joint_name, stats in joint_stats.items():
        total_joints += 1
        
        # Extract metrics
        max_angular_vel = safe_float(stats.get('max_angular_velocity', 0), default=0)
        rom = safe_float(stats.get('rom', 0), default=0)
        
        # Determine joint type (Shoulder, Elbow, Hip, Knee, etc.)
        joint_type = None
        for jtype in NORMAL_GAIT_BENCHMARKS.keys():
            if jtype in joint_name:
                joint_type = jtype
                break
        
        if joint_type is None:
            # Unknown joint type - skip
            continue
        
        # Get benchmarks
        normal_rom_mean, normal_rom_std, normal_vel_mean = NORMAL_GAIT_BENCHMARKS[joint_type]
        impossible_rom = PHYSICALLY_IMPOSSIBLE[joint_type]['rom']
        impossible_vel = PHYSICALLY_IMPOSSIBLE[joint_type]['velocity']
        
        # Gaga-adjusted thresholds
        gaga_rom_limit = normal_rom_mean + (GAGA_ROM_MULTIPLIER * normal_rom_std * GAGA_SD_THRESHOLD)
        gaga_vel_limit = normal_vel_mean * GAGA_VELOCITY_MULTIPLIER
        
        # Normal gait thresholds
        normal_rom_limit = normal_rom_mean + (NORMAL_GAIT_SD_THRESHOLD * normal_rom_std)
        normal_vel_limit = normal_vel_mean
        
        # ============================================================
        # CLASSIFICATION LOGIC (Gaga-Aware)
        # ============================================================
        
        # Check 1: Physically Impossible (CRITICAL - likely marker swap)
        if rom > impossible_rom or max_angular_vel > impossible_vel:
            physically_impossible.append({
                'joint': joint_name,
                'rom': rom,
                'max_vel': max_angular_vel,
                'rom_limit': impossible_rom,
                'vel_limit': impossible_vel,
                'reason': f"ROM={rom:.1f}¬∞ or Vel={max_angular_vel:.1f}¬∞/s exceeds physical limits"
            })
        
        # Check 2: Gaga outlier (intense dance, but physically possible)
        elif rom > gaga_rom_limit or max_angular_vel > gaga_vel_limit:
            gaga_outliers.append({
                'joint': joint_name,
                'rom': rom,
                'max_vel': max_angular_vel,
                'rom_limit': gaga_rom_limit,
                'vel_limit': gaga_vel_limit,
                'reason': f"Extreme movement (ROM={rom:.1f}¬∞, Vel={max_angular_vel:.1f}¬∞/s) - intense dance"
            })
        
        # Check 3: Normal gait outlier (outside normal but within Gaga range)
        elif rom > normal_rom_limit or max_angular_vel > normal_vel_limit:
            normal_gait_outliers.append({
                'joint': joint_name,
                'rom': rom,
                'max_vel': max_angular_vel,
                'rom_limit': normal_rom_limit,
                'vel_limit': normal_vel_limit,
                'reason': f"Exceeds normal gait (ROM={rom:.1f}¬∞, Vel={max_angular_vel:.1f}¬∞/s) - typical for dance"
            })
    
    # ============================================================
    # DETERMINE OVERALL STATUS
    # ============================================================
    
    if len(physically_impossible) > 0:
        overall_status = 'üî¥ CRITICAL'
        notes = f"{len(physically_impossible)} joint(s) show physically impossible movements - likely marker swap"
    elif len(gaga_outliers) > 0:
        overall_status = '‚ö†Ô∏è REVIEW'
        notes = f"{len(gaga_outliers)} joint(s) show extreme movements - intense dance, verify visually"
    elif len(normal_gait_outliers) > 0:
        overall_status = '‚úÖ PASS (HIGH_INTENSITY)'
        notes = f"{len(normal_gait_outliers)} joint(s) exceed normal gait - typical for Gaga expressive dance"
    else:
        overall_status = '‚úÖ PASS'
        notes = "All joints within normal biomechanical ranges"
    
    # Display summary
    print(f"  Total Joints Analyzed: {total_joints}")
    print(f"  Normal Gait Outliers: {len(normal_gait_outliers)} (typical for dance)")
    print(f"  Gaga Outliers: {len(gaga_outliers)} (extreme but possible)")
    print(f"  Physically Impossible: {len(physically_impossible)} (CRITICAL)")
    print(f"  Overall Status: {overall_status}")
    
    # Show critical issues
    if len(physically_impossible) > 0:
        print(f"\n  üî¥ CRITICAL - Physically Impossible Movements:")
        for issue in physically_impossible[:3]:  # Show first 3
            print(f"    {issue['joint']}: {issue['reason']}")
        if len(physically_impossible) > 3:
            print(f"    ... and {len(physically_impossible) - 3} more")
    
    # Show extreme movements (Gaga outliers)
    if len(gaga_outliers) > 0:
        print(f"\n  ‚ö†Ô∏è REVIEW - Extreme Movements (Gaga Outliers):")
        for issue in gaga_outliers[:3]:  # Show first 3
            print(f"    {issue['joint']}: {issue['reason']}")
        if len(gaga_outliers) > 3:
            print(f"    ... and {len(gaga_outliers) - 3} more")
    
    # Store for summary table
    gaga_biomechanics_data.append({
        'Run_ID': run_id,
        'Total_Joints_Checked': total_joints,
        'Normal_Gait_Outliers': len(normal_gait_outliers),
        'Gaga_Outliers': len(gaga_outliers),
        'Physically_Impossible': len(physically_impossible),
        'Overall_Status': overall_status,
        'Notes': notes,
        'Critical_Joints': ', '.join([x['joint'] for x in physically_impossible[:5]]) if physically_impossible else 'None',
        'Extreme_Joints': ', '.join([x['joint'] for x in gaga_outliers[:5]]) if gaga_outliers else 'None'
    })

# Create Gaga Biomechanics summary table
df_gaga = pd.DataFrame(gaga_biomechanics_data)

print("\n" + "="*80)
print("GAGA-AWARE BIOMECHANICS SUMMARY")
print("="*80)
display(df_gaga)

print("\n" + "="*80)
print("INTERPRETATION:")
print("="*80)
print("‚úÖ PASS: All joints within normal biomechanical ranges")
print("‚úÖ PASS (HIGH_INTENSITY): Exceeds normal gait but typical for Gaga expressive dance")
print("‚ö†Ô∏è REVIEW: Extreme movements detected - visually verify in Section 5 (LCS viz)")
print("   ‚Üí Tag as REVIEW, not REJECT - may be valid high-intensity dance")
print("üî¥ CRITICAL: Physically impossible movements - likely marker swap or tracking failure")
print("   ‚Üí Requires immediate attention - data may be corrupted")
print()

# Summary statistics
total_runs = len(df_gaga)
pass_count = df_gaga['Overall_Status'].str.contains('PASS').sum()
review_count = (df_gaga['Overall_Status'] == '‚ö†Ô∏è REVIEW').sum()
critical_count = (df_gaga['Overall_Status'] == 'üî¥ CRITICAL').sum()

print(f"Overall Summary:")
print(f"  Total Runs: {total_runs}")
print(f"  ‚úÖ Pass (including high-intensity): {pass_count}/{total_runs}")
print(f"  ‚ö†Ô∏è Review (extreme but possible): {review_count}/{total_runs}")
print(f"  üî¥ Critical (physically impossible): {critical_count}/{total_runs}")
print()

if critical_count > 0:
    print("‚ö†Ô∏è CRITICAL RUNS DETECTED:")
    critical_runs = df_gaga[df_gaga['Overall_Status'] == 'üî¥ CRITICAL']
    for idx, row in critical_runs.iterrows():
        print(f"  {row['Run_ID']}")
        print(f"    Critical Joints: {row['Critical_Joints']}")
        print(f"    ‚Üí ACTION: Check Section 5 visualization for marker swaps")
        print(f"    ‚Üí DECISION: Tag as REVIEW or REJECT based on visual inspection")
        print()

print("="*80)
print("SCIENTIFIC RATIONALE:")
print("="*80)
print("Per Longo et al. (2022): Expressive dance movements exceed normal gait ranges.")
print("  ‚Üí We use GAGA-AWARE THRESHOLDS (1.5x ROM, 2x velocity) to avoid false rejections")
print()
print("Per Wu et al. (2002): Joint-specific anatomical limits are physical constraints.")
print("  ‚Üí Movements exceeding these limits indicate marker swap or system error")
print()
print("DECISION LOGIC:")
print("  ‚Ä¢ Normal gait outlier ‚Üí PASS (expected in dance)")
print("  ‚Ä¢ Gaga outlier ‚Üí REVIEW (extreme dance, verify visually)")
print("  ‚Ä¢ Physically impossible ‚Üí CRITICAL (likely system error)")
print("="*80)

print("\n" + "="*80)
print("SECTION 6 COMPLETE")
print("="*80)
print("‚úÖ Gaga-Aware Biomechanics: Intelligent outlier detection")
print("‚úÖ Expressive Dance Protection: High-intensity movements tagged as REVIEW, not REJECT")
print("‚úÖ System Error Detection: Physically impossible movements flagged as CRITICAL")
print("="*80)

---

## Section 7: Signal-to-Noise Ratio (SNR) Quantification
**Purpose:** Measure signal health per joint (Cereatti et al., 2024) - Detect occlusion patterns

In [None]:
# ============================================================
# SECTION 7: Signal-to-Noise Ratio (SNR) Quantification
# ============================================================

print("="*80)
print("SECTION 7: SIGNAL-TO-NOISE RATIO (SNR) QUANTIFICATION")
print("="*80)
print("Purpose: Measure signal health per joint (Cereatti et al., 2024)")
print("Formula: SNR = 10 * log10(Power_Filtered_Signal / Power_Residuals)")
print("="*80)
print()

# ============================================================
# SNR THRESHOLDS (Cereatti et al., 2024)
# ============================================================

SNR_THRESHOLDS = {
    'excellent': 30,  # dB - Research grade
    'good': 20,       # dB - Clinical acceptable
    'acceptable': 15, # dB - Minimum for analysis
    'poor': 10,       # dB - Questionable quality
    'reject': 0       # dB - Below 10 dB = reject
}

print("SNR Quality Thresholds (Cereatti et al., 2024):")
print(f"  ‚≠ê Excellent: ‚â• {SNR_THRESHOLDS['excellent']} dB (Research grade)")
print(f"  ‚úÖ Good: ‚â• {SNR_THRESHOLDS['good']} dB (Clinical acceptable)")
print(f"  ‚ö†Ô∏è Acceptable: ‚â• {SNR_THRESHOLDS['acceptable']} dB (Minimum for analysis)")
print(f"  üî¥ Poor: ‚â• {SNR_THRESHOLDS['poor']} dB (Questionable quality)")
print(f"  ‚ùå Reject: < {SNR_THRESHOLDS['poor']} dB (Unacceptable)")
print()

# ============================================================
# BUILD SNR ANALYSIS TABLE
# ============================================================

snr_analysis_data = []

for run_id, steps in complete_runs.items():
    print(f"\n{run_id}:")
    
    # Load step_04 filtering data (contains residuals)
    if 'step_04' not in steps:
        print("  ‚ö†Ô∏è No filtering data (step_04) - skipping SNR analysis")
        snr_analysis_data.append({
            'Run_ID': run_id,
            'Mean_SNR_dB': 0.0,
            'Min_SNR_dB': 0.0,
            'Joints_Below_15dB': 0,
            'Overall_Status': '‚ùå NO_DATA',
            'Notes': 'Filtering data not available'
        })
        continue
    
    step_04 = steps['step_04']
    
    # Check for SNR analysis in filtering summary
    snr_data = safe_get(step_04, 'snr_analysis', default={})
    
    if not snr_data or 'per_joint' not in snr_data:
        print("  ‚ö†Ô∏è No SNR analysis in step_04 - filtering may be from older pipeline")
        snr_analysis_data.append({
            'Run_ID': run_id,
            'Mean_SNR_dB': 0.0,
            'Min_SNR_dB': 0.0,
            'Joints_Below_15dB': 0,
            'Overall_Status': '‚ö†Ô∏è INCOMPLETE',
            'Notes': 'SNR not computed in filtering step - enhance notebook 04'
        })
        continue
    
    # Extract per-joint SNR
    per_joint_snr = snr_data.get('per_joint', {})
    summary_snr = snr_data.get('summary', {})
    
    if not per_joint_snr:
        print("  ‚ö†Ô∏è Empty SNR data")
        snr_analysis_data.append({
            'Run_ID': run_id,
            'Mean_SNR_dB': 0.0,
            'Min_SNR_dB': 0.0,
            'Joints_Below_15dB': 0,
            'Overall_Status': '‚ö†Ô∏è INCOMPLETE',
            'Notes': 'SNR data empty'
        })
        continue
    
    # Compute statistics
    snr_values = [v['snr_db'] for v in per_joint_snr.values() if 'snr_db' in v]
    
    if len(snr_values) == 0:
        print("  ‚ö†Ô∏è No valid SNR values")
        snr_analysis_data.append({
            'Run_ID': run_id,
            'Mean_SNR_dB': 0.0,
            'Min_SNR_dB': 0.0,
            'Joints_Below_15dB': 0,
            'Overall_Status': '‚ö†Ô∏è INCOMPLETE',
            'Notes': 'No valid SNR values'
        })
        continue
    
    mean_snr = float(np.mean(snr_values))
    min_snr = float(np.min(snr_values))
    max_snr = float(np.max(snr_values))
    
    # Count joints below key thresholds
    below_15db = sum(1 for v in snr_values if v < 15.0)
    below_10db = sum(1 for v in snr_values if v < 10.0)
    
    # ============================================================
    # OCCLUSION PATTERN DETECTION (Cereatti et al., 2024)
    # ============================================================
    
    # Check for spine vs. limb SNR patterns
    # Low spine SNR + high limb SNR = torso marker occlusion
    spine_joints = [j for j in per_joint_snr.keys() if 'Spine' in j or 'Neck' in j or 'Head' in j]
    limb_joints = [j for j in per_joint_snr.keys() if any(x in j for x in ['Arm', 'Leg', 'Hand', 'Foot'])]
    
    spine_snr = [per_joint_snr[j]['snr_db'] for j in spine_joints if j in per_joint_snr and 'snr_db' in per_joint_snr[j]]
    limb_snr = [per_joint_snr[j]['snr_db'] for j in limb_joints if j in per_joint_snr and 'snr_db' in per_joint_snr[j]]
    
    mean_spine_snr = float(np.mean(spine_snr)) if spine_snr else 0.0
    mean_limb_snr = float(np.mean(limb_snr)) if limb_snr else 0.0
    
    # Detect occlusion pattern: spine SNR < 15 dB AND limb SNR > 20 dB
    occlusion_detected = (mean_spine_snr < 15.0 and mean_limb_snr > 20.0)
    
    # ============================================================
    # OVERALL STATUS CLASSIFICATION
    # ============================================================
    
    if below_10db > 0:
        overall_status = '‚ùå REJECT'
        notes = f"{below_10db} joint(s) below 10 dB - unacceptable signal quality"
    elif below_15db > 0:
        overall_status = '‚ö†Ô∏è REVIEW'
        notes = f"{below_15db} joint(s) below 15 dB - marginal signal quality"
    elif mean_snr < 20.0:
        overall_status = '‚úÖ ACCEPTABLE'
        notes = f"Mean SNR {mean_snr:.1f} dB - acceptable for analysis"
    elif mean_snr < 30.0:
        overall_status = '‚úÖ GOOD'
        notes = f"Mean SNR {mean_snr:.1f} dB - good signal quality"
    else:
        overall_status = '‚≠ê EXCELLENT'
        notes = f"Mean SNR {mean_snr:.1f} dB - research grade signal quality"
    
    # Add occlusion pattern note
    if occlusion_detected:
        notes += f" | üö® OCCLUSION: Spine SNR ({mean_spine_snr:.1f} dB) << Limb SNR ({mean_limb_snr:.1f} dB) - torso markers occluded"
    
    # Display summary
    print(f"  Total Joints Analyzed: {len(snr_values)}")
    print(f"  Mean SNR: {mean_snr:.1f} dB")
    print(f"  Min SNR: {min_snr:.1f} dB")
    print(f"  Max SNR: {max_snr:.1f} dB")
    print(f"  Joints < 15 dB: {below_15db}")
    print(f"  Joints < 10 dB: {below_10db}")
    
    if occlusion_detected:
        print(f"\n  üö® OCCLUSION PATTERN DETECTED:")
        print(f"    Spine Mean SNR: {mean_spine_snr:.1f} dB (low)")
        print(f"    Limb Mean SNR: {mean_limb_snr:.1f} dB (high)")
        print(f"    ‚Üí Indicates torso marker occlusion during performance")
    
    print(f"  Overall Status: {overall_status}")
    
    # Show worst joints (lowest SNR)
    worst_joints = sorted(per_joint_snr.items(), key=lambda x: x[1].get('snr_db', 0))[:5]
    
    if worst_joints:
        print(f"\n  Worst Signal Quality (Top 5):")
        for joint, data in worst_joints:
            snr_val = data.get('snr_db', 0)
            quality = data.get('quality', 'unknown')
            print(f"    {joint}: {snr_val:.1f} dB ({quality})")
    
    # Store for summary table
    snr_analysis_data.append({
        'Run_ID': run_id,
        'Mean_SNR_dB': round(mean_snr, 1),
        'Min_SNR_dB': round(min_snr, 1),
        'Max_SNR_dB': round(max_snr, 1),
        'Joints_Below_15dB': below_15db,
        'Joints_Below_10dB': below_10db,
        'Spine_Mean_SNR': round(mean_spine_snr, 1),
        'Limb_Mean_SNR': round(mean_limb_snr, 1),
        'Occlusion_Detected': 'üö® YES' if occlusion_detected else 'No',
        'Overall_Status': overall_status,
        'Notes': notes
    })

# Create SNR summary table
df_snr = pd.DataFrame(snr_analysis_data)

print("\n" + "="*80)
print("SNR ANALYSIS SUMMARY")
print("="*80)
display(df_snr)

print("\n" + "="*80)
print("INTERPRETATION:")
print("="*80)
print("‚≠ê EXCELLENT (‚â•30 dB): Research-grade signal quality")
print("‚úÖ GOOD (‚â•20 dB): Clinical-acceptable signal quality")
print("‚úÖ ACCEPTABLE (‚â•15 dB): Minimum threshold for biomechanical analysis")
print("‚ö†Ô∏è REVIEW (<15 dB): Marginal signal quality - verify results carefully")
print("‚ùå REJECT (<10 dB): Unacceptable signal quality - data unreliable")
print()
print("üö® OCCLUSION PATTERN: Low spine SNR + high limb SNR = torso marker occlusion")
print("   ‚Üí Check if dancer's torso was blocked by camera angles or other objects")
print("   ‚Üí May affect COM (center of mass) calculations and trunk kinematics")
print()

# Summary statistics
total_runs = len(df_snr)
excellent_count = (df_snr['Overall_Status'] == '‚≠ê EXCELLENT').sum()
good_count = (df_snr['Overall_Status'] == '‚úÖ GOOD').sum()
acceptable_count = (df_snr['Overall_Status'] == '‚úÖ ACCEPTABLE').sum()
review_count = (df_snr['Overall_Status'] == '‚ö†Ô∏è REVIEW').sum()
reject_count = (df_snr['Overall_Status'] == '‚ùå REJECT').sum()
occlusion_count = (df_snr['Occlusion_Detected'] == 'üö® YES').sum()

print(f"Overall Summary:")
print(f"  Total Runs: {total_runs}")
print(f"  ‚≠ê Excellent: {excellent_count}/{total_runs}")
print(f"  ‚úÖ Good: {good_count}/{total_runs}")
print(f"  ‚úÖ Acceptable: {acceptable_count}/{total_runs}")
print(f"  ‚ö†Ô∏è Review: {review_count}/{total_runs}")
print(f"  ‚ùå Reject: {reject_count}/{total_runs}")
print(f"  üö® Occlusion Detected: {occlusion_count}/{total_runs}")
print()

if occlusion_count > 0:
    print("‚ö†Ô∏è RUNS WITH OCCLUSION PATTERNS:")
    occluded_runs = df_snr[df_snr['Occlusion_Detected'] == 'üö® YES']
    for idx, row in occluded_runs.iterrows():
        print(f"  {row['Run_ID']}")
        print(f"    Spine SNR: {row['Spine_Mean_SNR']} dB | Limb SNR: {row['Limb_Mean_SNR']} dB")
        print(f"    ‚Üí ACTION: Review camera setup for torso visibility")
        print()

print("="*80)
print("SCIENTIFIC RATIONALE:")
print("="*80)
print("Per Cereatti et al. (2024): SNR quantifies signal health objectively.")
print("  Formula: SNR(dB) = 10 * log10(Power_Signal / Power_Residuals)")
print("  ‚Üí Higher SNR = cleaner signal, more reliable derivatives")
print()
print("Power Calculation:")
print("  Power = RMS¬≤ = (1/N) * Œ£(x¬≤)")
print("  ‚Üí Signal power: RMS of filtered signal")
print("  ‚Üí Noise power: RMS of residuals (signal - filtered)")
print()
print("Occlusion Detection:")
print("  Low spine SNR + High limb SNR = Differential tracking quality")
print("  ‚Üí Suggests torso markers were occluded while limb markers were visible")
print("  ‚Üí Common in dance: body blocking torso from certain camera angles")
print("="*80)

print("\n" + "="*80)
print("SECTION 7 COMPLETE")
print("="*80)
print("‚úÖ SNR Analysis: Objective signal health quantification")
print("‚úÖ Occlusion Detection: Identifies torso vs. limb tracking patterns")
print("‚úÖ Quality Thresholds: Research-grade classification (Cereatti 2024)")
print("="*80)

---

## Section 8: The Decision Matrix
**Purpose:** Final verdict combining all QC metrics - **ACCEPT / REVIEW / REJECT**

In [None]:
# ============================================================
# SECTION 8: THE DECISION MATRIX
# ============================================================

print("="*80)
print("SECTION 8: THE DECISION MATRIX")
print("="*80)
print("Purpose: Final verdict combining all QC metrics")
print("States: ACCEPT (‚úÖ), REVIEW (‚ö†Ô∏è), REJECT (‚ùå)")
print("="*80)
print()

# ============================================================
# QUALITY SCORE WEIGHTS (Cereatti et al., 2024)
# ============================================================

WEIGHTS = {
    'calibration': 0.15,      # Pointer/wand accuracy (R√°cz 2025)
    'bone_stability': 0.20,   # Rigid-body integrity (CV%)
    'temporal_quality': 0.10, # Sample time jitter
    'interpolation': 0.15,    # Gap filling quality
    'filtering': 0.10,        # Winter's residual analysis
    'snr': 0.20,              # Signal-to-noise ratio (Cereatti 2024)
    'biomechanics': 0.10      # Gaga-aware outlier detection
}

print("Quality Score Weights:")
for key, weight in WEIGHTS.items():
    print(f"  {key.replace('_', ' ').title()}: {weight*100:.0f}%")
print()

# Verify weights sum to 1.0
total_weight = sum(WEIGHTS.values())
assert abs(total_weight - 1.0) < 0.01, f"Weights must sum to 1.0, got {total_weight}"

# ============================================================
# SECTION SCORE NORMALIZATION FUNCTIONS
# ============================================================

def score_calibration(section1_status):
    """Score calibration quality (0-100)"""
    if section1_status == '‚úÖ PASS':
        return 100.0
    elif section1_status == '‚ö†Ô∏è REVIEW':
        return 70.0
    elif section1_status == '‚ùå FAIL':
        return 30.0
    else:
        return 50.0  # Unknown/missing

def score_bone_stability(bone_cv_percent):
    """Score bone stability based on CV% (0-100)"""
    if bone_cv_percent is None or bone_cv_percent == 'N/A':
        return 50.0
    try:
        cv = float(bone_cv_percent)
        if cv <= 0.5:
            return 100.0
        elif cv <= 1.0:
            return 90.0
        elif cv <= 1.5:
            return 70.0  # Threshold for acceptance
        elif cv <= 2.0:
            return 50.0
        else:
            return 20.0  # Poor stability
    except:
        return 50.0

def score_temporal_quality(time_jitter_sec):
    """Score temporal quality based on jitter (0-100)"""
    if time_jitter_sec is None or time_jitter_sec == 'N/A':
        return 50.0
    try:
        jitter_ms = float(time_jitter_sec) * 1000  # Convert to ms
        if jitter_ms <= 0.1:
            return 100.0
        elif jitter_ms <= 0.5:
            return 90.0
        elif jitter_ms <= 1.0:
            return 70.0
        elif jitter_ms <= 2.0:
            return 50.0
        else:
            return 20.0
    except:
        return 50.0

def score_interpolation(raw_missing_percent, method_category):
    """Score interpolation quality (0-100)"""
    if raw_missing_percent is None or raw_missing_percent == 'N/A':
        return 50.0
    try:
        missing = float(raw_missing_percent)
        
        # Base score from gap percentage
        if missing == 0:
            base_score = 100.0
        elif missing <= 1.0:
            base_score = 95.0
        elif missing <= 5.0:
            base_score = 80.0
        elif missing <= 10.0:
            base_score = 60.0
        else:
            base_score = 30.0
        
        # Penalty for linear fallback
        if 'üü† Linear Fallback' in str(method_category):
            base_score *= 0.85  # 15% penalty
        
        return base_score
    except:
        return 50.0

def score_filtering(winter_status):
    """Score filtering quality (0-100)"""
    if winter_status == '‚úÖ PASS':
        return 100.0
    elif winter_status == '‚ö†Ô∏è ARBITRARY':
        return 70.0
    elif winter_status == '‚ùå FAIL':
        return 30.0
    else:
        return 50.0

def score_snr(mean_snr_db):
    """Score SNR quality (0-100)"""
    if mean_snr_db is None or mean_snr_db == 'N/A' or mean_snr_db == 0.0:
        return 50.0
    try:
        snr = float(mean_snr_db)
        if snr >= 30.0:
            return 100.0  # Excellent
        elif snr >= 20.0:
            return 85.0   # Good
        elif snr >= 15.0:
            return 70.0   # Acceptable
        elif snr >= 10.0:
            return 40.0   # Poor
        else:
            return 10.0   # Reject
    except:
        return 50.0

def score_biomechanics(biomech_status):
    """Score biomechanics quality (0-100)"""
    if biomech_status == '‚úÖ PASS':
        return 100.0
    elif biomech_status == '‚úÖ PASS (HIGH_INTENSITY)':
        return 95.0  # Gaga-aware: high intensity is acceptable
    elif biomech_status == '‚ö†Ô∏è REVIEW':
        return 60.0
    elif biomech_status == 'üî¥ CRITICAL':
        return 10.0
    else:
        return 50.0

# ============================================================
# BUILD DECISION MATRIX
# ============================================================

decision_matrix_data = []

for run_id, steps in complete_runs.items():
    print(f"\n{'='*80}")
    print(f"DECISION MATRIX: {run_id}")
    print(f"{'='*80}")
    
    # ============================================================
    # EXTRACT METRICS FROM ALL SECTIONS
    # ============================================================
    
    # Section 0: Data Lineage
    section0 = next((row for row in section0_data if row['Run_ID'] == run_id), {})
    integrity_status = section0.get('Integrity_Status', 'N/A')
    
    # Section 1: Calibration
    section1 = next((row for row in calibration_data if row['Run_ID'] == run_id), {})
    calibration_status = section1.get('Calibration_Status', 'N/A')
    
    # Section 2: Rigid-Body
    section2 = next((row for row in rigidbody_data if row['Run_ID'] == run_id), {})
    bone_cv = section2.get('Bone_CV_%', 'N/A')
    time_jitter = section2.get('Time_Jitter_sec', 'N/A')
    
    # Section 3: Interpolation
    section3 = next((row for row in interpolation_data if row['Run_ID'] == run_id), {})
    raw_missing = section3.get('Raw_Missing_%', 'N/A')
    method_category = section3.get('Method_Category', 'N/A')
    
    # Section 4: Filtering
    section4 = next((row for row in filtering_data if row['Run_ID'] == run_id), {})
    winter_status = section4.get('Winter_Status', 'N/A')
    
    # Section 5: ISB Compliance (not in scoring, but checked for violations)
    section5 = next((row for row in isb_compliance_data if row['Run_ID'] == run_id), {})
    isb_violations = section5.get('ROM_Violations', 0)
    
    # Section 6: Gaga Biomechanics
    section6 = next((row for row in gaga_biomechanics_data if row['Run_ID'] == run_id), {})
    biomech_status = section6.get('Overall_Status', 'N/A')
    
    # Section 7: SNR
    section7 = next((row for row in snr_analysis_data if row['Run_ID'] == run_id), {})
    mean_snr = section7.get('Mean_SNR_dB', 'N/A')
    joints_below_10db = section7.get('Joints_Below_10dB', 0)
    occlusion_detected = section7.get('Occlusion_Detected', 'No')
    
    # ============================================================
    # COMPUTE QUALITY SCORE
    # ============================================================
    
    scores = {
        'calibration': score_calibration(calibration_status),
        'bone_stability': score_bone_stability(bone_cv),
        'temporal_quality': score_temporal_quality(time_jitter),
        'interpolation': score_interpolation(raw_missing, method_category),
        'filtering': score_filtering(winter_status),
        'snr': score_snr(mean_snr),
        'biomechanics': score_biomechanics(biomech_status)
    }
    
    # Weighted quality score (0-100)
    quality_score = sum(scores[key] * WEIGHTS[key] for key in WEIGHTS.keys())
    
    print("\nComponent Scores (0-100):")
    for key, score in scores.items():
        weight_pct = WEIGHTS[key] * 100
        contribution = score * WEIGHTS[key]
        print(f"  {key.replace('_', ' ').title()}: {score:.1f} √ó {weight_pct:.0f}% = {contribution:.1f}")
    
    print(f"\nüìä WEIGHTED QUALITY SCORE: {quality_score:.1f} / 100")
    
    # ============================================================
    # DECISION LOGIC (REJECT ‚Üí REVIEW ‚Üí ACCEPT)
    # ============================================================
    
    decision = None
    decision_reason = []
    category = None
    
    # ============================================================
    # CRITICAL FAILURES (REJECT)
    # ============================================================
    
    # Data integrity failure
    if integrity_status == '‚ùå MISMATCH':
        decision = '‚ùå REJECT'
        category = 'Data Integrity'
        decision_reason.append("Data hash mismatch - file modified after processing")
    
    # Calibration failure
    if calibration_status == '‚ùå FAIL' and decision is None:
        decision = '‚ùå REJECT'
        category = 'Calibration'
        pointer_error = section1.get('Pointer_Error_mm', 'N/A')
        decision_reason.append(f"Pointer calibration error ({pointer_error} mm) exceeds threshold")
    
    # Bone stability failure
    if bone_cv != 'N/A' and float(bone_cv) > 2.0 and decision is None:
        decision = '‚ùå REJECT'
        category = 'Rigid-Body Integrity'
        worst_bone = section2.get('Worst_Bone', 'Unknown')
        decision_reason.append(f"Bone_Stability_CV ({bone_cv}%) > threshold (2.0%) on {worst_bone} - marker tracking failure")
    
    # SNR failure (joints below 10 dB)
    if joints_below_10db > 0 and decision is None:
        decision = '‚ùå REJECT'
        category = 'Signal Quality'
        decision_reason.append(f"SNR failure: {joints_below_10db} joint(s) below 10 dB - unacceptable signal quality")
    
    # Biomechanics critical (physically impossible)
    if biomech_status == 'üî¥ CRITICAL' and decision is None:
        decision = '‚ùå REJECT'
        category = 'Biomechanical Validity'
        decision_reason.append("Physically impossible joint angles detected - likely marker swap")
    
    # ============================================================
    # REVIEW FLAGS (REVIEW)
    # ============================================================
    
    if decision is None:
        review_flags = []
        
        # Calibration review
        if calibration_status == '‚ö†Ô∏è REVIEW':
            review_flags.append("Calibration marginal")
        
        # Bone stability review
        if bone_cv != 'N/A' and float(bone_cv) > 1.5:
            review_flags.append(f"Bone CV ({bone_cv}%) above ideal (1.5%)")
        
        # SNR review
        if mean_snr != 'N/A' and float(mean_snr) < 15.0:
            review_flags.append(f"Mean SNR ({mean_snr} dB) below minimum (15 dB)")
        
        # Occlusion pattern
        if occlusion_detected == 'üö® YES':
            review_flags.append("Torso marker occlusion detected - trunk kinematics unreliable")
        
        # ISB violations (anatomical ROM exceeded)
        if isb_violations > 0:
            review_flags.append(f"{isb_violations} joint(s) exceeded anatomical ROM limits")
        
        # Gaga high intensity
        if biomech_status == '‚ö†Ô∏è REVIEW':
            review_flags.append("Extreme angular velocities - verify if dance or tracking error")
        
        # Linear fallback
        if 'üü† Linear Fallback' in str(method_category):
            review_flags.append("Linear interpolation fallback used - velocity accuracy reduced")
        
        # Winter arbitrary
        if winter_status == '‚ö†Ô∏è ARBITRARY':
            review_flags.append("Filter cutoff frequency arbitrary - knee point not found")
        
        # If any review flags, mark as REVIEW
        if len(review_flags) > 0:
            decision = '‚ö†Ô∏è REVIEW'
            category = 'Quality Flags'
            decision_reason.extend(review_flags)
    
    # ============================================================
    # ACCEPTANCE (ACCEPT)
    # ============================================================
    
    if decision is None:
        # Quality score threshold for acceptance
        if quality_score >= 80.0:
            decision = '‚úÖ ACCEPT (EXCELLENT)'
            category = 'Quality Score'
            decision_reason.append(f"Quality score {quality_score:.1f}/100 - excellent data quality")
        elif quality_score >= 70.0:
            decision = '‚úÖ ACCEPT (GOOD)'
            category = 'Quality Score'
            decision_reason.append(f"Quality score {quality_score:.1f}/100 - good data quality")
        elif quality_score >= 60.0:
            decision = '‚úÖ ACCEPT'
            category = 'Quality Score'
            decision_reason.append(f"Quality score {quality_score:.1f}/100 - acceptable data quality")
        else:
            decision = '‚ö†Ô∏è REVIEW'
            category = 'Quality Score'
            decision_reason.append(f"Quality score {quality_score:.1f}/100 - below ideal threshold (60)")
    
    # ============================================================
    # FORMAT DECISION REASON
    # ============================================================
    
    decision_reason_text = f"{decision} ({category}): " + "; ".join(decision_reason)
    
    # ============================================================
    # DISPLAY RESULT
    # ============================================================
    
    print(f"\n{'='*80}")
    print(f"FINAL DECISION: {decision}")
    print(f"{'='*80}")
    print(f"Category: {category}")
    print(f"Reason: {decision_reason_text}")
    print(f"{'='*80}")
    
    # ============================================================
    # STORE FOR SUMMARY TABLE
    # ============================================================
    
    decision_matrix_data.append({
        'Run_ID': run_id,
        'Quality_Score': round(quality_score, 1),
        'Calibration_Score': round(scores['calibration'], 1),
        'Bone_Stability_Score': round(scores['bone_stability'], 1),
        'Temporal_Score': round(scores['temporal_quality'], 1),
        'Interpolation_Score': round(scores['interpolation'], 1),
        'Filtering_Score': round(scores['filtering'], 1),
        'SNR_Score': round(scores['snr'], 1),
        'Biomechanics_Score': round(scores['biomechanics'], 1),
        'Decision': decision,
        'Decision_Category': category,
        'Decision_Reason': decision_reason_text
    })

# ============================================================
# CREATE DECISION MATRIX SUMMARY TABLE
# ============================================================

df_decision = pd.DataFrame(decision_matrix_data)

print("\n" + "="*80)
print("DECISION MATRIX SUMMARY")
print("="*80)
display(df_decision[['Run_ID', 'Quality_Score', 'Decision', 'Decision_Category']])

print("\n" + "="*80)
print("DETAILED REASONS")
print("="*80)
for idx, row in df_decision.iterrows():
    print(f"\n{row['Run_ID']}:")
    print(f"  {row['Decision_Reason']}")

# ============================================================
# SUMMARY STATISTICS
# ============================================================

total_runs = len(df_decision)
accept_count = df_decision['Decision'].str.contains('ACCEPT').sum()
review_count = df_decision['Decision'].str.contains('REVIEW').sum()
reject_count = df_decision['Decision'].str.contains('REJECT').sum()

mean_quality_score = df_decision['Quality_Score'].mean()
min_quality_score = df_decision['Quality_Score'].min()
max_quality_score = df_decision['Quality_Score'].max()

print("\n" + "="*80)
print("OVERALL SUMMARY")
print("="*80)
print(f"Total Runs Analyzed: {total_runs}")
print(f"  ‚úÖ ACCEPT: {accept_count}/{total_runs} ({accept_count/total_runs*100:.1f}%)")
print(f"  ‚ö†Ô∏è REVIEW: {review_count}/{total_runs} ({review_count/total_runs*100:.1f}%)")
print(f"  ‚ùå REJECT: {reject_count}/{total_runs} ({reject_count/total_runs*100:.1f}%)")
print()
print(f"Quality Score Statistics:")
print(f"  Mean: {mean_quality_score:.1f} / 100")
print(f"  Range: {min_quality_score:.1f} - {max_quality_score:.1f}")
print("="*80)

# ============================================================
# EXPORT TO EXCEL MASTER LOG
# ============================================================

print("\n" + "="*80)
print("EXPORTING MASTER LOG TO EXCEL")
print("="*80)

# Create comprehensive master log
master_log_data = []

for idx, row in df_decision.iterrows():
    run_id = row['Run_ID']
    
    # Get data from all sections
    section0 = next((r for r in section0_data if r['Run_ID'] == run_id), {})
    section1 = next((r for r in calibration_data if r['Run_ID'] == run_id), {})
    section2 = next((r for r in rigidbody_data if r['Run_ID'] == run_id), {})
    section3 = next((r for r in interpolation_data if r['Run_ID'] == run_id), {})
    section4 = next((r for r in filtering_data if r['Run_ID'] == run_id), {})
    section5 = next((r for r in isb_compliance_data if r['Run_ID'] == run_id), {})
    section6 = next((r for r in gaga_biomechanics_data if r['Run_ID'] == run_id), {})
    section7 = next((r for r in snr_analysis_data if r['Run_ID'] == run_id), {})
    
    master_log_data.append({
        # Identification
        'Run_ID': run_id,
        'Processing_Timestamp': section0.get('Processing_Timestamp', 'N/A'),
        'Pipeline_Version': section0.get('Pipeline_Version', 'N/A'),
        
        # Decision
        'Decision': row['Decision'],
        'Decision_Category': row['Decision_Category'],
        'Decision_Reason': row['Decision_Reason'],
        'Quality_Score': row['Quality_Score'],
        
        # Component Scores
        'Calibration_Score': row['Calibration_Score'],
        'Bone_Stability_Score': row['Bone_Stability_Score'],
        'Temporal_Score': row['Temporal_Score'],
        'Interpolation_Score': row['Interpolation_Score'],
        'Filtering_Score': row['Filtering_Score'],
        'SNR_Score': row['SNR_Score'],
        'Biomechanics_Score': row['Biomechanics_Score'],
        
        # Section Statuses
        'Integrity_Status': section0.get('Integrity_Status', 'N/A'),
        'Calibration_Status': section1.get('Calibration_Status', 'N/A'),
        'Rigid_Body_Status': section2.get('Rigid_Body_Status', 'N/A'),
        'Transparency_Status': section3.get('Transparency_Status', 'N/A'),
        'Winter_Status': section4.get('Winter_Status', 'N/A'),
        'ISB_Status': section5.get('ISB_Status', 'N/A'),
        'Biomech_Status': section6.get('Overall_Status', 'N/A'),
        'SNR_Status': section7.get('Overall_Status', 'N/A'),
        
        # Key Metrics
        'Pointer_Error_mm': section1.get('Pointer_Error_mm', 'N/A'),
        'Wand_Error_mm': section1.get('Wand_Error_mm', 'N/A'),
        'Bone_CV_%': section2.get('Bone_CV_%', 'N/A'),
        'Time_Jitter_sec': section2.get('Time_Jitter_sec', 'N/A'),
        'Raw_Missing_%': section3.get('Raw_Missing_%', 'N/A'),
        'Interpolation_Method': section3.get('Interpolation_Method', 'N/A'),
        'Cutoff_Hz': section4.get('Cutoff_Hz', 'N/A'),
        'ROM_Violations': section5.get('ROM_Violations', 0),
        'Mean_SNR_dB': section7.get('Mean_SNR_dB', 'N/A'),
        'Joints_Below_15dB': section7.get('Joints_Below_15dB', 0),
        'Occlusion_Detected': section7.get('Occlusion_Detected', 'No')
    })

df_master_log = pd.DataFrame(master_log_data)

# Export to Excel
excel_path = os.path.join(PROJECT_ROOT, "reports", "MASTER_QUALITY_LOG.xlsx")
os.makedirs(os.path.dirname(excel_path), exist_ok=True)

with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    # Sheet 1: Master Log (all data)
    df_master_log.to_excel(writer, sheet_name='Master_Log', index=False)
    
    # Sheet 2: Decision Summary (concise)
    df_decision[['Run_ID', 'Quality_Score', 'Decision', 'Decision_Reason']].to_excel(
        writer, sheet_name='Decision_Summary', index=False
    )
    
    # Sheet 3: Component Scores
    score_cols = ['Run_ID', 'Quality_Score', 'Calibration_Score', 'Bone_Stability_Score', 
                  'Temporal_Score', 'Interpolation_Score', 'Filtering_Score', 'SNR_Score', 
                  'Biomechanics_Score']
    df_decision[score_cols].to_excel(writer, sheet_name='Component_Scores', index=False)

print(f"‚úÖ Master log exported to: {excel_path}")
print(f"   Sheets: Master_Log, Decision_Summary, Component_Scores")
print()

print("="*80)
print("SECTION 8 COMPLETE")
print("="*80)
print("‚úÖ Quality Score: Weighted average of all QC metrics")
print("‚úÖ Decision Logic: REJECT ‚Üí REVIEW ‚Üí ACCEPT with specific reasons")
print("‚úÖ Categorized Reasons: Clear, actionable explanations")
print("‚úÖ Excel Export: Complete master log with all metrics")
print("="*80)

---

## Section 9: Portable Report Links
**Purpose:** Fast inspection with relative-path links to all QC visualizations

In [None]:
# ============================================================
# SECTION 9: PORTABLE REPORT LINKS
# ============================================================

print("="*80)
print("SECTION 9: PORTABLE REPORT LINKS")
print("="*80)
print("Purpose: Fast inspection with relative-path links to QC visualizations")
print("Constraint: RELATIVE PATHS ONLY (project folder can be moved)")
print("="*80)
print()

# ============================================================
# DEFINE QC PLOT STRUCTURE
# ============================================================

# Expected QC plots and their locations (relative to PROJECT_ROOT)
QC_PLOT_TYPES = {
    'bone_stability': {
        'pattern': 'derivatives/step_02_preprocess/{run_id}__bone_stability.png',
        'section': 'Section 2',
        'description': 'Bone length stability over time'
    },
    'winter_residual': {
        'pattern': 'derivatives/step_04_filtering/{run_id}__winter_residual.png',
        'section': 'Section 4',
        'description': 'Winter residual analysis (RMS vs. cutoff frequency)'
    },
    'lcs_static': {
        'pattern': 'reports/{run_id}_lcs_static.html',
        'section': 'Section 5',
        'description': 'Static LCS visualization (3D skeleton)'
    },
    'lcs_interactive': {
        'pattern': 'reports/{run_id}_interactive_synced.html',
        'section': 'Section 5',
        'description': 'Interactive synchronized visualization'
    },
    'euler_angles': {
        'pattern': 'derivatives/step_06_rotvec/{run_id}__euler_angles.png',
        'section': 'Section 5',
        'description': 'Euler angles over time (all joints)'
    },
    'angular_velocity': {
        'pattern': 'derivatives/step_06_rotvec/{run_id}__angular_velocity.png',
        'section': 'Section 6',
        'description': 'Angular velocity over time'
    },
    'snr_per_joint': {
        'pattern': 'derivatives/step_04_filtering/{run_id}__snr_per_joint.png',
        'section': 'Section 7',
        'description': 'SNR per joint bar chart'
    }
}

# ============================================================
# HELPER FUNCTION: CONVERT TO RELATIVE PATH
# ============================================================

def to_relative_path(abs_path, base_path):
    """
    Convert absolute path to relative path from base_path.
    
    Args:
        abs_path: Absolute file path
        base_path: Base directory (PROJECT_ROOT)
    
    Returns:
        Relative path string (e.g., "./derivatives/step_02/...")
    """
    try:
        # Convert to Path objects
        abs_path_obj = Path(abs_path).resolve()
        base_path_obj = Path(base_path).resolve()
        
        # Compute relative path
        rel_path = abs_path_obj.relative_to(base_path_obj)
        
        # Return with "./" prefix for clarity
        return f"./{rel_path.as_posix()}"
    except ValueError:
        # If paths are on different drives (Windows), return absolute as fallback
        return abs_path

def check_file_exists(file_path):
    """Check if file exists and return status emoji."""
    if os.path.exists(file_path):
        return '‚úÖ'
    else:
        return '‚ùå'

# ============================================================
# BUILD PORTABLE LINKS TABLE
# ============================================================

portable_links_data = []

for run_id, steps in complete_runs.items():
    print(f"\n{'='*80}")
    print(f"PORTABLE LINKS: {run_id}")
    print(f"{'='*80}")
    
    # Get decision for this run
    decision_row = next((row for row in decision_matrix_data if row['Run_ID'] == run_id), {})
    decision = decision_row.get('Decision', 'N/A')
    quality_score = decision_row.get('Quality_Score', 'N/A')
    
    print(f"Decision: {decision}")
    print(f"Quality Score: {quality_score}")
    print()
    
    # Collect all QC plot links for this run
    links = {}
    
    for plot_type, config in QC_PLOT_TYPES.items():
        # Construct absolute path
        abs_path = os.path.join(PROJECT_ROOT, config['pattern'].format(run_id=run_id))
        
        # Convert to relative path
        rel_path = to_relative_path(abs_path, PROJECT_ROOT)
        
        # Check if file exists
        exists = check_file_exists(abs_path)
        
        # Store link info
        links[plot_type] = {
            'rel_path': rel_path,
            'exists': exists,
            'section': config['section'],
            'description': config['description']
        }
        
        # Display status
        print(f"  {exists} {config['section']}: {plot_type}")
        print(f"     Path: {rel_path}")
        if exists == '‚ùå':
            print(f"     Note: File not found - may not be generated yet")
        print()
    
    # ============================================================
    # CREATE MARKDOWN-FORMATTED LINKS
    # ============================================================
    
    # For Jupyter display: create clickable HTML links
    bone_stability_link = f'<a href="{links["bone_stability"]["rel_path"]}" target="_blank">Bone Stability</a>' if links['bone_stability']['exists'] == '‚úÖ' else 'N/A'
    winter_residual_link = f'<a href="{links["winter_residual"]["rel_path"]}" target="_blank">Winter Residual</a>' if links['winter_residual']['exists'] == '‚úÖ' else 'N/A'
    lcs_static_link = f'<a href="{links["lcs_static"]["rel_path"]}" target="_blank">LCS Static</a>' if links['lcs_static']['exists'] == '‚úÖ' else 'N/A'
    lcs_interactive_link = f'<a href="{links["lcs_interactive"]["rel_path"]}" target="_blank">LCS Interactive</a>' if links['lcs_interactive']['exists'] == '‚úÖ' else 'N/A'
    euler_angles_link = f'<a href="{links["euler_angles"]["rel_path"]}" target="_blank">Euler Angles</a>' if links['euler_angles']['exists'] == '‚úÖ' else 'N/A'
    angular_velocity_link = f'<a href="{links["angular_velocity"]["rel_path"]}" target="_blank">Angular Velocity</a>' if links['angular_velocity']['exists'] == '‚úÖ' else 'N/A'
    snr_per_joint_link = f'<a href="{links["snr_per_joint"]["rel_path"]}" target="_blank">SNR Per Joint</a>' if links['snr_per_joint']['exists'] == '‚úÖ' else 'N/A'
    
    # Store for table
    portable_links_data.append({
        'Run_ID': run_id,
        'Decision': decision,
        'Quality_Score': quality_score,
        'Bone_Stability': bone_stability_link,
        'Winter_Residual': winter_residual_link,
        'LCS_Static': lcs_static_link,
        'LCS_Interactive': lcs_interactive_link,
        'Euler_Angles': euler_angles_link,
        'Angular_Velocity': angular_velocity_link,
        'SNR_Per_Joint': snr_per_joint_link
    })

# ============================================================
# CREATE PORTABLE LINKS SUMMARY TABLE
# ============================================================

df_portable_links = pd.DataFrame(portable_links_data)

print("\n" + "="*80)
print("PORTABLE REPORT LINKS SUMMARY")
print("="*80)
print("All paths are RELATIVE - project folder can be moved without breaking links")
print()

# Display table with HTML links (clickable in Jupyter)
from IPython.display import HTML, display as ipython_display

# Create HTML table
html_table = df_portable_links.to_html(escape=False, index=False, classes='table table-striped')

print("Interactive Table (click links to open visualizations):")
ipython_display(HTML(html_table))

# ============================================================
# EXPORT PORTABLE LINKS TO MARKDOWN
# ============================================================

print("\n" + "="*80)
print("EXPORTING PORTABLE LINKS TO MARKDOWN")
print("="*80)

markdown_path = os.path.join(PROJECT_ROOT, "reports", "PORTABLE_LINKS.md")
os.makedirs(os.path.dirname(markdown_path), exist_ok=True)

with open(markdown_path, 'w', encoding='utf-8') as f:
    f.write("# Portable Report Links\n\n")
    f.write("**Generated:** " + pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S") + "\n\n")
    f.write("**Note:** All paths are relative - this project folder can be moved without breaking links.\n\n")
    f.write("---\n\n")
    
    for idx, row in df_portable_links.iterrows():
        run_id = row['Run_ID']
        decision = row['Decision']
        quality_score = row['Quality_Score']
        
        f.write(f"## {run_id}\n\n")
        f.write(f"**Decision:** {decision}  \n")
        f.write(f"**Quality Score:** {quality_score}  \n\n")
        
        f.write("### QC Visualizations\n\n")
        
        # Write links for each plot type
        for plot_type, config in QC_PLOT_TYPES.items():
            abs_path = os.path.join(PROJECT_ROOT, config['pattern'].format(run_id=run_id))
            rel_path = to_relative_path(abs_path, PROJECT_ROOT)
            exists = check_file_exists(abs_path)
            
            if exists == '‚úÖ':
                f.write(f"- **{config['section']} - {config['description']}:**  \n")
                f.write(f"  [{plot_type}]({rel_path})\n\n")
            else:
                f.write(f"- **{config['section']} - {config['description']}:**  \n")
                f.write(f"  ‚ùå Not available (file not generated)\n\n")
        
        f.write("---\n\n")

print(f"‚úÖ Portable links exported to: {markdown_path}")
print()

# ============================================================
# VERIFY RELATIVE PATH PORTABILITY
# ============================================================

print("="*80)
print("RELATIVE PATH VERIFICATION")
print("="*80)
print()

# Test that all relative paths are indeed relative
all_relative = True
for idx, row in df_portable_links.iterrows():
    run_id = row['Run_ID']
    
    for plot_type, config in QC_PLOT_TYPES.items():
        abs_path = os.path.join(PROJECT_ROOT, config['pattern'].format(run_id=run_id))
        rel_path = to_relative_path(abs_path, PROJECT_ROOT)
        
        # Check if path is relative (starts with ./)
        if not rel_path.startswith('./'):
            print(f"‚ö†Ô∏è WARNING: Path is not relative: {rel_path}")
            all_relative = False

if all_relative:
    print("‚úÖ All paths are relative - portability verified!")
    print()
    print("You can now:")
    print("  1. Move the entire project folder to a different location")
    print("  2. Share the folder via cloud storage (Dropbox, Google Drive, etc.)")
    print("  3. Open the notebook on a different computer")
    print("  ‚Üí All links will still work!")
else:
    print("‚ö†Ô∏è Some paths are absolute - portability may be limited")

print()

# ============================================================
# STATISTICS
# ============================================================

print("="*80)
print("VISUALIZATION AVAILABILITY STATISTICS")
print("="*80)

for plot_type, config in QC_PLOT_TYPES.items():
    available_count = 0
    total_count = len(complete_runs)
    
    for run_id in complete_runs.keys():
        abs_path = os.path.join(PROJECT_ROOT, config['pattern'].format(run_id=run_id))
        if os.path.exists(abs_path):
            available_count += 1
    
    availability_pct = (available_count / total_count * 100) if total_count > 0 else 0
    
    print(f"\n{plot_type}:")
    print(f"  {config['section']} - {config['description']}")
    print(f"  Available: {available_count}/{total_count} ({availability_pct:.1f}%)")
    
    if available_count < total_count:
        print(f"  Note: {total_count - available_count} file(s) missing - check upstream notebooks")

print()

print("="*80)
print("SECTION 9 COMPLETE")
print("="*80)
print("‚úÖ Portable Links: Relative paths for all QC visualizations")
print("‚úÖ Markdown Export: Shareable report with clickable links")
print("‚úÖ Portability Verified: Project folder can be moved without breaking links")
print("‚úÖ Availability Stats: Track which visualizations exist")
print("="*80)

print("\n" + "="*80)
print("MASTER AUDIT & RESULTS NOTEBOOK - ALL 9 SECTIONS COMPLETE!")
print("="*80)
print("‚úÖ Section 0: Data Lineage & Provenance")
print("‚úÖ Section 1: R√°cz Calibration Layer")
print("‚úÖ Section 2: Rigid-Body & Temporal Audit")
print("‚úÖ Section 3: Gap & Interpolation Transparency")
print("‚úÖ Section 4: Winter's Residual Validation")
print("‚úÖ Section 5: ISB Compliance & Synchronized Viz")
print("‚úÖ Section 6: Gaga-Aware Biomechanics")
print("‚úÖ Section 7: SNR Quantification")
print("‚úÖ Section 8: The Decision Matrix")
print("‚úÖ Section 9: Portable Report Links")
print()
print("üéâ MASTER AUDIT COMPLETE - READY FOR PRODUCTION! üéâ")
print("="*80)

---

# FINAL SECTION: Dataset Yield Table
**Purpose:** Executive summary - at-a-glance dataset quality overview

In [None]:
# ============================================================
# FINAL SECTION: DATASET YIELD TABLE
# ============================================================

print("="*80)
print("DATASET YIELD TABLE")
print("="*80)
print("Executive Summary: At-a-glance overview of dataset quality and data loss")
print("="*80)
print()

# ============================================================
# COMPUTE OVERALL YIELD METRICS
# ============================================================

total_takes = len(df_decision)

# Count by decision type
accept_count = df_decision['Decision'].str.contains('ACCEPT').sum()
review_count = df_decision['Decision'].str.contains('REVIEW').sum()
reject_count = df_decision['Decision'].str.contains('REJECT').sum()

# Calculate percentages
accept_pct = (accept_count / total_takes * 100) if total_takes > 0 else 0
review_pct = (review_count / total_takes * 100) if total_takes > 0 else 0
reject_pct = (reject_count / total_takes * 100) if total_takes > 0 else 0

# ============================================================
# DATASET YIELD SUMMARY TABLE
# ============================================================

yield_summary = pd.DataFrame({
    'Metric': ['Total Takes', 'Accepted', 'Need Review', 'Rejected'],
    'Count': [total_takes, accept_count, review_count, reject_count],
    'Percentage': [100.0, accept_pct, review_pct, reject_pct]
})

print("DATASET YIELD SUMMARY")
print("="*80)
display(yield_summary)
print()

# Visual bar representation
print("Visual Breakdown:")
print("="*80)
accept_bar = '‚ñà' * int(accept_pct / 2) if accept_pct > 0 else ''
review_bar = '‚ñà' * int(review_pct / 2) if review_pct > 0 else ''
reject_bar = '‚ñà' * int(reject_pct / 2) if reject_pct > 0 else ''

print(f"‚úÖ Accepted ({accept_count}/{total_takes}, {accept_pct:.1f}%): {accept_bar}")
print(f"‚ö†Ô∏è Review   ({review_count}/{total_takes}, {review_pct:.1f}%): {review_bar}")
print(f"‚ùå Rejected ({reject_count}/{total_takes}, {reject_pct:.1f}%): {reject_bar}")
print()

# ============================================================
# DATA LOSS ANALYSIS: GROUP BY DECISION CATEGORY
# ============================================================

print("="*80)
print("DATA LOSS ANALYSIS: WHY WERE TAKES REJECTED OR FLAGGED FOR REVIEW?")
print("="*80)
print()

# Extract REVIEW and REJECT cases
review_cases = df_decision[df_decision['Decision'].str.contains('REVIEW')].copy()
reject_cases = df_decision[df_decision['Decision'].str.contains('REJECT')].copy()

# ============================================================
# REJECTION REASONS (CRITICAL DATA LOSS)
# ============================================================

if len(reject_cases) > 0:
    print("REJECTION REASONS (Critical Data Loss):")
    print("-" * 80)
    
    # Group by Decision_Category
    reject_by_category = reject_cases.groupby('Decision_Category').size().reset_index(name='Count')
    reject_by_category['Percentage_of_Total'] = (reject_by_category['Count'] / total_takes * 100)
    reject_by_category['Percentage_of_Rejects'] = (reject_by_category['Count'] / len(reject_cases) * 100)
    reject_by_category = reject_by_category.sort_values('Count', ascending=False)
    
    for idx, row in reject_by_category.iterrows():
        category = row['Decision_Category']
        count = row['Count']
        pct_total = row['Percentage_of_Total']
        pct_rejects = row['Percentage_of_Rejects']
        
        print(f"\n‚ùå {category}:")
        print(f"   Count: {count}/{total_takes} takes ({pct_total:.1f}% of total dataset)")
        print(f"   Impact: {pct_rejects:.1f}% of all rejections")
        
        # Show example runs for this category
        examples = reject_cases[reject_cases['Decision_Category'] == category]['Run_ID'].head(3).tolist()
        if examples:
            print(f"   Example runs: {', '.join(examples[:2])}")
        
        # Extract common reason patterns
        reasons = reject_cases[reject_cases['Decision_Category'] == category]['Decision_Reason'].tolist()
        if reasons:
            # Get first reason as representative
            print(f"   Typical reason: {reasons[0][:100]}...")
    
    print()
    print("-" * 80)
    print(f"Total Data Loss from Rejections: {reject_count}/{total_takes} takes ({reject_pct:.1f}%)")
    print("-" * 80)
    print()
else:
    print("‚úÖ NO REJECTIONS - All takes passed critical quality checks!")
    print()

# ============================================================
# REVIEW REASONS (REQUIRES MANUAL INSPECTION)
# ============================================================

if len(review_cases) > 0:
    print("="*80)
    print("REVIEW REASONS (Requires Manual Inspection):")
    print("-" * 80)
    
    # Group by Decision_Category
    review_by_category = review_cases.groupby('Decision_Category').size().reset_index(name='Count')
    review_by_category['Percentage_of_Total'] = (review_by_category['Count'] / total_takes * 100)
    review_by_category['Percentage_of_Reviews'] = (review_by_category['Count'] / len(review_cases) * 100)
    review_by_category = review_by_category.sort_values('Count', ascending=False)
    
    for idx, row in review_by_category.iterrows():
        category = row['Decision_Category']
        count = row['Count']
        pct_total = row['Percentage_of_Total']
        pct_reviews = row['Percentage_of_Reviews']
        
        print(f"\n‚ö†Ô∏è {category}:")
        print(f"   Count: {count}/{total_takes} takes ({pct_total:.1f}% of total dataset)")
        print(f"   Impact: {pct_reviews:.1f}% of all reviews")
        
        # Show example runs
        examples = review_cases[review_cases['Decision_Category'] == category]['Run_ID'].head(3).tolist()
        if examples:
            print(f"   Example runs: {', '.join(examples[:2])}")
        
        # Extract common flags
        reasons = review_cases[review_cases['Decision_Category'] == category]['Decision_Reason'].tolist()
        if reasons:
            # Parse individual flags from semicolon-separated reasons
            all_flags = []
            for reason in reasons:
                # Extract the part after the category
                if ':' in reason:
                    flags_part = reason.split(':', 1)[1]
                    flags = [f.strip() for f in flags_part.split(';')]
                    all_flags.extend(flags)
            
            # Count unique flags
            from collections import Counter
            flag_counts = Counter(all_flags)
            
            print(f"   Common flags:")
            for flag, flag_count in flag_counts.most_common(3):
                print(f"     ‚Ä¢ {flag} ({flag_count} runs)")
    
    print()
    print("-" * 80)
    print(f"Total Flagged for Review: {review_count}/{total_takes} takes ({review_pct:.1f}%)")
    print("-" * 80)
    print()
else:
    print("‚úÖ NO REVIEW FLAGS - All takes have clean quality scores!")
    print()

# ============================================================
# ACTIONABLE INSIGHTS
# ============================================================

print("="*80)
print("ACTIONABLE INSIGHTS")
print("="*80)
print()

# Identify top 3 reasons for data loss
all_problematic = pd.concat([reject_cases, review_cases])

if len(all_problematic) > 0:
    top_issues = all_problematic.groupby('Decision_Category').size().reset_index(name='Count')
    top_issues['Percentage'] = (top_issues['Count'] / total_takes * 100)
    top_issues = top_issues.sort_values('Count', ascending=False).head(3)
    
    print("Top 3 Issues Affecting Dataset Quality:")
    print()
    
    for idx, (i, row) in enumerate(top_issues.iterrows(), 1):
        category = row['Decision_Category']
        count = row['Count']
        pct = row['Percentage']
        
        print(f"{idx}. {category}")
        print(f"   Affected: {count} takes ({pct:.1f}% of dataset)")
        
        # Provide specific recommendation
        if 'Rigid-Body' in category:
            print(f"   üí° Recommendation: Check marker attachment - consider double-sided tape or additional securing")
        elif 'Signal Quality' in category:
            print(f"   üí° Recommendation: Verify camera placement and lighting - markers may be occluded")
        elif 'Calibration' in category:
            print(f"   üí° Recommendation: Review calibration procedure - ensure pointer and wand are properly tracked")
        elif 'Quality Flags' in category:
            print(f"   üí° Recommendation: Review flagged takes visually using Section 5 interactive visualization")
        elif 'Quality Score' in category:
            print(f"   üí° Recommendation: Multiple minor issues - check overall experimental protocol")
        else:
            print(f"   üí° Recommendation: Review {category} metrics in detail using Master Audit sections")
        print()
else:
    print("‚úÖ EXCELLENT DATASET QUALITY - No significant issues detected!")
    print()

# ============================================================
# OVERALL DATASET STATUS
# ============================================================

print("="*80)
print("OVERALL DATASET STATUS")
print("="*80)
print()

# Determine overall dataset health
if reject_pct == 0 and review_pct == 0:
    status = "‚≠ê EXCELLENT"
    message = "Perfect dataset - all takes accepted with high quality scores"
elif reject_pct <= 5 and review_pct <= 10:
    status = "‚úÖ GOOD"
    message = "High-quality dataset with minimal data loss"
elif reject_pct <= 15 and review_pct <= 25:
    status = "‚ö†Ô∏è ACCEPTABLE"
    message = "Acceptable dataset but consider improving data collection protocols"
else:
    status = "üî¥ POOR"
    message = "Significant data loss - review experimental setup and protocols"

print(f"Dataset Health: {status}")
print(f"Assessment: {message}")
print()
print(f"Usable Data: {accept_count}/{total_takes} takes ({accept_pct:.1f}%)")
print(f"Requires Review: {review_count}/{total_takes} takes ({review_pct:.1f}%)")
print(f"Data Loss: {reject_count}/{total_takes} takes ({reject_pct:.1f}%)")
print()

# ============================================================
# EXPORT DATASET YIELD TO CSV
# ============================================================

print("="*80)
print("EXPORTING DATASET YIELD REPORT")
print("="*80)

csv_path = os.path.join(PROJECT_ROOT, "reports", "DATASET_YIELD_REPORT.csv")
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# Create comprehensive yield report
yield_report_data = []

# Overall summary
yield_report_data.append({
    'Category': 'Overall',
    'Metric': 'Total Takes',
    'Count': total_takes,
    'Percentage': 100.0,
    'Status': status,
    'Notes': message
})

yield_report_data.append({
    'Category': 'Overall',
    'Metric': 'Accepted',
    'Count': accept_count,
    'Percentage': accept_pct,
    'Status': '‚úÖ',
    'Notes': 'High-quality data ready for analysis'
})

yield_report_data.append({
    'Category': 'Overall',
    'Metric': 'Need Review',
    'Count': review_count,
    'Percentage': review_pct,
    'Status': '‚ö†Ô∏è',
    'Notes': 'Requires visual inspection before use'
})

yield_report_data.append({
    'Category': 'Overall',
    'Metric': 'Rejected',
    'Count': reject_count,
    'Percentage': reject_pct,
    'Status': '‚ùå',
    'Notes': 'Critical quality failures - data unreliable'
})

# Rejection breakdown
if len(reject_cases) > 0:
    for idx, row in reject_by_category.iterrows():
        yield_report_data.append({
            'Category': 'Rejection Reason',
            'Metric': row['Decision_Category'],
            'Count': row['Count'],
            'Percentage': row['Percentage_of_Total'],
            'Status': '‚ùå',
            'Notes': f"{row['Percentage_of_Rejects']:.1f}% of all rejections"
        })

# Review breakdown
if len(review_cases) > 0:
    for idx, row in review_by_category.iterrows():
        yield_report_data.append({
            'Category': 'Review Reason',
            'Metric': row['Decision_Category'],
            'Count': row['Count'],
            'Percentage': row['Percentage_of_Total'],
            'Status': '‚ö†Ô∏è',
            'Notes': f"{row['Percentage_of_Reviews']:.1f}% of all reviews"
        })

df_yield_report = pd.DataFrame(yield_report_data)
df_yield_report.to_csv(csv_path, index=False)

print(f"‚úÖ Dataset yield report exported to: {csv_path}")
print()

print("="*80)
print("DATASET YIELD TABLE COMPLETE")
print("="*80)
print()

print("="*80)
print("üéâ MASTER AUDIT & RESULTS NOTEBOOK - COMPLETE!")
print("="*80)
print()
print("Summary:")
print(f"  ‚Ä¢ Total Sections: 10 (0-9 + Final Yield Table)")
print(f"  ‚Ä¢ Total Takes Analyzed: {total_takes}")
print(f"  ‚Ä¢ Accepted: {accept_count} ({accept_pct:.1f}%)")
print(f"  ‚Ä¢ Review: {review_count} ({review_pct:.1f}%)")
print(f"  ‚Ä¢ Rejected: {reject_count} ({reject_pct:.1f}%)")
print(f"  ‚Ä¢ Dataset Status: {status}")
print()
print("Exports:")
print(f"  ‚Ä¢ Excel Master Log: reports/MASTER_QUALITY_LOG.xlsx")
print(f"  ‚Ä¢ Portable Links: reports/PORTABLE_LINKS.md")
print(f"  ‚Ä¢ Dataset Yield: reports/DATASET_YIELD_REPORT.csv")
print()
print("Next Steps:")
print("  1. Review REVIEW-flagged takes using Section 9 links")
print("  2. Investigate top rejection reasons using Actionable Insights")
print("  3. Share reports with supervisor/collaborators")
print()
print("üéâ THANK YOU FOR USING THE MASTER AUDIT & RESULTS NOTEBOOK! üéâ")
print("="*80)