# 06: Compute ≥2 g Exposure Metrics

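Calculate exposure dose metrics for head impact assessment: load the filtered IMU data from the previous stage, compute exposure at or above the 2 g threshold, and save the results.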


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.features.exposure_2g import compute_exposure

# ---------------------------------------------------------------------------
# Stage 06: compute >=2 g exposure metrics from the stage-05 filtered data.
#
# Flow:
#   1. Look for filtered CSVs; take the first one (sorted, so the choice is
#      reproducible).
#   2. If the file is too short, or has no usable magnitude column, fall back
#      to synthetic sample data so the notebook still demonstrates the step.
#   3. If no filtered files exist at all, try the raw sample IMU files.
# ---------------------------------------------------------------------------

# Fewer rows than this is treated as "not a time series" (e.g. daily aggregates).
MIN_ROWS_FOR_EXPOSURE = 10
# Exposure threshold passed to compute_exposure.
G_THRESHOLD = 2.0


def make_sample_data(n_samples=1000, seed=42):
    """Build a reproducible synthetic g-magnitude time series for demos.

    Each sample is ~1 g with Gaussian noise (sigma 0.1); with 5% probability
    it is replaced by a value drawn uniformly from [2.0, 8.0). Timestamps are
    spaced 10 ms apart starting 2025-01-01.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for NumPy's global RNG (re-seeded on every call).

    Returns:
        DataFrame with columns ``timestamp``, ``athlete_id``, ``run_id``,
        ``g_mag``.
    """
    np.random.seed(seed)
    timestamps = pd.date_range('2025-01-01', periods=n_samples, freq='10ms')

    g_values = []
    for _ in range(n_samples):
        # Draw order (normal, random, optional uniform) is kept fixed so a
        # given seed always yields the same series.
        g_mag = 1.0 + np.random.normal(0, 0.1)
        if np.random.random() < 0.05:
            g_mag = np.random.uniform(2.0, 8.0)
        g_values.append(g_mag)

    return pd.DataFrame({
        'timestamp': timestamps,
        'athlete_id': 'A001',
        'run_id': 'R001',
        'g_mag': g_values,
    })


def select_magnitude_column(df):
    """Pick (or derive) the column to use as g magnitude.

    Preference order for per-axis columns ``{x,y,z}_mean<suffix>``:
    ``_savgol``, ``_median``, ``_hp``, ``_lp``, then unsuffixed. For the first
    suffix that has at least one axis column, a new column ``g_mag<suffix>``
    is added to ``df`` holding the Euclidean norm of the available axes
    (with a single axis this reduces to its absolute value). A NaN in any
    axis yields NaN for that row.

    If no axis columns exist, ``r_gs_mean`` is used as-is when present.
    # NOTE(review): presumably r_gs is already a resultant magnitude in g;
    # confirm against the stage that produces it.

    Args:
        df: Input frame; mutated in place when a magnitude column is derived.

    Returns:
        Name of the magnitude column, or ``None`` if nothing suitable exists.
    """
    for suffix in ('_savgol', '_median', '_hp', '_lp', ''):
        present = [
            f"{axis}_mean{suffix}"
            for axis in ('x', 'y', 'z')
            if f"{axis}_mean{suffix}" in df.columns
        ]
        if present:
            g_col = f"g_mag{suffix}"
            # skipna=False so missing axis data is not silently treated as 0.
            df[g_col] = np.sqrt((df[present] ** 2).sum(axis=1, skipna=False))
            return g_col

    if 'r_gs_mean' in df.columns:
        return 'r_gs_mean'
    return None


def add_required_columns(df):
    """Ensure ``timestamp``, ``athlete_id`` and ``run_id`` columns exist.

    Missing columns are filled with placeholder values (a 10 ms-spaced
    synthetic timeline, ``'A001'``, ``'R001'``) and a message is printed for
    each one added. ``timestamp`` is always converted to datetime.

    Args:
        df: Frame to complete; mutated in place.

    Returns:
        The same ``df`` object, for convenience.
    """
    if 'timestamp' not in df.columns:
        print("Adding timestamp column for exposure calculation...")
        # Placeholder timeline only: the real sampling times are unknown here.
        df['timestamp'] = pd.date_range('2025-01-01', periods=len(df), freq='10ms')

    df['timestamp'] = pd.to_datetime(df['timestamp'])

    if 'athlete_id' not in df.columns:
        print("Adding athlete_id column...")
        df['athlete_id'] = 'A001'  # Default athlete ID

    if 'run_id' not in df.columns:
        print("Adding run_id column...")
        df['run_id'] = 'R001'  # Default run ID

    return df


# Load filtered data from previous stage
data_dir = Path('../data')
filtered_dir = data_dir / '05_filtering' / 'filtered_data'
exposure_dir = data_dir / '06_features_exposure_2g' / 'exposure_data'

# Find filtered files (sorted: glob order is otherwise filesystem-dependent)
filtered_files = sorted(filtered_dir.glob('*.csv'))
print(f"Found {len(filtered_files)} filtered files")

if filtered_files:
    # Load first filtered file
    df = pd.read_csv(filtered_files[0])
    print(f"Filtered data shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

    used_synthetic = False

    # Check if we have enough data points for meaningful exposure calculation
    if len(df) < MIN_ROWS_FOR_EXPOSURE:
        print(f"Warning: Filtered data has only {len(df)} rows, which is insufficient for exposure calculation.")
        print("This appears to be daily aggregated data. Using sample time series data for demonstration...")

        df = make_sample_data()
        g_col = 'g_mag'
        used_synthetic = True
        print(f"Using sample data with {len(df)} rows for exposure calculation")

    else:
        # Compute exposure metrics on filtered data
        print("\nComputing ≥2 g exposure metrics on filtered data...")
        print(f"Available columns: {list(df.columns)}")

        g_col = select_magnitude_column(df)
        if g_col is None:
            print("Warning: No suitable magnitude column found, using sample data")
            df = make_sample_data()
            g_col = 'g_mag'
            used_synthetic = True

        print(f"Using column: {g_col}")

        add_required_columns(df)

        print(f"Data shape before exposure calculation: {df.shape}")
        print(f"Required columns: {['athlete_id', 'run_id', 'timestamp', g_col]}")

    if used_synthetic:
        # NOTE(review): the output file below is still named after the real
        # filtered file even though its contents come from synthetic data.
        print("Warning: exposure results below are computed from synthetic sample data, not from the filtered file.")

    # Compute exposure metrics
    exposure_results = compute_exposure(df, g_col, threshold=G_THRESHOLD)
    print("Exposure results:")
    print(exposure_results)

    # Save exposure results
    exposure_dir.mkdir(parents=True, exist_ok=True)
    output_file = exposure_dir / f"exposure_{filtered_files[0].stem}.csv"
    exposure_results.to_csv(output_file, index=False)
    print(f"Saved exposure data to: {output_file}")

else:
    print("No filtered data found. Using sample data for demonstration...")

    # Load sample data directly for demo
    sample_files = sorted((data_dir / '00_collect' / 'imu').glob('sample_imu_*.csv'))
    if sample_files:
        df = pd.read_csv(sample_files[0])
        print(f"Using sample data shape: {df.shape}")

        if 'g_mag' not in df.columns:
            # Without this guard the missing column only surfaces as an
            # error from inside compute_exposure.
            print(f"Sample file {sample_files[0].name} has no 'g_mag' column; skipping exposure calculation.")
        else:
            # Compute exposure metrics
            exposure_results = compute_exposure(df, 'g_mag', threshold=G_THRESHOLD)
            print("Exposure results:")
            print(exposure_results)

            # Save sample exposure results
            exposure_dir.mkdir(parents=True, exist_ok=True)
            output_file = exposure_dir / f"exposure_sample_{sample_files[0].stem}.csv"
            exposure_results.to_csv(output_file, index=False)
            print(f"Saved sample exposure data to: {output_file}")
    else:
        print("No sample data found either.")


# 06: Compute ≥2 g Exposure Metrics

Calculate exposure dose metrics for head impact assessment.


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.features.exposure_2g import compute_exposure
from sledhead_imu.config import FILTERED, EXPOSURE

# Demo cell: run the exposure computation on a synthetic, seeded time series
# (no files are read here).
print("Computing ≥2 g exposure metrics...")

# Fixed seed so the synthetic series is the same on every run.
np.random.seed(42)
n_samples = 1000
timestamps = pd.date_range('2025-01-01', periods=n_samples, freq='10ms')


def _draw_g_mag():
    """Draw one sample: ~1 g with noise, or (5% of the time) a 2-8 g event."""
    # The baseline is always drawn first, then the event coin-flip, then the
    # event magnitude only when the flip succeeds.
    baseline = 1.0 + np.random.normal(0, 0.1)
    if np.random.random() < 0.05:
        return np.random.uniform(2.0, 8.0)
    return baseline


# One record per timestamp, all attributed to a single athlete and run.
data = [
    {
        'timestamp': stamp,
        'athlete_id': 'A001',
        'run_id': 'R001',
        'g_mag': _draw_g_mag(),
    }
    for stamp in timestamps
]

df = pd.DataFrame(data)
print(f"Sample data shape: {df.shape}")

# Count of samples at or above the 2 g threshold.
high_g_count = df['g_mag'].ge(2.0).sum()
print(f"High-g events (≥2g): {high_g_count}")

# Compute exposure metrics on the synthetic series.
exposure_results = compute_exposure(df, 'g_mag', threshold=2.0)
print("Exposure results:")
print(exposure_results)


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.features.exposure_2g import compute_exposure
from sledhead_imu.config import FILTERED, EXPOSURE

# Demo cell: the "filtered" input is simulated here rather than loaded from
# disk, then passed to the exposure computation.
print("Computing ≥2 g exposure metrics...")

# Seeded so repeated runs produce the same simulated series.
np.random.seed(42)
n_samples = 1000
timestamps = pd.date_range('2025-01-01', periods=n_samples, freq='10ms')

data = []
for stamp in timestamps:
    # Baseline reading near 1 g with small Gaussian noise.
    g_mag = 1.0 + np.random.normal(0, 0.1)

    # Roughly 5% of samples are replaced by a high-g event in [2, 8) g.
    is_high_g_event = np.random.random() < 0.05
    if is_high_g_event:
        g_mag = np.random.uniform(2.0, 8.0)

    data.append(dict(
        timestamp=stamp,
        athlete_id='A001',
        run_id='R001',
        g_mag=g_mag,
    ))

df = pd.DataFrame(data)
print(f"Sample data shape: {df.shape}")

# Number of samples at or above the 2 g threshold.
at_or_above_2g = df['g_mag'] >= 2.0
print(f"High-g events (≥2g): {at_or_above_2g.sum()}")

# Compute exposure metrics on the simulated series.
exposure_results = compute_exposure(df, 'g_mag', threshold=2.0)
print("Exposure results:")
print(exposure_results)
