# 04: Aggregate per Day

Aggregate IMU data by athlete and day for daily exposure analysis.


In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.segmentation.aggregate_per_day import aggregate_daily_data

# Locations of the athlete-segmented input and the per-day aggregated output.
data_dir = Path('../data')
athlete_dir = data_dir / '03_segment_by_athlete'
daily_dir = data_dir / '04_aggregate_per_day' / 'daily_athlete_data'


def _aggregate_and_save(csv_path, shape_label):
    """Load one IMU CSV, aggregate it per day, print a summary and save it.

    Both the athlete-file path and the sample-data fallback go through this
    helper so they are prepared, reported and persisted the same way.

    Args:
        csv_path: Path of the CSV file to load.
        shape_label: Text printed in front of the loaded frame's shape.

    Returns:
        A ``(df, daily_agg)`` tuple: the loaded frame and the result of
        ``aggregate_daily_data(df)``.
    """
    df = pd.read_csv(csv_path)
    print(f"{shape_label}: {df.shape}")

    # read_csv leaves timestamps as plain strings. The fallback path always
    # converted them before aggregating, so do the same for every input.
    # NOTE(review): presumably aggregate_daily_data needs a datetime column
    # to derive the date -- confirm against its implementation.
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Aggregate daily data
    daily_agg = aggregate_daily_data(df)
    print(f"Daily aggregated shape: {daily_agg.shape}")
    print(f"Daily data columns: {list(daily_agg.columns)}")
    print("\nSample daily data:")
    print(daily_agg.head())

    # Save aggregated data, named after the input file it was built from.
    daily_dir.mkdir(parents=True, exist_ok=True)
    output_file = daily_dir / f"daily_agg_{Path(csv_path).stem}.csv"
    daily_agg.to_csv(output_file, index=False)
    print(f"Saved daily aggregated data to: {output_file}")

    return df, daily_agg


# Find athlete data files
athlete_files = list(athlete_dir.glob('*.csv'))
print(f"Found {len(athlete_files)} athlete files")

if athlete_files:
    # Load first file as example
    df, daily_agg = _aggregate_and_save(athlete_files[0], "Data shape")
else:
    print("No athlete data found. Using sample data for demonstration...")

    # Load sample data directly for demo
    sample_files = list((data_dir / '00_collect' / 'imu').glob('sample_imu_*.csv'))
    if sample_files:
        df, daily_agg = _aggregate_and_save(
            sample_files[0], "Using sample data shape"
        )
    else:
        print("No sample data found either.")


Found 0 athlete files
No athlete data found. Using sample data for demonstration...
Using sample data shape: (1007, 14)
Daily aggregated shape: (1, 9)
Daily data columns: ['athlete_id', 'date', 'r_gs_mean', 'r_gs_max', 'r_gs_std', 'sample_count', 'x_mean', 'y_mean', 'z_mean']

Sample daily data:
  athlete_id        date  r_gs_mean  r_gs_max  r_gs_std  sample_count  \
0       A002  2025-01-16   1.007132  1.719199  0.065401          1007   

     x_mean    y_mean    z_mean  
0 -0.018535  0.148259 -0.989543  
Saved daily aggregated data to: ../data/04_aggregate_per_day/daily_athlete_data/daily_agg_sample_imu_A002_R001.csv
