# 01: Ingest & Normalize

Load raw IMU data and normalize to unified schema.


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.io.load_imu import load_imu_data
from sledhead_imu.ingest.normalize import normalize_imu_data
from sledhead_imu.config import COLLECT, INGEST

# Alternative: use relative paths if config doesn't work
import os
print(f"Current working directory: {os.getcwd()}")

data_dir = Path('../data')
collect_dir = data_dir / '00_collect'

# Also try absolute path as backup
abs_data_dir = Path('/Users/jeff/sledhead-imu/data')
abs_collect_dir = abs_data_dir / '00_collect'

# Load sample IMU data
imu_files = list((collect_dir / 'imu').glob('sample_imu_*.csv'))
print(f"Looking in: {collect_dir / 'imu'}")
print(f"Files found: {[f.name for f in imu_files]}")
print(f"Found {len(imu_files)} IMU files")

# If no files found with relative path, try absolute path
if len(imu_files) == 0:
    print("Trying absolute path...")
    imu_files = list((abs_collect_dir / 'imu').glob('sample_imu_*.csv'))
    print(f"Looking in: {abs_collect_dir / 'imu'}")
    print(f"Files found: {[f.name for f in imu_files]}")
    print(f"Found {len(imu_files)} IMU files")

# Load and normalize first file as example
if imu_files:
    df_raw = load_imu_data(imu_files[0])
    print(f"Raw data shape: {df_raw.shape}")
    print(f"Columns: {list(df_raw.columns)}")
    
    # Normalize the data
    df_normalized = normalize_imu_data(df_raw, {})
    print(f"Normalized data shape: {df_normalized.shape}")
    print(f"Sample data:")
    print(df_normalized.head())
    
    # Save normalized data for next stage
    output_dir = data_dir / '01_ingest_normalize'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"normalized_{imu_files[0].stem}.csv"
    df_normalized.to_csv(output_file, index=False)
    print(f"Saved normalized data to: {output_file}")
else:
    print("No IMU files found. Make sure sample data is available.")


Current working directory: /Users/jeff/sledhead-imu/notebooks
Looking in: ../data/00_collect/imu
Files found: ['sample_imu_A002_R001.csv', 'sample_imu_A003_R001.csv', 'sample_imu_A003_R002.csv', 'sample_imu_A002_R002.csv', 'sample_imu_A002_R003.csv', 'sample_imu_A003_R003.csv', 'sample_imu_A001_R002.csv', 'sample_imu_A001_R003.csv', 'sample_imu_A001_R001.csv', 'sample_imu_data.csv']
Found 10 IMU files
Raw data shape: (1007, 14)
Columns: ['timestamp', 'athlete_id', 'run_id', 'accy', 'accz', 'gyrox', 'gyroy', 'gyroz', 't', 'x', 'y', 'z', 'r_gs', 'num_symptoms']
Normalized data shape: (1007, 15)
Sample data:
                timestamp athlete_id run_id  accy  accz  gyrox  gyroy  gyroz  \
0 2025-01-16 09:00:00.000       A002   R001  5129  4771   4959   5002   4994   
1 2025-01-16 09:00:00.001       A002   R001  5133  4770   4958   5005   4995   
2 2025-01-16 09:00:00.002       A002   R001  5131  4771   4961   5008   4994   
3 2025-01-16 09:00:00.003       A002   R001  5130  4775   4960   50

In [None]:
# This cell is now empty - use the first cell above
