# 02: Detect & Trim Runs

Detect run segments in the continuous data stream, then trim out the first detected segment and save it for the next stage.


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../../src')

from sledhead_imu.segmentation.detect_trim_runs import detect_run_segments, trim_run_segment
from sledhead_imu.config import INGEST, TRIM

# Stage directories: input is read from stage 01, output is written to the
# stage 02 'segments' folder. Later cells reuse data_dir / trim_dir.
data_dir = Path('../data')
ingest_dir = data_dir / '01_ingest_normalize'
trim_dir = data_dir / '02_detect_trim_runs' / 'segments'

# Find normalized data files. Path.glob yields matches in an arbitrary,
# filesystem-dependent order, so sort them to make "the first file" below
# reproducible from run to run and machine to machine.
normalized_files = sorted(ingest_dir.glob('*.csv'))
print(f"Found {len(normalized_files)} normalized files")

if normalized_files:
    # Load first file as example
    source_file = normalized_files[0]
    df = pd.read_csv(source_file)
    print(f"Data shape: {df.shape}")

    # CSV round-trips timestamps as strings; convert so that subtracting two
    # timestamps below yields a Timedelta.
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Detect run segments. Each entry is unpacked below as a
    # (start_idx, end_idx) pair; the meaning/units of `threshold` are defined
    # by detect_run_segments itself.
    segments = detect_run_segments(df, threshold=1.5)
    print(f"Found {len(segments)} run segments")

    # Trim first segment only (this notebook is a single-segment example).
    if segments:
        start_idx, end_idx = segments[0]
        trimmed_segment = trim_run_segment(df, start_idx, end_idx)
        print(f"Trimmed segment shape: {trimmed_segment.shape}")

        # Calculate duration safely: an empty segment would make .iloc raise,
        # and a single-row segment has no meaningful duration.
        if len(trimmed_segment) > 1:
            duration = trimmed_segment['timestamp'].iloc[-1] - trimmed_segment['timestamp'].iloc[0]
            print(f"Segment duration: {duration}")
        else:
            print("Segment too short to calculate duration")

        # Save trimmed segment for next stage
        trim_dir.mkdir(parents=True, exist_ok=True)
        output_file = trim_dir / f"trimmed_{source_file.stem}.csv"
        trimmed_segment.to_csv(output_file, index=False)
        print(f"Saved trimmed segment to: {output_file}")
else:
    print("No normalized data found. Run 01_ingest_normalize.ipynb first.")


Found 1 normalized files
Data shape: (1007, 15)
Found 6 run segments
Trimmed segment shape: (1, 15)
Segment too short to calculate duration
Saved trimmed segment to: ../data/02_detect_trim_runs/segments/trimmed_normalized_sample_imu_A002_R001.csv


In [3]:
# Demo: Work with sample data directly if no normalized data exists.
# Relies on normalized_files, data_dir and trim_dir from the previous cell.
if not normalized_files:
    print("No normalized data found. Using sample data for demonstration...")

    # Load sample data directly. Path.glob yields matches in an arbitrary,
    # filesystem-dependent order, so sort them to make the choice of "first"
    # sample file reproducible.
    sample_files = sorted((data_dir / '00_collect' / 'imu').glob('sample_imu_*.csv'))
    if sample_files:
        sample_file = sample_files[0]
        df = pd.read_csv(sample_file)
        print(f"Using sample data shape: {df.shape}")

        # CSV round-trips timestamps as strings; convert so that subtracting
        # two timestamps below yields a Timedelta.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Detect run segments. Each entry is unpacked below as a
        # (start_idx, end_idx) pair.
        segments = detect_run_segments(df, threshold=1.5)
        print(f"Found {len(segments)} run segments")

        # Trim first segment only
        if segments:
            start_idx, end_idx = segments[0]
            trimmed_segment = trim_run_segment(df, start_idx, end_idx)
            print(f"Trimmed segment shape: {trimmed_segment.shape}")

            # Calculate duration safely: an empty segment would make .iloc
            # raise, and a single-row segment has no meaningful duration.
            if len(trimmed_segment) > 1:
                duration = trimmed_segment['timestamp'].iloc[-1] - trimmed_segment['timestamp'].iloc[0]
                print(f"Segment duration: {duration}")
            else:
                print("Segment too short to calculate duration")

            # Save trimmed segment.
            # NOTE(review): matched stems already start with 'sample_', so the
            # file is named 'trimmed_sample_sample_imu_...'. Kept as-is in case
            # a later stage looks for that exact name - confirm before changing.
            trim_dir.mkdir(parents=True, exist_ok=True)
            output_file = trim_dir / f"trimmed_sample_{sample_file.stem}.csv"
            trimmed_segment.to_csv(output_file, index=False)
            print(f"Saved trimmed segment to: {output_file}")
    else:
        print("No sample data found either.")
