In [10]:
import pandas as pd
from pathlib import Path

def add_segment_column(file_path, gap_hours=2):
    """Load a CSV and label each row with a time-gap based segment number.

    The file must contain a ``Timestamp`` column that ``pd.to_datetime`` can
    parse. Rows are compared to the row directly before them, in the order
    they appear in the file (the data is not sorted here).

    Args:
        file_path: Path or buffer accepted by ``pd.read_csv``.
        gap_hours: A new segment starts when the time since the previous row
            is strictly greater than this many hours.

    Returns:
        The loaded DataFrame with ``Timestamp`` converted to datetimes and an
        integer ``segment`` column that starts at 0 and increases by one at
        every qualifying gap.
    """
    frame = pd.read_csv(file_path)
    frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])

    threshold = pd.Timedelta(hours=gap_hours)
    elapsed = frame['Timestamp'].diff()

    # The first row has no predecessor (NaT), which compares False, so it
    # always falls into segment 0.
    starts_new_segment = elapsed > threshold

    # Running count of segment starts == segment index of each row.
    frame['segment'] = starts_new_segment.cumsum()
    return frame


def process_all_files(input_dir='data', output_dir='data/processed', gap_hours=2,
                      folders=('TERBINAFINE- (control)', 'TERBINAFINE+')):
    """Add segment columns to every coordinate CSV and save one file per worm.

    For each sub-folder of ``input_dir`` listed in ``folders``, every file
    matching ``coordinates_*.csv`` is passed to ``add_segment_column`` and the
    result is written to ``output_dir/<wormid>.csv``. The worm ID is built
    from the 3rd, 4th and 5th ``_``-separated fields of the file stem
    (``coordinates_highestspeed_DATE_HOUR_NUM_...`` -> ``DATE_HOUR_NUM``).

    All folders share one output directory, so two inputs that map to the
    same worm ID would overwrite each other. The second such file in a run is
    therefore skipped and reported as failed instead of silently replacing
    the first. Files left in ``output_dir`` by earlier runs are still
    overwritten.

    Args:
        input_dir: Directory containing the treatment sub-folders.
        output_dir: Directory for the processed CSVs; created if missing.
        gap_hours: Gap threshold in hours, forwarded to
            ``add_segment_column``.
        folders: Names of the sub-folders of ``input_dir`` to scan. Folders
            that do not exist are skipped.

    Returns:
        A dict with ``'total'`` (files found), ``'success'`` (files written)
        and ``'failed'`` (list of ``"<file name>: <error>"`` strings).
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    stats = {'total': 0, 'success': 0, 'failed': []}
    # Worm ID -> source file written during this run, to detect collisions.
    written = {}

    for folder in folders:
        folder_path = Path(input_dir) / folder
        if not folder_path.exists():
            continue

        # glob() order is unspecified; sort so progress output and failure
        # reports are reproducible between runs.
        for csv_file in sorted(folder_path.glob('coordinates_*.csv')):
            stats['total'] += 1
            try:
                # Extract worm ID: coordinates_highestspeed_DATE_HOUR_NUM_...
                parts = csv_file.stem.split('_')
                if len(parts) < 5:
                    raise ValueError(
                        "cannot extract worm ID; expected a name like "
                        "'coordinates_<label>_<date>_<hour>_<num>...'"
                    )
                wormid = '_'.join(parts[2:5])

                if wormid in written:
                    raise ValueError(
                        f"worm ID '{wormid}' was already written from "
                        f"{written[wormid]}; skipped to avoid overwriting"
                    )

                df = add_segment_column(str(csv_file), gap_hours)
                df.to_csv(output_path / f'{wormid}.csv', index=False)
                # Reserve the ID only once the output really exists.
                written[wormid] = csv_file

                stats['success'] += 1
                if stats['success'] % 10 == 0:
                    print(f"Processed {stats['success']}/{stats['total']}...")
            except Exception as e:
                # Per-file boundary: record the problem and keep going so one
                # bad file does not abort the whole batch.
                stats['failed'].append(f"{csv_file.name}: {e}")

    print(f"\n{'='*50}\nComplete: {stats['success']}/{stats['total']} files")
    if stats['failed']:
        print(f"Failed ({len(stats['failed'])}):")
        for f in stats['failed']:
            print(f"  {f}")

    return stats


# Run the batch with the default input/output directories and a 2-hour gap
# threshold; the returned summary dict (total / success / failed) is kept in
# `stats` for inspection.
stats = process_all_files(gap_hours=2)

Processed 10/10...
Processed 20/20...
Processed 30/30...
Processed 40/40...
Processed 50/50...
Processed 60/60...
Processed 70/70...
Processed 80/80...
Processed 90/90...
Processed 100/100...

==================================================
Complete: 104/104 files
