## imports

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
from datetime import timedelta
import scipy.stats as stats
import shutil
from collections import defaultdict

# Specify the path to the desired directory.
# NOTE: the placeholder string below must be replaced with the real dataset
# folder before running this cell; `os.chdir` cannot succeed on a path that
# does not exist. `parent_dir` and `root_dir` are both bound to the same string.
parent_dir = root_dir = r'<<< PLACE HERE DIRECTORY WITH DATASET >>>'

# Change the current working directory to the specified directory
os.chdir(parent_dir)

# Verify that the working directory has been changed
print("Current working directory:", os.getcwd())

Current working directory: C:\Users\BootMR\Documents\data_export\MAT


## Explanation


In [None]:
'''
Workflow:
1. Convert the ITLOG files to MAT files with the Inertia software.
2. Run the MATLAB script to compute the cadence rate and export it in CSV format.
3. Run the Python cells below to correct the cadence files, merge them, and sort them into the right folders.

'''

In [34]:
## correct columns

def process_and_update_cadence_files(directory):
    """
    Normalise the column layout of every cadence CSV found in a directory.

    Only files whose name ends with '-cadence-2hz.csv' are touched. Each one is
    rewritten in place with:
    - 'Time_s' renamed to 'timestamp'
    - 'Cadence_tpmn' renamed to 'cadence_tpmn'
    - 'Time_mn' removed

    Columns that are absent are simply skipped, so running the function on an
    already-normalised file leaves its columns unchanged. A failure on one file
    is reported on stdout and does not stop the remaining files.

    Parameters:
        directory (str): Path to the directory containing the files.
    """
    # Old column name -> new column name; keys missing from a file are ignored
    # by DataFrame.rename.
    renamed_columns = {'Time_s': 'timestamp', 'Cadence_tpmn': 'cadence_tpmn'}

    for entry in os.listdir(directory):
        if not entry.endswith("-cadence-2hz.csv"):
            continue

        cadence_file_path = os.path.join(directory, entry)

        try:
            table = pd.read_csv(cadence_file_path)

            # errors='ignore' makes the drop a no-op when 'Time_mn' is absent.
            table = table.rename(columns=renamed_columns)
            table = table.drop(columns=['Time_mn'], errors='ignore')

            # Overwrite the source file with the normalised layout.
            table.to_csv(cadence_file_path, index=False)
            print(f"✅ Processed: {cadence_file_path}")

        except Exception as e:
            print(f"❌ Error processing {cadence_file_path}: {e}")

# Example usage: normalise the columns of every cadence file in the MAT export folder.
# NOTE(review): the path is hard-coded for one machine; point it at your own dataset folder.
process_and_update_cadence_files(r"C:\Users\BootMR\Documents\data_export\MAT")


✅ Processed: C:\Users\BootMR\Documents\data_export\MAT\04_20240524T141611_516-pedal_1-node_516-cadence-2hz.csv
✅ Processed: C:\Users\BootMR\Documents\data_export\MAT\04_20240524T141622_516-pedal_1-node_516-cadence-2hz.csv
✅ Processed: C:\Users\BootMR\Documents\data_export\MAT\04_20240524T141631_516-pedal_1-node_516-cadence-2hz.csv


In [35]:
## correct timestamps

def extract_start_time_from_imu(file_path):
    """
    Return the recording start time stored in an IMU CSV file.

    The start time is taken from line 3 of the file (index 2), second
    comma-separated field, expected in the format: YYYY-MM-DD HH:MM:SS

    Parameters:
        file_path (str): Path to the IMU file.

    Returns:
        datetime: Naive datetime parsed from the file.

    Raises:
        ValueError: If the file has fewer than three lines, if line 3 has no
            second comma-separated field, or if that field does not match the
            expected format.
    """
    timestamp_line = None
    with open(file_path, 'r') as file:
        # Only line 3 is needed, so stop there instead of loading the whole
        # IMU recording into memory.
        for line_index, line in enumerate(file):
            if line_index == 2:
                timestamp_line = line
                break

    # Report a short file explicitly; previously this case crashed inside the
    # error handler because the line variable had never been assigned.
    if timestamp_line is None:
        raise ValueError(
            f"Error parsing timestamp: {file_path} has fewer than 3 lines"
        )

    try:
        imu_start_time_str = timestamp_line.split(",")[1].strip()
        return datetime.strptime(imu_start_time_str, "%Y-%m-%d %H:%M:%S")
    except (IndexError, ValueError) as e:
        raise ValueError(
            f"Error parsing timestamp from line: {timestamp_line}\nError: {e}"
        ) from e

def process_file_pairs(directory, offset_seconds=2):
    """
    Convert relative cadence timestamps to absolute ones using the IMU start time.

    For every IMU file ('*_imuoriginal.csv') in the directory, the cadence file
    ('*-cadence-2hz.csv') whose name starts with the same prefix is looked up.
    Its 'timestamp' column, read as seconds relative to the recording start, is
    replaced with `IMU start time + timestamp + offset_seconds`, and the result
    is written back to the same cadence file.

    The cadence file must already contain a 'timestamp' column holding numeric
    seconds. A file that was already converted fails the numeric conversion and
    is reported as an error rather than being shifted twice.

    Errors on one pair are reported on stdout and do not stop the other pairs.

    Parameters:
        directory (str): Directory containing both the IMU and cadence files.
        offset_seconds (float): Constant shift, in seconds, added to every
            absolute timestamp. Defaults to 2, the value that was previously
            hard-coded. NOTE(review): presumably this compensates for a delay
            in the cadence computation; confirm against the MATLAB script.
    """
    imu_suffix = "_imuoriginal.csv"

    # Sorted so that the "first matching cadence file" is deterministic and
    # does not depend on the order in which the OS lists the directory.
    all_files = sorted(os.listdir(directory))

    imu_files = [f for f in all_files if f.endswith(imu_suffix)]
    cadence_files = [f for f in all_files if f.endswith("-cadence-2hz.csv")]

    offset = timedelta(seconds=offset_seconds)

    for imu_file in imu_files:
        # Strip only the trailing suffix to obtain the shared file prefix.
        prefix = imu_file[:-len(imu_suffix)]

        # NOTE(review): only the first cadence file sharing the prefix is
        # updated; if one IMU file can have several cadence files, the others
        # keep their relative timestamps. Confirm this is intended.
        matching_cadence = next((f for f in cadence_files if f.startswith(prefix)), None)

        if matching_cadence is None:
            print(f"⚠️ No matching cadence file found for IMU file: {imu_file}")
            continue

        imu_path = os.path.join(directory, imu_file)
        cadence_path = os.path.join(directory, matching_cadence)

        try:
            # Get absolute start time from IMU
            imu_start_time = extract_start_time_from_imu(imu_path)
            print(f"✅ IMU start time from {imu_file}: {imu_start_time}")

            # Load cadence file
            cadence_df = pd.read_csv(cadence_path)

            if 'timestamp' not in cadence_df.columns:
                raise ValueError("Missing 'timestamp' column in cadence file.")

            # Apply the constant offset once to the start time instead of
            # adding it to the whole column in a second pass.
            shifted_start = imu_start_time + offset
            cadence_df['timestamp'] = cadence_df['timestamp'].apply(
                lambda x: shifted_start + timedelta(seconds=float(x))
            )

            # Save modified cadence file
            cadence_df.to_csv(cadence_path, index=False)
            print(f"💾 Updated cadence timestamps in: {matching_cadence}")

        except Exception as e:
            print(f"❌ Error processing:\n  IMU: {imu_path}\n  Cadence: {cadence_path}\n  Error: {e}")

# Example usage: convert the cadence timestamps of every IMU/cadence pair in the MAT export folder.
# NOTE(review): the path is hard-coded for one machine; point it at your own dataset folder.
directory_path = r"C:\Users\BootMR\Documents\data_export\MAT"
process_file_pairs(directory_path)


✅ IMU start time from 04_20240524T141611_516_imuoriginal.csv: 2024-05-06 14:12:06
💾 Updated cadence timestamps in: 04_20240524T141611_516-pedal_1-node_516-cadence-2hz.csv
✅ IMU start time from 04_20240524T141622_516_imuoriginal.csv: 2024-05-06 14:40:48
💾 Updated cadence timestamps in: 04_20240524T141622_516-pedal_1-node_516-cadence-2hz.csv
✅ IMU start time from 04_20240524T141631_516_imuoriginal.csv: 2024-05-06 15:01:10
💾 Updated cadence timestamps in: 04_20240524T141631_516-pedal_1-node_516-cadence-2hz.csv


In [38]:


def merge_cadence_files_by_pid(source_dir, export_root):
    """
    Merges `*-2hz.csv` files by p_id and saves them in export_root/p_id/{p_id}_cadence_merged.csv.

    The p_id is the first two characters of the file name; files whose name
    does not start with two digits are ignored. Within one p_id the files are
    concatenated row-wise in ascending file-name order.

    Files that cannot be read are reported on stdout and left out of the merge.
    If no file of a p_id can be read, nothing is written for that p_id, so no
    empty merged file is produced.

    Parameters:
        source_dir (str): Directory with input cadence files.
        export_root (str): Root directory where merged files will be saved in subfolders.
    """
    # Group files by p_id (first two characters of filename). Sorting the
    # listing up front keeps every group in ascending file-name order.
    grouped_files = defaultdict(list)
    for file_name in sorted(os.listdir(source_dir)):
        if file_name.endswith("-2hz.csv") and file_name[:2].isdigit():
            grouped_files[file_name[:2]].append(file_name)

    for p_id, files in grouped_files.items():
        # Collect the frames and concatenate once, rather than re-copying the
        # growing result on every iteration.
        frames = []
        for file_name in files:
            try:
                frames.append(pd.read_csv(os.path.join(source_dir, file_name)))
            except Exception as e:
                print(f"❌ Error reading {file_name}: {e}")

        if not frames:
            print(f"⚠️ No readable cadence files for p_id {p_id}; nothing written")
            continue

        # Create subfolder and save merged file with name like 04_cadence_merged.csv
        output_folder = os.path.join(export_root, p_id)
        os.makedirs(output_folder, exist_ok=True)

        output_filename = f"{p_id}_cadence_merged.csv"
        output_path = os.path.join(output_folder, output_filename)

        merged_df = pd.concat(frames, ignore_index=True)
        merged_df.to_csv(output_path, index=False)

        # Report the number of files actually merged, not merely found.
        print(f"✅ Merged {len(frames)} files for p_id {p_id} → {output_path}")

# Example usage: merge the cadence files from the MAT folder into one file per p_id
# under the parent export folder.
# NOTE(review): both paths are hard-coded for one machine; adjust them for your own dataset.
source_dir = r"C:\Users\BootMR\Documents\data_export\MAT"
export_root = r"C:\Users\BootMR\Documents\data_export"
merge_cadence_files_by_pid(source_dir, export_root)


✅ Merged 3 files for p_id 04 → C:\Users\BootMR\Documents\data_export\04\04_cadence_merged.csv
