In [None]:
import pandas as pd
from pathlib import Path
import os

In [None]:
# Folder that is searched recursively for the input CSV files.
# Built with os.path.join instead of a hard-coded backslash literal so the same
# notebook resolves the folder on Windows, macOS and Linux alike.
root_directory = os.path.join("data", "cleaned", "chipshot")

# The header rows used when reading each file are set inside process_emg_file
# (header=[3, 4]).

# Desired output column order
# NOTE(review): the 'Â' in the two "Tibilais" labels looks like a mis-decoded
# non-breaking space. The labels are kept verbatim because they must match the
# CSV headers exactly - confirm against a real input file.
target_order = [
    'Time',
    'Rectus Femoris right', 'Rectus Femoris left',
    'Hamstrings right', 'Hamstrings left',
    'TibilaisÂ Anterior right', 'TibilaisÂ Anterior left',
    'Gastrocnemius right', 'Gastrocnemius left'
]


In [None]:
def _normalise_label(label):
    """Return ``label`` with whitespace variants (incl. NBSP) and stray 'Â' collapsed to single spaces."""
    return ' '.join(str(label).replace('\u00c2', ' ').split())


def _near_match_renames(available, wanted):
    """Map existing labels onto wanted labels that differ only by whitespace/mojibake."""
    by_normalised = {}
    for label in available:
        by_normalised.setdefault(_normalise_label(label), label)

    renames = {}
    for target in wanted:
        if target in available:
            continue  # exact match present, nothing to rescue
        candidate = by_normalised.get(_normalise_label(target))
        # Never steal a column that is itself one of the wanted labels.
        if candidate is not None and candidate not in wanted:
            renames[candidate] = target
    return renames


def process_emg_file(file_path, columns=None):
    """Load one CSV export and return its 'EMG 1 (mV)' channels in a fixed column order.

    The file is read with rows 3 and 4 (zero-based) as a two-level header.
    Level 0 is trimmed to the text before the first '(', level 1 is stripped.
    The first column of the file is used as 'Time'.

    Parameters
    ----------
    file_path : str or pathlib.Path
        CSV file to read.
    columns : list of str, optional
        Output column order. Defaults to the notebook-level ``target_order``.

    Returns
    -------
    pandas.DataFrame or None
        Frame with exactly the requested columns; columns absent from the file
        are filled with NaN (a warning is printed). ``None`` when the file has
        no 'EMG 1 (mV)' section or any other error occurs (the error is printed).
    """
    # Accept plain strings too: every message below relies on ``.name``.
    file_path = Path(file_path)

    try:
        wanted = list(target_order if columns is None else columns)

        # Read the file with the specific header structure
        df = pd.read_csv(file_path, header=[3, 4])

        # Clean the MultiIndex columns
        df.columns = pd.MultiIndex.from_tuples(
            [(top.split('(')[0].strip(), sub.strip()) for top, sub in df.columns]
        )

        # Extract Time column (assuming it's the first column)
        time_col = df.iloc[:, 0]

        # Keep only the columns whose second header level is 'EMG 1 (mV)';
        # a file without that section is skipped rather than treated as an error.
        try:
            emg_data = df.xs('EMG 1 (mV)', level=1, axis=1).copy()
        except KeyError:
            print(f"Skipping {file_path.name}: 'EMG 1 (mV)' not found.")
            return None

        emg_data.insert(0, 'Time', time_col)

        # Rescue columns whose header differs from the wanted label only by
        # whitespace encoding (e.g. a non-breaking space); exact matches win.
        emg_data = emg_data.rename(columns=_near_match_renames(emg_data.columns, wanted))

        # reindex silently creates all-NaN columns for missing labels, so say so.
        missing = [name for name in wanted if name not in emg_data.columns]
        if missing:
            print(f"Warning: {file_path.name} has no data for {missing}; filled with NaN.")

        return emg_data.reindex(columns=wanted)

    except Exception as e:
        # Best-effort batch processing: report the problem and let the caller continue.
        print(f"Error processing {file_path.name}: {e}")
        return None

In [None]:
processed_files = {}  # Maps a per-file key (see below) to its processed DataFrame

root_path = Path(root_directory)
print(f"Searching in: {root_directory}...\n")

if root_path.is_dir():
    # rglob searches recursively for all csv files; sorting makes the processing
    # order (and therefore the log and the preview cell) reproducible across runs.
    csv_paths = sorted(root_path.rglob('*.csv'))
else:
    print(f"Warning: '{root_directory}' is not an existing directory; nothing to process.")
    csv_paths = []

# Default key is "<parent folder>/<file name>". Because the search is recursive,
# two files in different branches can share that key, which would silently
# overwrite one result. Count the keys first so ambiguous ones can be replaced.
short_keys = [f"{csv_path.parent.name}/{csv_path.name}" for csv_path in csv_paths]
key_counts = {}
for short_key in short_keys:
    key_counts[short_key] = key_counts.get(short_key, 0) + 1

for file_path, short_key in zip(csv_paths, short_keys):
    print(f"Processing: {file_path}")

    # Apply the logic
    processed_df = process_emg_file(file_path)

    # Store valid results only
    if processed_df is None:
        continue

    if key_counts[short_key] > 1:
        # Fall back to the full path relative to the root, which is unique.
        key_name = file_path.relative_to(root_path).as_posix()
        print(f"  Note: key '{short_key}' is ambiguous; stored as '{key_name}'.")
    else:
        key_name = short_key

    processed_files[key_name] = processed_df

print(f"\nProcessing complete. {len(processed_files)} files successfully processed.")

In [None]:
# Sanity check: show the first rows of the first stored result, if there is one.
if len(processed_files) > 0:
    first_key = next(iter(processed_files))
    preview_title = f"\nPreview of: {first_key}"
    print(preview_title)
    display(processed_files[first_key].head())

In [None]:
# Built with os.path.join instead of a backslash literal so the folder resolves
# on Windows, macOS and Linux alike.
output_directory = os.path.join("data", "processed_emg_data")

# Create the directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

print(f"Saving files to: {output_directory}...\n")

# 2. SAVE LOGIC
saved_count = 0
failed_keys = []       # keys that could not be written, reported at the end
written_names = set()  # flat filenames already used in this run

for key_name, df in processed_files.items():
    # key_name looks like "Jordan/5.csv" or "Jordan\5.csv".
    # Directory separators are replaced with underscores to get a flat filename,
    # e.g. "Jordan/5.csv" becomes "Jordan_5.csv".
    safe_filename = key_name.replace('\\', '_').replace('/', '_')

    # Two different keys can flatten to the same name; refuse to overwrite a
    # file that was written moments ago and report it instead.
    if safe_filename in written_names:
        print(f"Failed to save {key_name}: '{safe_filename}' was already written for another entry")
        failed_keys.append(key_name)
        continue

    try:
        # Construct the full output path
        save_path = os.path.join(output_directory, safe_filename)

        # Save to CSV (index=False prevents writing the row numbers)
        df.to_csv(save_path, index=False)

        written_names.add(safe_filename)
        print(f"Saved: {safe_filename}")
        saved_count += 1

    except Exception as e:
        # Keep going so one bad file does not block the rest of the batch.
        print(f"Failed to save {key_name}: {e}")
        failed_keys.append(key_name)

# Only claim success when every entry was actually written.
if failed_keys:
    print(f"\nFinished with errors: {saved_count} files saved to '{output_directory}', "
          f"{len(failed_keys)} failed: {failed_keys}")
elif saved_count == 0:
    print("\nNothing to save: no processed files were available.")
else:
    print(f"\nSuccess! {saved_count} files have been saved to '{output_directory}'.")