In [5]:
import pandas as pd
from pathlib import Path
import os

In [6]:
root_directory = r"data\cleaned\chipshot"
target_order = [
    'Time', 
    'Rectus Femoris right', 'Rectus Femoris left', 
    'Hamstrings right', 'Hamstrings left', 
    'TibilaisÂ Anterior right', 'TibilaisÂ Anterior left', 
    'Gastrocnemius right', 'Gastrocnemius left'
]


In [7]:
df = pd.read_csv(r"data/cleaned/chipshot/jordan/1.csv", header=[3,4])
df

Unnamed: 0_level_0,Hamstrings right (85641),Hamstrings right (85641),Rectus Femoris right (85844),Rectus Femoris right (85844),Gastrocnemius left (85904),Gastrocnemius left (85904),TibilaisÂ Anterior right (85650),TibilaisÂ Anterior right (85650),Hamstrings left (85586),Hamstrings left (85586),TibilaisÂ Anterior left (85605),TibilaisÂ Anterior left (85605),Rectus Femoris left (85878),Rectus Femoris left (85878),Gastrocnemius right (85873),Gastrocnemius right (85873)
Unnamed: 0_level_1,EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV),EMG 1 Time Series (s),EMG 1 (mV)
0,0.000000,-0.205615,0.000000,-0.045991,0.000000,-0.020478,0.000000,0.059587,0.000000,-0.052537,0.000000,0.010575,0.000000,-0.061936,0.000000,-0.000504
1,0.000465,-0.199237,0.000465,-0.046494,0.000465,-0.009232,0.000465,0.042130,0.000465,-0.051530,0.000465,0.048844,0.000465,-0.096849,0.000465,-0.012589
2,0.000931,-0.170367,0.000931,-0.040787,0.000931,0.001846,0.000931,0.007553,0.000931,-0.051865,0.000931,0.096178,0.000931,-0.158282,0.000931,-0.030045
3,0.001397,-0.133944,0.001397,-0.033066,0.001397,0.011917,0.001397,-0.024338,0.001397,-0.051530,0.001397,0.150896,0.001397,-0.228107,0.001397,-0.060929
4,0.001862,-0.101213,0.001862,-0.029374,0.001862,0.016617,0.001862,-0.039109,0.001862,-0.050187,0.001862,0.215854,0.001862,-0.276783,0.001862,-0.087449
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6433,2.994672,-0.023667,2.994672,0.014603,2.994672,-0.030045,2.994672,-0.006882,2.994672,-0.008560,2.994672,-0.004868,2.994672,-0.006882,2.994672,0.039445
6434,2.995138,-0.025345,2.995138,0.012589,2.995138,-0.032059,2.995138,-0.007553,2.995138,-0.008225,2.995138,-0.009903,2.995138,-0.007553,2.995138,0.035752
6435,2.995603,-0.025177,2.995603,0.011917,2.995603,-0.032563,2.995603,-0.007889,2.995603,-0.006882,2.995603,-0.013596,2.995603,-0.008225,2.995603,0.023331
6436,2.996069,-0.026184,2.996069,0.014603,2.996069,-0.031724,2.996069,-0.006882,2.996069,-0.007217,2.996069,-0.015106,2.996069,-0.010407,2.996069,0.003861


In [8]:
def process_emg_file(file_path):
    try:
        # Read the file with the specific header structure
        df = pd.read_csv(file_path, header=[3, 4])
        
        # Clean the MultiIndex columns
        df.columns = pd.MultiIndex.from_tuples(
            [(c[0].split('(')[0].strip(), c[1].strip()) for c in df.columns]
        )
        
        # Extract Time column (assuming it's the first column)
        time_col = df.iloc[:, 0]
        
        # Extract EMG data (only columns containing 'mV' in level 1)
        # We use a try/except here in case a specific file doesn't have this section
        try:
            emg_data = df.xs('EMG 1 (mV)', level=1, axis=1).copy()
        except KeyError:
            print(f"Skipping {file_path.name}: 'EMG 1 (mV)' not found.")
            return None

        # Insert Time and Reindex
        emg_data.insert(0, 'Time', time_col)
        
        # Reorder columns to match target_order
        # Note: If a file is missing one of the target columns, this will insert NaN
        emg_data = emg_data.reindex(columns=target_order)
        
        return emg_data

    except Exception as e:
        print(f"Error processing {file_path.name}: {e}")
        return None

In [9]:
processed_files = {} # Dictionary to store your results

# Path(root_directory).rglob('*.csv') searches recursively for all csv files
print(f"Searching in: {root_directory}...\n")

for file_path in Path(root_directory).rglob('*.csv'):
    print(f"Processing: {file_path}")
    
    # Apply the logic
    processed_df = process_emg_file(file_path)
    
    # Store valid results
    if processed_df is not None:
        # We use the filename (or relative path) as the key
        key_name = f"{file_path.parent.name}/{file_path.name}"
        processed_files[key_name] = processed_df

print(f"\nProcessing complete. {len(processed_files)} files successfully processed.")

Searching in: data\cleaned\chipshot...

Processing: data\cleaned\chipshot\Ahesan\1.csv
Processing: data\cleaned\chipshot\Ahesan\2.csv
Processing: data\cleaned\chipshot\Ahesan\3.csv
Processing: data\cleaned\chipshot\Ahesan\4.csv
Processing: data\cleaned\chipshot\Ahesan\5.csv
Processing: data\cleaned\chipshot\Devansh\1.csv
Processing: data\cleaned\chipshot\Devansh\2.csv
Processing: data\cleaned\chipshot\Devansh\3.csv
Processing: data\cleaned\chipshot\Devansh\4.csv
Processing: data\cleaned\chipshot\Devansh\5.csv
Processing: data\cleaned\chipshot\Jordan\1.csv
Processing: data\cleaned\chipshot\Jordan\2.csv
Processing: data\cleaned\chipshot\Jordan\3.csv
Processing: data\cleaned\chipshot\Jordan\4.csv
Processing: data\cleaned\chipshot\Jordan\5.csv
Processing: data\cleaned\chipshot\Karan\1.csv
Processing: data\cleaned\chipshot\Karan\2.csv
Processing: data\cleaned\chipshot\Karan\3.csv
Processing: data\cleaned\chipshot\Karan\4.csv
Processing: data\cleaned\chipshot\Karan\5.csv
Processing: data\cle

In [10]:
if processed_files:
    first_key = list(processed_files.keys())[0]
    print(f"\nPreview of: {first_key}")
    display(processed_files[first_key].head())


Preview of: Ahesan/1.csv


Unnamed: 0,Time,Rectus Femoris right,Rectus Femoris left,Hamstrings right,Hamstrings left,TibilaisÂ Anterior right,TibilaisÂ Anterior left,Gastrocnemius right,Gastrocnemius left
0,0.0,0.017288,-0.003525,-0.031052,0.007889,-0.031891,0.016449,-0.003861,0.045823
1,0.000465,0.01796,-0.002853,-0.030381,-0.004532,-0.039109,0.004532,-0.002518,0.005203
2,0.000931,0.016617,-0.002518,-0.030381,-0.014939,-0.039109,-0.014939,-0.002182,-0.021317
3,0.001397,0.015946,-0.002518,-0.028702,-0.020478,-0.033738,-0.029374,-0.002853,-0.037095
4,0.001862,0.016617,-0.002853,-0.024506,-0.021653,-0.029038,-0.038102,-0.003861,-0.046494


In [11]:
output_directory = r"data\processed"

os.makedirs(output_directory, exist_ok=True)
print(f"Saving files to: {output_directory}...\n")

saved_count = 0

for key_name, df in processed_files.items():
    try:
        safe_filename = key_name.replace('\\', '_').replace('/', '_')
        save_path = os.path.join(output_directory, safe_filename)
        df.to_csv(save_path, index=False)
        
        print(f"Saved: {safe_filename}")
        saved_count += 1
        
    except Exception as e:
        print(f"Failed to save {key_name}: {e}")

print(f"\nSuccess! {saved_count} files have been saved to '{output_directory}'.")

Saving files to: data\processed...

Saved: Ahesan_1.csv
Saved: Ahesan_2.csv
Saved: Ahesan_3.csv
Saved: Ahesan_4.csv
Saved: Ahesan_5.csv
Saved: Devansh_1.csv
Saved: Devansh_2.csv
Saved: Devansh_3.csv
Saved: Devansh_4.csv
Saved: Devansh_5.csv
Saved: Jordan_1.csv
Saved: Jordan_2.csv
Saved: Jordan_3.csv
Saved: Jordan_4.csv
Saved: Jordan_5.csv
Saved: Karan_1.csv
Saved: Karan_2.csv
Saved: Karan_3.csv
Saved: Karan_4.csv
Saved: Karan_5.csv
Saved: Nihaal_1.csv
Saved: Nihaal_2.csv
Saved: Nihaal_3.csv
Saved: Nihaal_4.csv
Saved: Nihaal_5.csv
Saved: Soham_1.csv
Saved: Soham_2.csv
Saved: Soham_3.csv
Saved: Soham_4.csv
Saved: Soham_5.csv
Saved: Yadnesh_1.csv
Saved: Yadnesh_2.csv
Saved: Yadnesh_3.csv
Saved: Yadnesh_4.csv
Saved: Yadnesh_5.csv

Success! 35 files have been saved to 'data\processed'.
