In [1]:
import os
import pandas as pd
from scipy import interpolate

import numpy as np

def resample_y(x1, y1, x2):
    interp_func = interpolate.interp1d(x1, y1, kind='linear')
    return interp_func(x2)

# Create the output folder if it does not exist
output_folder = './modified_datasets/'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Iterate through all CSV files in the input folder
for file_name in os.listdir('./datasets/'):
    if file_name.endswith('.csv'):
        print(f'Processing file: {file_name}')
        # Read the CSV file into a DataFrame
        data = pd.read_csv(os.path.join('./datasets', file_name))

        # Get a list of all column names
        column_names = list(data.columns)

        # Initialize a counter for the new column names
        count = 1

        # Iterate through the column names
        for i in range(len(column_names)):
            if column_names[i] == "X [s]":
                if count == 1:
                    new_name = "X [s].0"
                else:
                    new_name = "X [s]." + str(count)
                data = data.rename(columns={column_names[i]: new_name})
                count += 1

        # Save the modified DataFrame to a new CSV file with the same name as the input file
        output_file_name = os.path.join(output_folder, file_name)
        data.to_csv(output_file_name, index=False)

        # Select the columns for resampling and combine the data into a single table
        time_col = data["X [s].0"]
        emg_cols = [col for col in data.columns if "EMG" in col]
        acc_cols = [col for col in data.columns if "Acc" in col]
        min_time = time_col.min()
        max_time = time_col.max()
        sample_rate = 30  # Change this to the desired sampling rate
        new_max_time = max_time - 1/sample_rate
        new_time = pd.Series(data=np.arange(min_time, new_max_time+1/sample_rate, 1/sample_rate), name="X [s]")
        emg_resampled = pd.DataFrame()
        acc_resampled = pd.DataFrame()

        for col in emg_cols:
            new_y = resample_y(data["X [s].0"], data[col], new_time)
            emg_resampled[col] = new_y

        for col in acc_cols:
            new_y = resample_y(data["X [s].1"], data[col], new_time)
            acc_resampled[col] = new_y

        data_combined = pd.concat([new_time, emg_resampled, acc_resampled], axis=1)
        data_combined.to_csv(output_file_name, index=False)

        print(f'File {file_name} processing complete.')


Processing file: QuietStance.csv
File QuietStance.csv processing complete.
Processing file: 3_1_mph.csv
File 3_1_mph.csv processing complete.
Processing file: SelfSelected.csv
File SelfSelected.csv processing complete.
Processing file: StandStop_2mph.csv
File StandStop_2mph.csv processing complete.
Processing file: 2_5_mph.csv
File 2_5_mph.csv processing complete.


In [2]:
import pandas as pd
import os

# Set the folder paths for input and output
input_folder = './modified_datasets/'
output_file = './combined_data.csv'

# Initialize an empty list to store the dataframes
dfs = []

# Iterate through all CSV files in the input folder
for file_name in os.listdir(input_folder):
    if file_name.endswith('.csv'):
        # Read the CSV file into a dataframe
        df = pd.read_csv(os.path.join(input_folder, file_name))
        # Append the dataframe to the list
        dfs.append(df)

# Concatenate all dataframes in the list into a single dataframe
combined_df = pd.concat(dfs)

# Save the combined dataframe to a CSV file
combined_df.to_csv(output_file, index=False)
