## imports

In [None]:
import neurokit2 as nk
import os
import pandas as pd
import matplotlib as plt
from datetime import datetime, timedelta
import re
import warnings

# Root directory of the dataset (participant subfolders are looked up in here)
parent_dir = r'<<< PLACE HERE DIRECTORY WITH DATASET >>>'

# Work from inside the dataset directory so relative paths resolve against it
os.chdir(parent_dir)

# Read the master timesheet via a path relative to the new working directory
mastertimesheet = pd.read_excel("mastertimesheet-4.xlsx")

# Left-pad every p_id to at least two characters (e.g. 3 -> "03")
mastertimesheet['p_id'] = mastertimesheet['p_id'].map(lambda value: str(value).zfill(2))

# Report the working directory that is now in effect
print("Current working directory:", os.getcwd())

# Function to load file into a DataFrame
def load_file_into_dataframe(folder_path, var, filetype, sep=','):
    """Load one delimited text file from *folder_path* into a DataFrame.

    A file qualifies when its name ends with *filetype* and contains *var*.
    When several files qualify, the alphabetically first one is used so the
    choice does not depend on the (arbitrary) order of ``os.listdir``.

    Parameters
    ----------
    folder_path : str
        Directory to search (not recursive).
    var : str
        Substring that must occur in the file name.
    filetype : str
        Required file-name suffix, e.g. ``'.csv'``.
    sep : str, default ','
        Column separator passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed file, or ``None`` when no file matches, the file is empty,
        or reading it fails. Every outcome is reported with ``print``.
    """
    var_files = sorted(
        f for f in os.listdir(folder_path) if f.endswith(filetype) and var in f
    )

    if not var_files:
        print(f"No file with '{var}' in its name found in folder {folder_path}.")
        return None

    if len(var_files) > 1:
        # Make the silent "first match wins" rule visible to the user.
        print(f"Multiple files with '{var}' in their name found in folder {folder_path}; using {var_files[0]}.")

    file_path = os.path.join(folder_path, var_files[0])
    try:
        df = pd.read_csv(file_path, sep=sep)
    except pd.errors.EmptyDataError:
        print(f"The file {file_path} is empty.")
        return None
    except Exception as e:
        # Best-effort loader: callers treat None as "skip this folder".
        print(f"Error occurred while reading the file {file_path}: {e}")
        return None

    print(f"Loaded file: {file_path}")
    return df

Current working directory: C:\Users\BootMR\Documents\data_export


## calc features all pids

In [21]:
# Ignore warnings whose originating module name matches "neurokit2"
warnings.filterwarnings("ignore", module="neurokit2")

In [22]:

### worked well 28th Jan
#(original)

# Explanation
# -----------
# Window definition:
#     A 30-second window is used: 15 seconds before and 15 seconds after each
#     button press.
#
# Folder setup:
#     The script iterates over the entries of `parent_dir`; each subfolder
#     corresponds to one participant (p_id). Participant IDs listed in
#     `skip_p_ids` are skipped.
#
# Loading files:
#     `load_file_into_dataframe` loads a file into a pandas DataFrame based on
#     a name fragment. ECG data is read from a `.txt` file, button presses from
#     a `.csv` file.
#
# Data processing (per participant folder):
#     Both the ECG data and the button-press data are loaded. If either file
#     is missing or empty, the folder is skipped. Otherwise the ECG data is
#     processed around every button-press timestamp.
#
# Button-press window (per button press):
#     The ECG data is filtered to the 30-second window and processed with
#     neurokit2 to compute heart rate variability (HRV) metrics. A window that
#     contains no ECG samples, or that neurokit2 cannot process, is reported
#     and its HRV columns are left empty instead of aborting the whole run.
#
# Saving results:
#     The HRV features of every button press are stored next to the button
#     press data (rating first), together with the window start/end time, in
#     `<p_id>_ratingsECGfeatures.csv` inside the participant folder.

# Define window length in seconds around button press, currently equal before and after
window_length = 30
interval_before = window_length / 2
interval_after = window_length / 2

# Sampling rate (Hz) passed to neurokit2 for the ECG recordings
ecg_sampling_rate = 130

# List of p_ids to skip
#skip_p_ids = []  # Add any p_ids you want to skip
skip_p_ids = [f"{i:02}" for i in range(1)]

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)

    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue

    if not os.path.isdir(folder_path):
        continue

    print(f"Processing folder: {folder_path}")

    # Load ECG and buttons data
    ecg = load_file_into_dataframe(folder_path, '_ecg', '.txt', ';')
    buttons = load_file_into_dataframe(folder_path, 'buttons_gps.csv', '.csv', ',')

    if ecg is None or ecg.empty:
        print(f"ECG file is missing or empty in folder {folder_path}.")
        continue

    if buttons is None or buttons.empty:
        print(f"Buttons file is missing or empty in folder {folder_path}.")
        continue

    # Convert 'timestamp' columns to datetime format
    ecg['Phone timestamp'] = pd.to_datetime(ecg['Phone timestamp'])
    buttons['timestamp_button'] = pd.to_datetime(buttons['timestamp_button'])

    # Window bounds for every button press, aligned with the rows of `buttons`
    windows = pd.DataFrame({
        'window_start_time': buttons['timestamp_button'] - pd.Timedelta(seconds=interval_before),
        'window_end_time': buttons['timestamp_button'] + pd.Timedelta(seconds=interval_after),
    })

    ecg_times = ecg['Phone timestamp']
    hrv_rows = []  # one single-row DataFrame per successfully processed press

    # Iterate through each timestamp in buttons
    for index, timestamp in buttons['timestamp_button'].items():
        start_time = windows.at[index, 'window_start_time']
        end_time = windows.at[index, 'window_end_time']

        ######### filter on timestamp
        filtered_ecg = ecg[(ecg_times >= start_time) & (ecg_times <= end_time)]

        if filtered_ecg.empty:
            print(f"No ECG samples between {start_time} and {end_time}; HRV features left empty for {timestamp}.")
            continue

        ######### HRV feature calculation with neurokit
        try:
            # Process raw ECG data into QRS metrics
            print(f'start nk.ecg_process for timestamp {timestamp}')
            signals, _ = nk.ecg_process(filtered_ecg['ecg [uV]'], sampling_rate=ecg_sampling_rate, method='neurokit')

            # Calculate HRV features
            print(f'start nk.hrv for timestamp {timestamp}')
            hrv_features = nk.hrv(signals['ECG_R_Peaks'], sampling_rate=ecg_sampling_rate, nperseg=256, show=False)
        except Exception as e:
            # A single unusable window must not abort the batch for all
            # participants; the press simply keeps empty HRV columns.
            print(f"HRV calculation failed for timestamp {timestamp}: {e}")
            continue

        # Give the feature row the index of its button press so it can be
        # joined back by position rather than by (possibly repeated) timestamp.
        hrv_features.index = [index]
        hrv_rows.append(hrv_features)

    if hrv_rows:
        button_features = pd.concat(hrv_rows)
    else:
        button_features = pd.DataFrame(index=buttons.index)

    # Buttons data + window bounds + HRV features, one row per button press
    merged_features = buttons.join(windows).join(button_features)

    # Move 'rating' to the first column
    merged_features.insert(0, 'rating', merged_features.pop('rating'))

    # Save the concatenated DataFrame to CSV
    output_file_path = os.path.join(folder_path, f"{p_id}_ratingsECGfeatures.csv")
    merged_features.to_csv(output_file_path, index=None)
    print(f"{output_file_path} successfully saved for {p_id}")

Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with 'buttons_gps.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
ECG file is missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111821_ecg.txt
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_buttons_gps.csv
start nk.ecg_process for timestamp 2024-05-22 11:43:04.127494
start nk.hrv for timestamp 2024-05-22 11:43:04.127494
start nk.ecg_process for timestamp 2024-05-22 11:47:22.196925
start nk.hrv for timestamp 2024-05-22 11:47:22.196925
start nk.ecg_process for timestamp 2024-05-22 11:50:47.987902
start nk.hrv for timestamp 2024-05-22 11:50:47.987902
start nk.ecg_process for timestamp 2024-05

## calc descriptive stats

In [23]:

# Adds descriptive HR / HRV statistics per button-press window to each
# participant's ratingsECGfeatures file and saves it as <p_id>_ratings_wHRV.csv.

# Columns of the hr file that are summarised, and the names of the new columns
hr_column = 'HR [bpm]'
hrv_column = 'HRV [ms]'
stat_columns = ['hr_mean', 'hr_stdev', 'hrv_mean', 'hrv_stdev']

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)

    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue

    if not os.path.isdir(folder_path):
        continue

    print(p_id)

    # Load per-press ECG features and the heart-rate recording
    ratingsECGfeatures = load_file_into_dataframe(folder_path, 'ratingsECGfeatures.csv', '.csv', ',')
    hr = load_file_into_dataframe(folder_path, '_hr', '.txt', ';')

    if ratingsECGfeatures is None or ratingsECGfeatures.empty:
        print(f"ratingsECGfeatures missing or empty in folder {folder_path}.")
        continue

    if hr is None or hr.empty:
        print(f"hr missing or empty in folder {folder_path}.")
        continue

    # Convert 'timestamp' columns to datetime format
    hr['Phone timestamp'] = pd.to_datetime(hr['Phone timestamp'])
    ratingsECGfeatures['timestamp_button'] = pd.to_datetime(ratingsECGfeatures['timestamp_button'])

    # The window bounds come back from CSV as text; parse them explicitly for
    # the comparison (missing bounds become NaT and select no samples). The
    # stored columns themselves are left as loaded.
    window_starts = pd.to_datetime(ratingsECGfeatures['window_start_time'])
    window_ends = pd.to_datetime(ratingsECGfeatures['window_end_time'])

    hr_times = hr['Phone timestamp']
    stats_rows = []

    # One set of descriptive statistics per button-press window
    for start_time, end_time in zip(window_starts, window_ends):
        filtered_hr = hr[(hr_times >= start_time) & (hr_times <= end_time)]
        stats_rows.append({
            'hr_mean': round(filtered_hr[hr_column].mean(), 2),
            'hr_stdev': round(filtered_hr[hr_column].std(), 2),
            'hrv_mean': round(filtered_hr[hrv_column].mean(), 2),
            'hrv_stdev': round(filtered_hr[hrv_column].std(), 2),
        })

    stats_df = pd.DataFrame(stats_rows, index=ratingsECGfeatures.index, columns=stat_columns)

    ######### Reorder the columns: Place new stats columns after 'window_end_time'
    # The stat columns are excluded from the existing column list so that each
    # of them appears exactly once in the output.
    feature_columns = [col for col in ratingsECGfeatures.columns if col not in stat_columns]
    insert_at = feature_columns.index('window_end_time') + 1
    new_column_order = feature_columns[:insert_at] + stat_columns + feature_columns[insert_at:]

    ratingsECGfeatures = pd.concat([ratingsECGfeatures[feature_columns], stats_df], axis=1)[new_column_order]

    # Save the updated DataFrame
    output_file_path = os.path.join(folder_path, f"{p_id}_ratings_wHRV.csv")
    ratingsECGfeatures.to_csv(output_file_path, index=False)
    print(f"Saved {output_file_path}")





00-code_export
No file with 'ratingsECGfeatures.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
ratingsECGfeatures missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
03
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures.csv
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111820_hr.txt
Saved C:\Users\BootMR\Documents\data_export\03\03_ratings_wHRV.csv
04
No file with 'ratingsECGfeatures.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
ratingsECGfeatures missing or empty in folder C:\Users\BootMR\Documents\data_export\04.


## calc baseline all pids

In [24]:
# worked well 30th Jan. Computes baseline descriptive stats and HRV features for all pids
# successfully saved 46 baseline files


skip_p_ids = [f"{i:02}" for i in range(1)]

# Sampling rate (Hz) passed to neurokit2. The button-press windows are
# computed from the same `_ecg` files with sampling_rate=130, so the baseline
# uses the same value (it was 100 here before).
# NOTE(review): confirm 130 Hz against the recording device. Baseline files
# written with the old value are skipped below because they already exist;
# delete them to have them recomputed.
ecg_sampling_rate = 130

# Columns of the hr file that are summarised
hr_column = 'HR [bpm]'
hrv_column = 'HRV [ms]'

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    # Build the path first: it is needed by the skip message as well
    folder_path = os.path.join(parent_dir, p_id)

    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue

    if not os.path.isdir(folder_path):
        continue

    print(p_id)

    output_file_path = os.path.join(folder_path, f"{p_id}_baseline_HRVfeatures.csv")

    # Check if the output file already exists; if so, skip processing
    if os.path.exists(output_file_path):
        print(f"File {p_id}_baseline_HRVfeatures.csv already exists. Skipping folder {folder_path}.")
        continue

    # Load ECG and heart-rate data
    ecg = load_file_into_dataframe(folder_path, '_ecg', '.txt', ';')
    hr = load_file_into_dataframe(folder_path, '_hr', '.txt', ';')

    if ecg is None or ecg.empty:
        print(f"ECG file is missing or empty in folder {folder_path}.")
        print(f"Skipping folder {folder_path} due to missing data files.")
        continue

    if hr is None or hr.empty:
        print(f"hr missing or empty in folder {folder_path}.")
        print(f"Skipping folder {folder_path} due to missing data files.")
        continue

    # Convert 'timestamp' columns to datetime format
    ecg['Phone timestamp'] = pd.to_datetime(ecg['Phone timestamp'])
    hr['Phone timestamp'] = pd.to_datetime(hr['Phone timestamp'])

    ######### select baseline data by timestamps

    mask = mastertimesheet['p_id'] == p_id
    if not mask.any():
        print(f"No matching entry found in mastertimesheet for p_id {p_id}")
        continue

    # Use the first matching row of the mastertimesheet
    idx = mastertimesheet.index[mask][0]
    startt0 = mastertimesheet.loc[idx, 'startt0']
    startt1 = mastertimesheet.loc[idx, 'startt1']

    # Both the start and the end time must be present
    if pd.isna(startt0) or pd.isna(startt1):
        print(f"Missing start or end time for p_id {p_id}. Skipping.")
        continue

    # Baseline period runs from startt0 to startt1
    start_time = pd.to_datetime(startt0)
    end_time = pd.to_datetime(startt1)

    # Filter both recordings to the baseline period
    filtered_hr = hr[(hr['Phone timestamp'] >= start_time) & (hr['Phone timestamp'] <= end_time)]
    filtered_ecg = ecg[(ecg['Phone timestamp'] >= start_time) & (ecg['Phone timestamp'] <= end_time)]

    # Check if there is any data in the filtered_ecg DataFrame
    if filtered_ecg.empty:
        print(f"No ECG data found between {start_time} and {end_time}. Skipping {p_id}.")
        continue

    # Check if there is any data in the filtered_hr DataFrame
    if filtered_hr.empty:
        print(f"No hr data found between {start_time} and {end_time}. Skipping {p_id}.")
        continue

    ######### HRV feature calculation with neurokit

    try:
        # Process raw ECG data into QRS metrics
        signals, _ = nk.ecg_process(filtered_ecg['ecg [uV]'], sampling_rate=ecg_sampling_rate, method='neurokit')

        # Calculate HRV features
        print(f'start nk.hrv for {start_time} until {end_time}')
        hrv_baseline = nk.hrv(signals['ECG_R_Peaks'], sampling_rate=ecg_sampling_rate, nperseg=256, show=False)
    except Exception as e:
        # One unusable recording must not abort the batch for all participants
        print(f"HRV calculation failed for p_id {p_id}: {e}. Skipping.")
        continue

    ######### Descriptive statistic calculation

    # Descriptive statistics for HR [bpm] and HRV [ms] over the baseline period
    stats = {
        'hr_mean': round(filtered_hr[hr_column].mean(), 2),
        'hr_stdev': round(filtered_hr[hr_column].std(), 2),
        'hrv_mean': round(filtered_hr[hrv_column].mean(), 2),
        'hrv_stdev': round(filtered_hr[hrv_column].std(), 2),
    }

    ######### merge and store

    # Single-row table: descriptive statistics first, then the HRV features
    stats_df = pd.DataFrame([stats])
    hrv_baseline = pd.concat([stats_df, hrv_baseline.reset_index(drop=True)], axis=1)

    # Save the results to a CSV file named after the p_id
    hrv_baseline.to_csv(output_file_path, index=None)
    print(f"{output_file_path} successfully SAVED for {p_id}")

00-code_export
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
ECG file is missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
Skipping folder C:\Users\BootMR\Documents\data_export\00-code_export due to missing data files.
03
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111821_ecg.txt
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111820_hr.txt
start nk.hrv for 2024-05-22 11:28:00 until 2024-05-22 11:42:00
C:\Users\BootMR\Documents\data_export\03\03_baseline_HRVfeatures.csv successfully SAVED for 03
04
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
ECG file is missing or empty in folder C:\Users\BootMR\Do

## baseline correction

In [25]:
########### need this one

## adapt and re use this script:
# Subtracts each participant's baseline features from the per-press features
# and saves the result as <p_id>_ratings_HRV_baselinecorrected.csv.

skip_p_ids = [f"{i:02}" for i in range(1)]


# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)

    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue

    if not os.path.isdir(folder_path):
        continue

    print(f"Processing folder: {folder_path}")

    # Load the baseline features and the per-press features
    baseline_ECGfeatures = load_file_into_dataframe(folder_path, '_baseline_HRVfeatures.csv', '.csv', ',')
    ratings_wHRV = load_file_into_dataframe(folder_path, 'ratings_wHRV.csv', '.csv', ',')

    if baseline_ECGfeatures is None or baseline_ECGfeatures.empty:
        print(f"baseline_ECGfeatures missing or empty in folder {folder_path}.")
        continue

    if ratings_wHRV is None or ratings_wHRV.empty:
        print(f"ratings_wHRV missing or empty in folder {folder_path}.")
        continue

    # Convert 'timestamp' column to datetime format
    ratings_wHRV['timestamp_button'] = pd.to_datetime(ratings_wHRV['timestamp_button'])

    # Every column of the baseline file is corrected
    hrv_columns = baseline_ECGfeatures.columns

    # Ensure all baseline columns exist in ratings_wHRV; missing ones are
    # filled with NaN and therefore stay NaN after the subtraction
    for col in hrv_columns:
        if col not in ratings_wHRV.columns:
            ratings_wHRV[col] = float('nan')

    # Subtract the (single) baseline row from every button-press row;
    # pandas aligns the baseline values with the columns by name
    ratings_wHRV[hrv_columns] = ratings_wHRV[hrv_columns] - baseline_ECGfeatures.iloc[0]

    # Optional alternative (disabled): z-standardize each HRV feature
    #ratings_wHRV[hrv_columns] = (ratings_wHRV[hrv_columns] - ratings_wHRV[hrv_columns].mean()) / ratings_wHRV[hrv_columns].std()

    # Save the corrected DataFrame to CSV
    output_file_path = os.path.join(folder_path, f"{p_id}_ratings_HRV_baselinecorrected.csv")
    ratings_wHRV.to_csv(output_file_path, index=None)
    print(f"{output_file_path} successfully saved for {p_id}")



Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
No file with '_baseline_HRVfeatures.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with 'ratings_wHRV.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
baseline_ECGfeatures missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_baseline_HRVfeatures.csv
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratings_wHRV.csv
C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected.csv successfully saved for 03
Processing folder: C:\Users\BootMR\Documents\data_export\04
No file with '_baseline_HRVfeatures.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
No file with 'ratings_wHRV.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\

## merge baselines into one

In [26]:

# Collects every per-participant baseline file below parent_dir into one table.

# Initialize an empty list to store DataFrames
dfs = []

# Walk parent_dir and all of its subfolders
for subdir, dirs, files in os.walk(parent_dir):
    # Check each file in the current folder
    for file in files:
        if 'baseline_HRVfeatures' in file:
            file_path = os.path.join(subdir, file)
            print(f"Processing file: {file_path}")

            # Load the CSV file into a DataFrame
            df = pd.read_csv(file_path)

            # Record which folder the row came from; without this the merged
            # table gives no way to tell the participants apart
            if 'p_id' not in df.columns:
                df.insert(0, 'p_id', os.path.basename(subdir))

            # Append the DataFrame to the list
            dfs.append(df)

# Concatenate all DataFrames in the list
if dfs:
    all_baseline_HRVfeatures = pd.concat(dfs, ignore_index=True)

    # Define the output file path
    output_file_path = os.path.join(parent_dir, 'all_baselineHRVfeatures.csv')

    # Save the concatenated DataFrame to CSV
    all_baseline_HRVfeatures.to_csv(output_file_path, index=False)
    print(f"All files merged into: {output_file_path}")
else:
    print("No files found with 'baseline_HRVfeatures' in the filename.")

Processing file: C:\Users\BootMR\Documents\data_export\03\03_baseline_HRVfeatures.csv
All files merged into: C:\Users\BootMR\Documents\data_export\all_baselineHRVfeatures.csv


## merge ratings_wHRV into one

In [27]:
## merge all individual ratings_wHRV files into 1 large

# Name fragment that identifies the per-participant files to merge
file_marker = "ratings_wHRV"

# Columns that are moved to the front of every loaded table
leading_cols = ['rating', 'timestamp_button', 'p_id']

# Initialize a list to store DataFrames
dataframes = []

# Iterate over all subfolders
for subdir, _, files in os.walk(parent_dir):
    # Skip the root directory itself
    if subdir == parent_dir:
        continue

    # Extract the subfolder name
    subfolder_name = os.path.basename(subdir)

    # Check for files containing the marker in the current subfolder
    for file in files:
        if file_marker not in file:
            continue

        file_path = os.path.join(subdir, file)
        try:
            # Read the file into a DataFrame
            df = pd.read_csv(file_path)

            # Add the subfolder name as a new column
            df['p_id'] = subfolder_name

            # Reorder the columns: leading columns first, the rest unchanged
            remaining_cols = [col for col in df.columns if col not in leading_cols]
            df = df[leading_cols + remaining_cols]

            # Append the DataFrame to the list
            dataframes.append(df)
            print(f"Loaded file: {file_path}, Rows: {len(df)}")
        except Exception as e:
            # Best effort: report the unreadable/unexpected file and continue
            print(f"Error reading file {file_path}: {e}")

# Concatenate all DataFrames
if dataframes:
    merged_df = pd.concat(dataframes, ignore_index=True)

    # Save the merged DataFrame to a CSV file
    output_file_path = os.path.join(parent_dir, "all_ratingswHRV.csv")
    merged_df.to_csv(output_file_path, index=False)
    print(f"Saved merged DataFrame to: {output_file_path}, Total Rows: {len(merged_df)}")
else:
    print(f"No files with '{file_marker}' found in any subfolder.")


Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratings_wHRV.csv, Rows: 8
Saved merged DataFrame to: C:\Users\BootMR\Documents\data_export\all_ratingswHRV.csv, Total Rows: 8


## merge ratings_HRV_corrected into one

In [29]:
## merge all individual ratings_HRV_baselinecorrected files into 1 large

# Name fragment that identifies the per-participant files to merge
file_marker = "ratings_HRV_baselinecorrected"

# Columns that are moved to the front of every loaded table
leading_cols = ['rating', 'timestamp_button', 'p_id']

# Initialize a list to store DataFrames
dataframes = []

# Iterate over all subfolders
for subdir, _, files in os.walk(parent_dir):
    # Skip the root directory itself
    if subdir == parent_dir:
        continue

    # Extract the subfolder name
    subfolder_name = os.path.basename(subdir)

    # Check for files containing the marker in the current subfolder
    for file in files:
        if file_marker not in file:
            continue

        file_path = os.path.join(subdir, file)
        try:
            # Read the file into a DataFrame
            df = pd.read_csv(file_path)

            # Add the subfolder name as a new column
            df['p_id'] = subfolder_name

            # Reorder the columns: leading columns first, the rest unchanged
            remaining_cols = [col for col in df.columns if col not in leading_cols]
            df = df[leading_cols + remaining_cols]

            # Append the DataFrame to the list
            dataframes.append(df)
            print(f"Loaded file: {file_path}, Rows: {len(df)}")
        except Exception as e:
            # Best effort: report the unreadable/unexpected file and continue
            print(f"Error reading file {file_path}: {e}")

# Concatenate all DataFrames
if dataframes:
    merged_df = pd.concat(dataframes, ignore_index=True)

    # Save the merged DataFrame to a CSV file
    output_file_path = os.path.join(parent_dir, "all_ratingswHRV_corrected.csv")
    merged_df.to_csv(output_file_path, index=False)
    print(f"Saved merged DataFrame to: {output_file_path}, Total Rows: {len(merged_df)}")
else:
    print(f"No files with '{file_marker}' found in any subfolder.")


Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected.csv, Rows: 8
Saved merged DataFrame to: C:\Users\BootMR\Documents\data_export\all_ratingswHRV_corrected.csv, Total Rows: 8
