## import & config

In [None]:
import neurokit2 as nk
import os
import pandas as pd
import matplotlib as plt
from datetime import datetime, timedelta
import re
import warnings
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer


# Specify the path to the desired directory
parent_dir = r'<<< PLACE HERE DIRECTORY WITH DATASET >>>'

# Change the current working directory to the specified directory
os.chdir(parent_dir)

mastertimesheet = pd.read_excel("mastertimesheet-4.xlsx")

# Add leading zero to p_id values below 10
mastertimesheet['p_id'] = mastertimesheet['p_id'].apply(lambda x: str(x).zfill(2))

# Verify that the working directory has been changed
print("Current working directory:", os.getcwd())

# Function to load file into a DataFrame
def load_file_into_dataframe(folder_path, var, filetype, sep=','):
    var_files = [f for f in os.listdir(folder_path) if f.endswith(filetype) and var in f]
    
    if var_files:
        file_path = os.path.join(folder_path, var_files[0])
        try:
            df = pd.read_csv(file_path, sep=sep)
            print(f"Loaded file: {file_path}")
            return df
        except pd.errors.EmptyDataError:
            print(f"The file {file_path} is empty.")
            return None
        except Exception as e:
            print(f"Error occurred while reading the file {file_path}: {e}")
            return None
    else:
        print(f"No file with '{var}' in its name found in folder {folder_path}.")
        return None

warnings.filterwarnings("ignore", module="neurokit2")



Current working directory: C:\Users\BootMR\Documents\data_export


## merge features and stats

In [2]:
#### worked well 1st March
# all features and descriptive stats


# Define window configurations
window_configs = [
    (30, 15, 15, "{p_id}_ratingsECGfeatures_30s.csv"),
    (60, 30, 30, "{p_id}_ratingsECGfeatures_60s1.csv"),
    (60, 20, 40, "{p_id}_ratingsECGfeatures_60s2.csv"),
    (120, 60, 60, "{p_id}_ratingsECGfeatures_120s.csv")
]

# List of p_ids to skip
skip_p_ids = [f"{i:02}" for i in range(1)]

# Iterate through each participant's folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)
    
    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue
    
    if os.path.isdir(folder_path):
        print(f"Processing folder: {folder_path}")
        
        # Load ECG, buttons, and HR data
        ecg = load_file_into_dataframe(folder_path, '_ecg', '.txt', ';')
        buttons = load_file_into_dataframe(folder_path, 'buttons_gps.csv', '.csv', ',')
        hr = load_file_into_dataframe(folder_path, '_hr', '.txt', ';')

        if ecg is None or ecg.empty:
            print(f"ECG file is missing or empty in folder {folder_path}.")
            continue
        if buttons is None or buttons.empty:
            print(f"Buttons file is missing or empty in folder {folder_path}.")
            continue
        if hr is None or hr.empty:
            print(f"HR file is missing or empty in folder {folder_path}.")
            continue
        
        # Convert timestamps to datetime
        ecg['Phone timestamp'] = pd.to_datetime(ecg['Phone timestamp'])
        buttons['timestamp_button'] = pd.to_datetime(buttons['timestamp_button'])
        hr['Phone timestamp'] = pd.to_datetime(hr['Phone timestamp'])

        # Iterate over different time windows
        for window_length, interval_before, interval_after, filename_template in window_configs:
            print(f"Processing time window: {window_length}s for {p_id}")
            button_features = pd.DataFrame()
            
            with tqdm(total=len(buttons), desc=f"Processing {p_id} - {window_length}s", unit="entry") as pbar:
                for index, row in buttons.iterrows():
                    timestamp = row['timestamp_button']
                    rating = row['rating']
                    
                    start_time = timestamp - pd.Timedelta(seconds=interval_before)
                    end_time = timestamp + pd.Timedelta(seconds=interval_after)
                    
                    # Filter ECG data within the time window
                    filtered_ecg = ecg[(ecg['Phone timestamp'] >= start_time) & (ecg['Phone timestamp'] <= end_time)]
                    
                    if filtered_ecg.empty:
                        pbar.update(1)
                        continue
                    
                    # Extract ECG signal and process HRV features
                    filtered_ecg_ecgonly = filtered_ecg['ecg [uV]']
                    filtered_ecg_ecgonly_signals, _ = nk.ecg_process(filtered_ecg_ecgonly, sampling_rate=130, method='neurokit')
                    hrv_features = nk.hrv(filtered_ecg_ecgonly_signals['ECG_R_Peaks'], sampling_rate=130, nperseg=256, show=False)
                    
                    # Compute HR statistics in the same window
                    filtered_hr = hr[(hr['Phone timestamp'] >= start_time) & (hr['Phone timestamp'] <= end_time)]
                    
                    if not filtered_hr.empty:
                        hr_mean = round(filtered_hr['HR [bpm]'].mean(), 2)
                        hr_stdev = round(filtered_hr['HR [bpm]'].std(), 2)
                        hrv_mean = round(filtered_hr['HRV [ms]'].mean(), 2)
                        hrv_stdev = round(filtered_hr['HRV [ms]'].std(), 2)
                    else:
                        hr_mean, hr_stdev, hrv_mean, hrv_stdev = None, None, None, None
                    
                    # Add computed features to the dataframe
                    hrv_features.insert(0, 'button_timestamp', timestamp)
                    hrv_features.insert(1, 'rating', rating)
                    hrv_features.insert(2, 'window_start_time', start_time)
                    hrv_features.insert(3, 'window_end_time', end_time)
                    hrv_features['hr_mean'] = hr_mean
                    hrv_features['hr_stdev'] = hr_stdev
                    hrv_features['hrv_mean'] = hrv_mean
                    hrv_features['hrv_stdev'] = hrv_stdev
                    
                    # Append to results dataframe
                    button_features = pd.concat([button_features, hrv_features], ignore_index=True)
                    pbar.update(1)
            
            # Merge with button press data and save results
            merged_features = pd.merge(buttons, button_features, how='left', left_on='timestamp_button', right_on='button_timestamp')
            merged_features.drop(columns=['button_timestamp', 'rating_y'], inplace=True)
            merged_features.rename(columns={'rating_x': 'rating'}, inplace=True)
            merged_features.insert(0, 'rating', merged_features.pop('rating'))

            # Insert HR statistics in correct column order
            column_order = list(merged_features.columns)
            if 'HRV_MeanNN' in column_order:
                window_end_time_index = column_order.index('window_end_time')
                hrv_mean_nn_index = column_order.index('HRV_MeanNN')

                new_column_order = (
                    column_order[:window_end_time_index + 1] +
                    ['hr_mean', 'hr_stdev', 'hrv_mean', 'hrv_stdev'] +
                    column_order[hrv_mean_nn_index:]
                )

                merged_features = merged_features[new_column_order]
            
            # Delete the last 4 columns
            merged_features = merged_features.iloc[:, :-4]

            # Save the results
            output_file_path = os.path.join(folder_path, filename_template.format(p_id=p_id))
            merged_features.to_csv(output_file_path, index=False)
            print(f"{output_file_path} successfully saved with HRV and HR statistics for {p_id}")

print("Processing complete.")


Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with 'buttons_gps.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
ECG file is missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111821_ecg.txt
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_buttons_gps.csv
Loaded file: C:\Users\BootMR\Documents\data_export\03\polar_h10_cbd9da26_20240522_111820_hr.txt
Processing time window: 30s for 03


Processing 03 - 30s: 100%|██████████| 8/8 [00:03<00:00,  2.59entry/s]


C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_30s.csv successfully saved with HRV and HR statistics for 03
Processing time window: 60s for 03


Processing 03 - 60s: 100%|██████████| 8/8 [00:05<00:00,  1.37entry/s]


C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s1.csv successfully saved with HRV and HR statistics for 03
Processing time window: 60s for 03


Processing 03 - 60s: 100%|██████████| 8/8 [00:06<00:00,  1.32entry/s]


C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s2.csv successfully saved with HRV and HR statistics for 03
Processing time window: 120s for 03


Processing 03 - 120s: 100%|██████████| 8/8 [00:13<00:00,  1.70s/entry]

C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_120s.csv successfully saved with HRV and HR statistics for 03
Processing folder: C:\Users\BootMR\Documents\data_export\04
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
Loaded file: C:\Users\BootMR\Documents\data_export\04\04_buttons_gps.csv
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\04.
ECG file is missing or empty in folder C:\Users\BootMR\Documents\data_export\04.
Processing folder: C:\Users\BootMR\Documents\data_export\FB
No file with '_ecg' in its name found in folder C:\Users\BootMR\Documents\data_export\FB.
No file with 'buttons_gps.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\FB.
No file with '_hr' in its name found in folder C:\Users\BootMR\Documents\data_export\FB.
ECG file is missing or empty in folder C:\Users\BootMR\Documents\data_export\FB.
Processing complete.





## baseline correction

In [3]:
####### worked well 1st March


skip_p_ids = [f"{i:02}" for i in range(1)]

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)
    
    if p_id in skip_p_ids:
        print(f"Skipping folder as instructed: {folder_path} (p_id {p_id})")
        continue
    
    if os.path.isdir(folder_path):
        
        print(f"Processing folder: {folder_path}")
        
        # Load the baseline ECG features file
        baseline_ECGfeatures = load_file_into_dataframe(folder_path, '_baseline_HRVfeatures.csv', '.csv', ',')      

        if baseline_ECGfeatures is None or baseline_ECGfeatures.empty:
            print(f"baseline_ECGfeatures missing or empty in folder {folder_path}.")
            continue
        
        # List of 4 feature files for the current folder
        window_configs = [
            (30, f"{p_id}_ratingsECGfeatures_30s.csv"),
            (60, f"{p_id}_ratingsECGfeatures_60s1.csv"),
            (60, f"{p_id}_ratingsECGfeatures_60s2.csv"),
            (120, f"{p_id}_ratingsECGfeatures_120s.csv")
        ]
        
        for window_length, filename in window_configs:
            file_path = os.path.join(folder_path, filename)
            
            # Load the ratings ECG features for the current file
            ratings_wHRV = load_file_into_dataframe(folder_path, filename, '.csv', ',')

            if ratings_wHRV is None or ratings_wHRV.empty:
                print(f"{filename} missing or empty in folder {folder_path}.")
                continue
            
            # Convert 'timestamp_button' column to datetime
            ratings_wHRV['timestamp_button'] = pd.to_datetime(ratings_wHRV['timestamp_button'])

            # Ensure all columns in baseline_ECGfeatures exist in ratings_wHRV
            for col in baseline_ECGfeatures.columns:
                if col not in ratings_wHRV.columns:
                    ratings_wHRV[col] = float('nan')  # Fill missing columns with NaN

            # Select only the HRV feature columns (excluding non-HRV columns)
            hrv_columns = baseline_ECGfeatures.columns

            # Subtract baseline features from button ECG features for baseline correction
            ratings_wHRV[hrv_columns] = ratings_wHRV[hrv_columns] - baseline_ECGfeatures.iloc[0]

            # Optionally, Z-standardize each HRV feature (uncomment if needed)
            # ratings_wHRV[hrv_columns] = (ratings_wHRV[hrv_columns] - ratings_wHRV[hrv_columns].mean()) / ratings_wHRV[hrv_columns].std()

            # Construct the output filename based on the format you want
            if window_length == 60:
                # Check if it's the first or second 60s file and set the suffix accordingly
                if "60s1" in filename:
                    suffix = "60s1"
                else:
                    suffix = "60s2"
            else:
                # For 30s and 120s, use window_length directly
                suffix = f"{window_length}s"

            output_file_path = os.path.join(folder_path, f"{p_id}_ratings_HRV_baselinecorrected_{suffix}.csv")
            
            # Save the baseline-corrected DataFrame to CSV for the current feature file
            ratings_wHRV.to_csv(output_file_path, index=False)
            print(f"Baseline-corrected file saved: {output_file_path} for {p_id}")
            
        print(f"Finished processing folder {folder_path}")
    else:
        print(f"Skipping non-directory: {folder_path}")



Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
No file with '_baseline_HRVfeatures.csv' in its name found in folder C:\Users\BootMR\Documents\data_export\00-code_export.
baseline_ECGfeatures missing or empty in folder C:\Users\BootMR\Documents\data_export\00-code_export.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_baseline_HRVfeatures.csv
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_30s.csv
Baseline-corrected file saved: C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected_30s.csv for 03
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s1.csv
Baseline-corrected file saved: C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected_60s1.csv for 03
Loaded file: C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s2.csv
Baseline-corrected file saved: C:\Users\BootMR\Documents\data_expo

## merge into one

In [7]:
####### merge non-baselinecorrected


# Suffixes to group files by
suffixes = ['30s', '60s1', '60s2', '120s']

# Initialize a dictionary to hold dataframes for each suffix
merged_data = {suffix: [] for suffix in suffixes}

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)
    
    if os.path.isdir(folder_path):
        print(f"Processing folder: {folder_path}")
        
        # Check each suffix group
        for suffix in suffixes:
            # Construct the filename pattern for the current suffix
            filename = f"{p_id}_ratingsECGfeatures_{suffix}.csv"
            file_path = os.path.join(folder_path, filename)
            
            # If the file exists, read it and append to the corresponding list
            if os.path.exists(file_path):
                df = pd.read_csv(file_path)
                df.insert(0, 'p_id', p_id)  # Add subfolder name as the first column
                merged_data[suffix].append(df)
                print(f"Added {file_path} to merge group '{suffix}'")
            else:
                print(f"File {file_path} does not exist.")

# Merge the dataframes for each suffix and save to a new CSV file
for suffix, dfs in merged_data.items():
    if dfs:
        # Concatenate all dataframes in the list
        merged_df = pd.concat(dfs, ignore_index=True)
        
        # Save the merged dataframe to a new CSV file
        output_file_path = os.path.join(parent_dir, f"merged_{suffix}.csv")
        merged_df.to_csv(output_file_path, index=False)
        print(f"Merged file saved: {output_file_path}")
    else:
        print(f"No files to merge for suffix '{suffix}'")


Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratingsECGfeatures_30s.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratingsECGfeatures_60s1.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratingsECGfeatures_60s2.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratingsECGfeatures_120s.csv does not exist.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Added C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_30s.csv to merge group '30s'
Added C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s1.csv to merge group '60s1'
Added C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_60s2.csv to merge group '60s2'
Added C:\Users\BootMR\Documents\data_export\03\03_ratingsECGfeatures_120s.csv to merge group '120s'
Pro

In [8]:
######## merge baselinecorrected

# Initialize a dictionary to hold dataframes for each suffix
merged_data = {suffix: [] for suffix in suffixes}

# Iterate through each subfolder in the root folder
for p_id in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, p_id)
    
    if os.path.isdir(folder_path):
        print(f"Processing folder: {folder_path}")
        
        # Check each suffix group
        for suffix in suffixes:
            # Construct the filename pattern for the current suffix
            filename = f"{p_id}_ratings_HRV_baselinecorrected_{suffix}.csv"
            file_path = os.path.join(folder_path, filename)
            
            # If the file exists, read it and append to the corresponding list
            if os.path.exists(file_path):
                df = pd.read_csv(file_path)
                merged_data[suffix].append(df)
                print(f"Added {file_path} to merge group '{suffix}'")
            else:
                print(f"File {file_path} does not exist.")

# Merge the dataframes for each suffix and save to a new CSV file
for suffix, dfs in merged_data.items():
    if dfs:
        # Concatenate all dataframes in the list
        merged_df = pd.concat(dfs, ignore_index=True)
        
        # Save the merged dataframe to a new CSV file
        output_file_path = os.path.join(parent_dir, f"merged_ratings_HRV_baselinecorrected_{suffix}.csv")
        merged_df.to_csv(output_file_path, index=False)
        print(f"Merged file saved: {output_file_path}")
    else:
        print(f"No files to merge for suffix '{suffix}'")

Processing folder: C:\Users\BootMR\Documents\data_export\00-code_export
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratings_HRV_baselinecorrected_30s.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratings_HRV_baselinecorrected_60s1.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratings_HRV_baselinecorrected_60s2.csv does not exist.
File C:\Users\BootMR\Documents\data_export\00-code_export\00-code_export_ratings_HRV_baselinecorrected_120s.csv does not exist.
Processing folder: C:\Users\BootMR\Documents\data_export\03
Added C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected_30s.csv to merge group '30s'
Added C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected_60s1.csv to merge group '60s1'
Added C:\Users\BootMR\Documents\data_export\03\03_ratings_HRV_baselinecorrected_60s2.csv to merge group '60s2'
Added C:\Users\BootMR\Docu

## classification

In [9]:


# File paths for the merged datasets
merged_files = [
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_30s.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s1.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s2.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_120s.csv"
]

# Feature subset for classification
hrv_features = [
    'hr_mean', 'hr_stdev', 'hrv_mean', 'hrv_stdev',
    'HRV_MeanNN', 'HRV_RMSSD', 'HRV_LF', 'HRV_HF', 'HRV_LFHF'
]

# Iterate over each file
for file in merged_files:
    print(f"\nChecking file: {file}")

    try:
        # Load CSV file
        data = pd.read_csv(file)

        # Check if all required features are present
        missing_features = [feature for feature in hrv_features if feature not in data.columns]
        if missing_features:
            print(f"❌ Missing columns: {missing_features}")
        else:
            print("✅ All required features are present.")

            # Check for NaN values in required features
            nan_counts = data[hrv_features].isna().sum()
            total_missing = nan_counts.sum()

            if total_missing == 0:
                print("✅ No NaN values in any HRV feature.")
            else:
                print(f"⚠️ NaN values found:")
                print(nan_counts[nan_counts > 0])

    except Exception as e:
        print(f"Error reading {file}: {e}")



Checking file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_30s.csv
✅ All required features are present.
⚠️ NaN values found:
HRV_LF      918
HRV_LFHF    918
dtype: int64

Checking file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s1.csv
✅ All required features are present.
✅ No NaN values in any HRV feature.

Checking file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s2.csv
✅ All required features are present.
✅ No NaN values in any HRV feature.

Checking file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_120s.csv
✅ All required features are present.
✅ No NaN values in any HRV feature.


In [10]:
#### worked well 1st of march
# if i remember well, the merged files contain non-baselinecorrected values


# File paths
merged_files = [
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_30s.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s1.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s2.csv",
    r"C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_120s.csv"
]

# Features & target
hrv_features = [
    'hr_mean', 'hr_stdev', 'hrv_mean', 'hrv_stdev',
    'HRV_MeanNN', 'HRV_RMSSD', 'HRV_LF', 'HRV_HF', 'HRV_LFHF'
]
target_column = "rating"  # Target column for classification (-1, 0, 1)

# Classifiers to evaluate
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "SVM (RBF)": SVC(),
}

# Store results in a list
results = []

for file in merged_files:
    print(f"\nProcessing file: {file}")

    try:
        # Load data
        data = pd.read_csv(file)

        # Ensure required features exist
        missing_features = [feature for feature in hrv_features if feature not in data.columns]
        if missing_features:
            print(f"❌ Skipping {file} - Missing columns: {missing_features}")
            continue

        # Check if target column exists
        if target_column not in data.columns:
            print(f"❌ Skipping {file} - Target column '{target_column}' not found")
            continue

        # Extract features & target
        X = data[hrv_features]
        y = data[target_column]

        # Handle NaNs by imputing with mean
        imputer = SimpleImputer(strategy="mean")
        X = imputer.fit_transform(X)

        # Standardize the features
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        # Store file name
        dataset_name = file.split("\\")[-1]  # Extract just the filename

        ### 3-Way Classification (-1, 0, 1) ###
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        for clf_name, clf in classifiers.items():
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            f1 = f1_score(y_test, y_pred, average="weighted")  # Weighted to account for class imbalance
            results.append(["F1 Score", dataset_name, "3-Class", clf_name, f1])

        ### Binary Classification (-1 vs. 1) ###
        binary_mask = y != 0  # Remove class 0
        X_binary = X[binary_mask]
        y_binary = y[binary_mask]

        X_train, X_test, y_train, y_test = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)

        for clf_name, clf in classifiers.items():
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            f1 = f1_score(y_test, y_pred, average="weighted")
            results.append(["F1 Score", dataset_name, "Binary", clf_name, f1])

    except Exception as e:
        print(f"❌ Error processing {file}: {e}")

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=["Metric", "Dataset", "Classification", "Model", "F1 Score"])

# Pivot table for easy readability
results_pivot = results_df.pivot(index=["Metric", "Classification", "Dataset"], columns="Model", values="F1 Score")

# Display results in table format
print("\n--- F1 Score Comparison ---")
print(results_pivot)

# Save to CSV
results_pivot.to_csv("f1_score_comparison.csv")
print("\n✅ Results saved to 'f1_score_comparison.csv'")



Processing file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_30s.csv





Processing file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s1.csv

Processing file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_60s2.csv

Processing file: C:\Users\BootMR\Documents\data_processed\CHECKIFYOUREALLYNEEDTHISFOLDER\merged_120s.csv

--- F1 Score Comparison ---
Model                                    Logistic Regression  Random Forest  \
Metric   Classification Dataset                                               
F1 Score 3-Class        merged_120s.csv             0.291943       0.574473   
                        merged_30s.csv              0.291943       0.538075   
                        merged_60s1.csv             0.291943       0.534851   
                        merged_60s2.csv             0.288289       0.530684   
         Binary         merged_120s.csv             0.578424       0.681755   
                        merged_30s.csv              0.578424       0.681998   
                     