In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob

# Root folder that is searched recursively for per-subject feature CSVs.
# The EEG_BASE_DIR environment variable, when set to a non-empty value,
# overrides the default below so the notebook can run on another machine
# without editing this cell.
BASE_DIR = os.environ.get("EEG_BASE_DIR") or r"C:\Users\User\Documents\EEG_Project\rEEG"

# --- Define feature types and patterns ---
# Maps a feature-type label to the glob pattern that selects the CSV files
# holding that feature.
FEATURE_FILES = {
    "FFT": "*_bandpowers_epoch.csv",
    "DWT": "*_DWT_band_energy.csv",
    "SpectralEntropy": "*_spectral_entropy.csv"
}

# Output directory for normalized features (created if it does not exist)
NORM_DIR = os.path.join(BASE_DIR, "normalized_features")
os.makedirs(NORM_DIR, exist_ok=True)

def zscore_across_subjects(feature_df, value_cols):
    """Z-score the selected columns over all rows of ``feature_df``.

    Each selected column is centred on its own mean and divided by its
    population standard deviation (``ddof=0``).

    Parameters
    ----------
    feature_df : pandas.DataFrame
        Frame holding the feature columns.
    value_cols : list of str (or a single column label)
        Columns to normalize.

    Returns
    -------
    pandas.DataFrame (or Series when a single label is given)
        The normalized values, indexed like ``feature_df``. A column with
        zero spread comes back as all zeros instead of NaN.
    """
    values = feature_df[value_cols]
    centered = values - values.mean()
    spread = values.std(ddof=0)

    # A constant column has std == 0, which would turn 0/0 into NaN.
    # Dividing by 1 instead leaves the (already zero) centred values as 0.
    if isinstance(spread, pd.Series):
        spread = spread.where(spread > 0, 1.0)
    elif not spread > 0:
        spread = 1.0

    return centered / spread

# For every feature type: collect all matching CSVs under BASE_DIR, z-score
# each numeric feature within each channel, and write one normalized CSV
# per subject into NORM_DIR.
for feat_type, pattern in FEATURE_FILES.items():
    print(f"\nProcessing {feat_type} features...")

    # glob returns matches in arbitrary order; sorting keeps the row order
    # of the concatenated frame (and of the saved files) reproducible.
    all_files = sorted(glob(os.path.join(BASE_DIR, "**", pattern), recursive=True))
    if not all_files:
        print(f"No files found for pattern {pattern}")
        continue

    # Load all subject data into a single dataframe with a subject identifier
    dfs = []
    for fpath in all_files:
        # Subject ID is the file-name text before the first underscore
        subj = os.path.basename(fpath).split('_')[0]  # e.g., "sub-001"
        df = pd.read_csv(fpath)
        df['subject'] = subj
        dfs.append(df)

    combined = pd.concat(dfs, ignore_index=True)

    # Columns to normalize: numeric columns other than the identifiers.
    # Restricting to numeric dtypes keeps any text column from breaking the
    # arithmetic below.
    id_cols = {'epoch', 'channel', 'subject'}
    value_cols = [
        c for c in combined.select_dtypes(include='number').columns
        if c not in id_cols
    ]
    if not value_cols:
        print(f"No numeric feature columns found for {feat_type}; skipping.")
        continue

    # Z-score each feature within each channel; the statistics pool the rows
    # of every loaded file that share that channel.
    per_channel = combined.groupby('channel')[value_cols]
    centre = per_channel.transform('mean')
    spread = per_channel.transform('std', ddof=0)
    # A feature that is constant within a channel has std == 0; divide by 1
    # so it becomes 0 instead of NaN.
    spread = spread.where(spread > 0, 1.0)

    normalized = combined.copy()
    normalized[value_cols] = (combined[value_cols] - centre) / spread

    # Save normalized features per subject
    for subj, df_subj in normalized.groupby('subject'):
        out_file = os.path.join(NORM_DIR, f"{subj}_{feat_type}_normalized.csv")
        df_subj.to_csv(out_file, index=False)

    print(f"{feat_type} normalization complete. Saved {normalized['subject'].nunique()} subjects.")


Raw combined shape: (683580, 19)
SUCCESS! Normalized master CSV saved to:
C:\Users\User\Documents\EEG_Project\rEEG\master_features_normalized.csv
Columns sample: Index(['epoch', 'channel', 'delta_abs', 'delta_rel', 'theta_abs', 'theta_rel',
       'alpha1_abs', 'alpha1_rel', 'alpha2_abs', 'alpha2_rel'],
      dtype='object')


In [3]:
import os
import pandas as pd
import numpy as np
from glob import glob

# NOTE(review): this cell repeats the code of the preceding cell verbatim;
# keeping a single copy would avoid re-defining the same names.

# Root folder that is searched recursively for per-subject feature CSVs.
# The EEG_BASE_DIR environment variable, when set to a non-empty value,
# overrides the default below so the notebook can run on another machine
# without editing this cell.
BASE_DIR = os.environ.get("EEG_BASE_DIR") or r"C:\Users\User\Documents\EEG_Project\rEEG"

# --- Define feature types and patterns ---
# Maps a feature-type label to the glob pattern that selects the CSV files
# holding that feature.
FEATURE_FILES = {
    "FFT": "*_bandpowers_epoch.csv",
    "DWT": "*_DWT_band_energy.csv",
    "SpectralEntropy": "*_spectral_entropy.csv"
}

# Output directory for normalized features (created if it does not exist)
NORM_DIR = os.path.join(BASE_DIR, "normalized_features")
os.makedirs(NORM_DIR, exist_ok=True)

def zscore_across_subjects(feature_df, value_cols):
    """Z-score the selected columns over all rows of ``feature_df``.

    Each selected column is centred on its own mean and divided by its
    population standard deviation (``ddof=0``).

    Parameters
    ----------
    feature_df : pandas.DataFrame
        Frame holding the feature columns.
    value_cols : list of str (or a single column label)
        Columns to normalize.

    Returns
    -------
    pandas.DataFrame (or Series when a single label is given)
        The normalized values, indexed like ``feature_df``. A column with
        zero spread comes back as all zeros instead of NaN.
    """
    values = feature_df[value_cols]
    centered = values - values.mean()
    spread = values.std(ddof=0)

    # A constant column has std == 0, which would turn 0/0 into NaN.
    # Dividing by 1 instead leaves the (already zero) centred values as 0.
    if isinstance(spread, pd.Series):
        spread = spread.where(spread > 0, 1.0)
    elif not spread > 0:
        spread = 1.0

    return centered / spread

# For every feature type: collect all matching CSVs under BASE_DIR, z-score
# each numeric feature within each channel, and write one normalized CSV
# per subject into NORM_DIR.
for feat_type, pattern in FEATURE_FILES.items():
    print(f"\nProcessing {feat_type} features...")

    # glob returns matches in arbitrary order; sorting keeps the row order
    # of the concatenated frame (and of the saved files) reproducible.
    all_files = sorted(glob(os.path.join(BASE_DIR, "**", pattern), recursive=True))
    if not all_files:
        print(f"No files found for pattern {pattern}")
        continue

    # Load all subject data into a single dataframe with a subject identifier
    dfs = []
    for fpath in all_files:
        # Subject ID is the file-name text before the first underscore
        subj = os.path.basename(fpath).split('_')[0]  # e.g., "sub-001"
        df = pd.read_csv(fpath)
        df['subject'] = subj
        dfs.append(df)

    combined = pd.concat(dfs, ignore_index=True)

    # Columns to normalize: numeric columns other than the identifiers.
    # Restricting to numeric dtypes keeps any text column from breaking the
    # arithmetic below.
    id_cols = {'epoch', 'channel', 'subject'}
    value_cols = [
        c for c in combined.select_dtypes(include='number').columns
        if c not in id_cols
    ]
    if not value_cols:
        print(f"No numeric feature columns found for {feat_type}; skipping.")
        continue

    # Z-score each feature within each channel; the statistics pool the rows
    # of every loaded file that share that channel.
    per_channel = combined.groupby('channel')[value_cols]
    centre = per_channel.transform('mean')
    spread = per_channel.transform('std', ddof=0)
    # A feature that is constant within a channel has std == 0; divide by 1
    # so it becomes 0 instead of NaN.
    spread = spread.where(spread > 0, 1.0)

    normalized = combined.copy()
    normalized[value_cols] = (combined[value_cols] - centre) / spread

    # Save normalized features per subject
    for subj, df_subj in normalized.groupby('subject'):
        out_file = os.path.join(NORM_DIR, f"{subj}_{feat_type}_normalized.csv")
        df_subj.to_csv(out_file, index=False)

    print(f"{feat_type} normalization complete. Saved {normalized['subject'].nunique()} subjects.")



Processing FFT features...


KeyboardInterrupt: 