In [15]:
import pandas as pd
from scipy.stats import ttest_ind, spearmanr

# -------------------------------
# Config
# -------------------------------
CSV_PATH = r"C:\Users\User\Documents\EEG_Project\ML_Feature_Matrix.csv"
FEATURE = "Feature_Gamma_Posterior_Abs"
LABEL_COL = "Label_Impaired"
MOCA_COL = "Target_MoCA"
MIN_GROUP_SIZE = 3


# -------------------------------
# Statistics helpers
# -------------------------------
def welch_ttest(group_a, group_b, min_size=MIN_GROUP_SIZE):
    """Run Welch's unequal-variance t-test between two samples.

    Args:
        group_a: Sized sequence of numeric values for the first group.
        group_b: Sized sequence of numeric values for the second group.
        min_size: Minimum number of values each group must contain.

    Returns:
        A ``(t_stat, p_value)`` pair. Both are NaN (and a warning is
        printed) when either group has fewer than ``min_size`` values.
    """
    if len(group_a) >= min_size and len(group_b) >= min_size:
        # equal_var=False selects Welch's test rather than Student's.
        t_value, p_value = ttest_ind(group_a, group_b, equal_var=False)
        return t_value, p_value
    print("Warning: One group has too few valid samples for t-test.")
    return float("nan"), float("nan")


def spearman_corr(x, y, min_size=MIN_GROUP_SIZE):
    """Compute the Spearman rank correlation between two paired samples.

    Args:
        x: Sized sequence of numeric values.
        y: Sized sequence of numeric values paired with ``x``.
        min_size: Minimum number of pairs required to run the test.

    Returns:
        A ``(rho, p_value)`` pair. Both are NaN (and a warning is printed)
        when fewer than ``min_size`` pairs are available, mirroring the
        guard used for the t-test instead of letting SciPy fail or return
        a degenerate result on empty / near-empty input.
    """
    if len(x) >= min_size:
        rho_value, p_value = spearmanr(x, y)
        return rho_value, p_value
    print("Warning: Too few valid samples for Spearman correlation.")
    return float("nan"), float("nan")


# The guard is true when this runs as a notebook cell or a script, so the
# names assigned below remain module-level globals there; it only keeps the
# file read from firing if the helpers above are imported from elsewhere.
if __name__ == "__main__":
    # -------------------------------
    # Load Data
    # -------------------------------
    df = pd.read_csv(CSV_PATH)

    # Drop rows with a missing value in the feature, MoCA, or label column
    # so that both tests below run on the same set of subjects.
    df = df.dropna(subset=[FEATURE, MOCA_COL, LABEL_COL])

    # Split by label (rows whose label is neither 1 nor 0 fall in no group)
    imp = df.loc[df[LABEL_COL] == 1, FEATURE]
    unimp = df.loc[df[LABEL_COL] == 0, FEATURE]

    # -------------------------------
    # T-test
    # -------------------------------
    t_stat, p_group = welch_ttest(imp, unimp, MIN_GROUP_SIZE)

    # -------------------------------
    # Spearman correlation
    # -------------------------------
    rho, p_corr = spearman_corr(df[FEATURE], df[MOCA_COL], MIN_GROUP_SIZE)

    # -------------------------------
    # Results
    # -------------------------------
    print(f"Feature: {FEATURE}")
    print(f"Subjects (non-NaN): {len(df)}")
    print(f"T-test (impaired vs unimpaired): t = {t_stat:.3f}, p = {p_group:.4f}")
    print(f"Spearman correlation with MoCA: rho = {rho:.3f}, p = {p_corr:.4f}")


Feature: Feature_Gamma_Posterior_Abs
Subjects (non-NaN): 100
T-test (impaired vs unimpaired): t = 2.641, p = 0.0098
Spearman correlation with MoCA: rho = -0.258, p = 0.0096
