# Microstate Extraction

In [None]:
import os
import mne
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Root folder; each subject's files live in BASE_DIR/<subject id>/.
BASE_DIR = r"C:\Users\User\Documents\EEG_Project\rEEG"
# Subject IDs sub-001 through sub-149 (the range end, 150, is exclusive).
SUBJECTS = [f"sub-{i:03d}" for i in range(1, 150)]
# Number of k-means clusters, i.e. microstate classes, fitted per subject.
N_STATES = 4
# Fallback sampling rate in Hz. It is only the default of
# compute_microstate_features; the main loop passes epochs.info["sfreq"].
SFREQ = 250  # adjust to your sampling frequency

def compute_microstate_features(labels, n_states=N_STATES, sfreq=SFREQ):
    """Compute per-state coverage, mean duration and occurrence rate.

    The label sequence is cut into segments, i.e. runs of identical
    consecutive labels.  For every state ``s`` in ``range(n_states)`` three
    entries are produced (states are numbered from 1 in the key names):

    * ``MS{s+1}_coverage``        -- fraction of all samples labelled ``s``.
    * ``MS{s+1}_duration_ms``     -- mean segment length in milliseconds
      (0 when the state never occurs).
    * ``MS{s+1}_occurrence_rate`` -- number of segments per second of signal
      (0 when the state never occurs).

    Args:
        labels: 1-D sequence of integer state labels, one per sample.  Lists
            are accepted and converted to an array.
        n_states: Number of states to report.
        sfreq: Sampling frequency in Hz used to convert samples to time.

    Returns:
        dict mapping feature name to value.  For an empty ``labels`` every
        feature is 0 instead of raising.
    """
    labels = np.asarray(labels)
    total_samples = len(labels)
    features = {}

    # Nothing to segment: report "state never seen" for every state rather
    # than indexing into an empty array / dividing by zero below.
    if total_samples == 0:
        for s in range(n_states):
            features[f"MS{s+1}_coverage"] = 0.0
            features[f"MS{s+1}_duration_ms"] = 0
            features[f"MS{s+1}_occurrence_rate"] = 0
        return features

    # Index of the first sample of every segment (a segment starts at 0 and
    # wherever the label differs from the previous sample).
    changes = np.where(np.diff(labels) != 0)[0] + 1
    segment_starts = np.insert(changes, 0, 0)
    # Appending total_samples closes the last segment.
    segment_lengths = np.diff(np.append(segment_starts, total_samples))
    segment_labels = labels[segment_starts]

    for s in range(n_states):
        state_segments = segment_lengths[segment_labels == s]
        features[f"MS{s+1}_coverage"] = np.sum(state_segments) / total_samples
        if len(state_segments) > 0:
            features[f"MS{s+1}_duration_ms"] = (state_segments.mean() / sfreq) * 1000
            features[f"MS{s+1}_occurrence_rate"] = len(state_segments) / (total_samples / sfreq)
        else:
            features[f"MS{s+1}_duration_ms"] = 0
            features[f"MS{s+1}_occurrence_rate"] = 0
    return features

def compute_transition_matrix(labels, n_states=N_STATES):
    """Return the row-normalised sample-to-sample transition matrix.

    Entry ``[a, b]`` is the fraction of samples labelled ``a`` that are
    followed by a sample labelled ``b``.  Staying in the same state counts as
    a transition, so the diagonal holds the self-transition probabilities.

    Args:
        labels: 1-D sequence of integer state labels in ``[0, n_states)``.
        n_states: Size of the (square) matrix.

    Returns:
        ``(n_states, n_states)`` float array.  Rows of states that never occur
        as a source are all zero; every other row sums to 1.
    """
    labels = np.asarray(labels)
    T = np.zeros((n_states, n_states))
    if labels.size < 2:
        # No consecutive pair exists, hence no transition to count.
        return T

    # ufunc.at accumulates repeated (from, to) index pairs; plain fancy-index
    # "+=" would count each distinct pair only once.
    np.add.at(T, (labels[:-1], labels[1:]), 1)

    # Normalize per row; empty rows are divided by 1 so they stay at zero.
    row_sums = T.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1
    return T / row_sums

# Per subject: load the epochs, cluster every time sample into N_STATES
# microstates, derive temporal features and write them to one CSV.
for SUBJ in SUBJECTS:
    SUBJ_DIR = os.path.join(BASE_DIR, SUBJ)
    # File name uses the last three characters of the subject ID,
    # e.g. sub-001 -> epo_001_raw.fif.
    EPO_FILE = os.path.join(SUBJ_DIR, f"epo_{SUBJ[-3:]}_raw.fif")
    DATA_DIR = os.path.join(SUBJ_DIR, "data")

    if not os.path.exists(EPO_FILE):
        print(f"{SUBJ}: epochs file missing, skipping")
        continue

    # Create the output folder only once we know there is something to save,
    # so subjects without an epochs file do not get empty directory trees.
    os.makedirs(DATA_DIR, exist_ok=True)

    print(f"Processing {SUBJ}...")
    epochs = mne.read_epochs(EPO_FILE, preload=True)
    data = epochs.get_data()  # shape: (n_epochs, n_channels, n_times)

    # Flatten epochs: concatenate epochs along time for clustering.
    # transpose + reshape yields a fresh array, so the in-place scaling below
    # does not modify the epochs object.
    all_data = data.transpose(0, 2, 1).reshape(-1, data.shape[1])  # (n_epochs*n_times, n_channels)

    # Normalize per channel (z-score); the epsilon guards flat channels.
    all_data -= all_data.mean(axis=0)
    all_data /= all_data.std(axis=0) + 1e-12

    # K-means clustering: every time sample receives one of N_STATES labels.
    kmeans = KMeans(n_clusters=N_STATES, random_state=42, n_init=10)
    labels = kmeans.fit_predict(all_data)

    # Compute microstate features.
    # NOTE(review): labels form one sequence over the concatenated epochs, so
    # segments and transitions are also counted across epoch boundaries.
    features = compute_microstate_features(labels, n_states=N_STATES, sfreq=epochs.info["sfreq"])

    # Compute transition probabilities and flatten them into the feature row.
    trans_matrix = compute_transition_matrix(labels, n_states=N_STATES)
    for i in range(N_STATES):
        for j in range(N_STATES):
            features[f"MS{i+1}_to_MS{j+1}_TP"] = trans_matrix[i, j]

    # Save features as a single-row CSV.
    df = pd.DataFrame([features])
    out_file = os.path.join(DATA_DIR, f"{SUBJ}_microstate_features.csv")
    df.to_csv(out_file, index=False)
    print(f"Saved microstate features: {out_file}")


## Narrowband Analysis

In [6]:
import os
import numpy as np
import pandas as pd
import mne
from scipy.signal import find_peaks
from sklearn.cluster import KMeans

# =========================
# CONFIG
# =========================
# Root folder; each subject's files live in BASE_DIR/<subject id>/.
BASE_DIR = r"C:\Users\User\Documents\EEG_Project\rEEG"
# Subject IDs sub-001 through sub-149 (the range end, 150, is exclusive).
SUBJECTS = [f"sub-{i:03d}" for i in range(1, 150)]
# Frequency bands analysed separately: name -> (low, high) edges that the main
# loop hands to Epochs.filter (MNE interprets them in Hz).
BANDS = {
    "delta": (1, 4),
    "theta": (4, 8),
    "alpha": (8, 13),
}
# Number of k-means clusters (microstate classes) fitted per subject and band.
N_MICROSTATES = 4
# Minimum spacing between GFP peaks in milliseconds; extract_gfp_maps converts
# it to samples.
MIN_PEAK_MS = 10
# Passed to smooth_labels as its min_samples argument.
MIN_LABEL_SAMPLES = 3

# =========================
# HELPERS
# =========================
def compute_gfp(data):
    """Return the standard deviation of ``data`` taken along its first axis.

    The callers in this file pass ``data`` as (n_channels, n_times), in which
    case the result is the global field power: one value per time sample.
    """
    samples = np.asarray(data)
    return samples.std(axis=0)

def extract_gfp_maps(data, sfreq, min_peak_ms=10):
    """Collect the channel maps found at the local maxima of the GFP curve.

    Args:
        data: 2-D array; columns are time samples (the callers in this file
            pass (n_channels, n_times)).
        sfreq: Sampling frequency, used to turn ``min_peak_ms`` into samples.
        min_peak_ms: Minimum distance between two accepted peaks, in ms.

    Returns:
        Array with one row per GFP peak and one column per channel; each row
        has its own mean removed.
    """
    # Minimum peak spacing in whole samples, never less than one sample.
    spacing = int((min_peak_ms / 1000) * sfreq)
    if spacing < 1:
        spacing = 1

    peak_idx = find_peaks(compute_gfp(data), distance=spacing)[0]

    # Fancy indexing copies, so the in-place centring leaves `data` untouched.
    peak_maps = data[:, peak_idx].T
    # Spatial DC removal (average-reference behaviour): zero mean per map.
    peak_maps -= peak_maps.mean(axis=1, keepdims=True)
    return peak_maps

def backfit_microstates(data, centroids):
    """Label every time sample with its best-matching centroid.

    Args:
        data: (n_channels, n_times) array.
        centroids: (n_states, n_channels) array of template maps.

    Returns:
        1-D integer array of length n_times holding, per sample, the index of
        the centroid with the largest absolute similarity.  Taking the
        absolute value makes the match independent of map polarity.
    """
    # Remove the across-channel mean of every sample.
    centred = data - data.mean(axis=0, keepdims=True)

    centroid_norms = np.linalg.norm(centroids, axis=1, keepdims=True)
    sample_norms = np.linalg.norm(centred, axis=0, keepdims=True)
    # The tiny constant prevents a 0/0 for all-zero samples or centroids.
    scale = centroid_norms * sample_norms + 1e-12

    similarity = np.dot(centroids, centred) / scale
    return np.abs(similarity).argmax(axis=0)

def smooth_labels(labels, min_samples=3):
    """Remove label segments that are shorter than ``min_samples`` samples.

    A segment is a run of identical consecutive labels.  Short segments are
    absorbed into the segment that precedes them.  Removal proceeds from the
    shortest runs upwards (length 1 first, then 2, ... up to
    ``min_samples - 1``) and segments are recomputed between passes, so a
    long segment interrupted by a brief blip is stitched back together
    instead of being discarded piecewise.

    Previously ``min_samples`` was ignored and only isolated single samples
    were replaced; ``min_samples=2`` selects that single-sample removal.

    The first and the last segment are never altered: they are truncated by
    the edges of the recording, so their true length is unknown.

    Args:
        labels: 1-D sequence of integer state labels.
        min_samples: Minimum number of samples a segment needs to survive.
            Values <= 1 disable smoothing.

    Returns:
        New array of the same length; the input is not modified.
    """
    smoothed = np.asarray(labels).copy()
    n = len(smoothed)
    if n == 0:
        return smoothed

    for max_len in range(1, int(min_samples)):
        # Segment boundaries of the current (partly smoothed) sequence.
        boundaries = np.where(np.diff(smoothed) != 0)[0] + 1
        starts = np.concatenate(([0], boundaries))
        stops = np.concatenate((boundaries, [n]))
        for start, stop in zip(starts, stops):
            touches_edge = start == 0 or stop == n
            if touches_edge or stop - start > max_len:
                continue
            # Left-to-right merge: take over the (possibly just updated)
            # label of the preceding sample.
            smoothed[start:stop] = smoothed[start - 1]
    return smoothed

def compute_microstate_stats(labels, sfreq, n_states):
    """Compute temporal statistics and transition probabilities of a labelling.

    The label sequence is cut into segments (runs of identical consecutive
    labels).  For every state ``s`` in ``range(n_states)`` the result holds
    (states are numbered from 1 in the key names):

    * ``MS{s+1}_coverage``    -- fraction of all samples labelled ``s``.
    * ``MS{s+1}_duration_ms`` -- mean segment length in milliseconds.
    * ``MS{s+1}_occurrence``  -- segments per second of signal.

    followed by ``T_{i+1}_{j+1}``: the fraction of samples in state ``i``
    whose next sample is in state ``j`` (rows sum to 1; staying in the same
    state counts, so the diagonal holds self-transition probabilities).

    Args:
        labels: 1-D sequence of integer labels in ``[0, n_states)``.  Lists
            are accepted and converted to an array.
        sfreq: Sampling frequency in Hz.
        n_states: Number of states to report.

    Returns:
        dict of feature name -> value.  States that never occur, and every
        entry for an empty ``labels``, are reported as 0.
    """
    labels = np.asarray(labels)
    total_samples = len(labels)

    if total_samples:
        # First sample index of each segment; appending total_samples closes
        # the final segment so np.diff yields all segment lengths.
        changes = np.where(np.diff(labels) != 0)[0] + 1
        starts = np.insert(changes, 0, 0)
        lengths = np.diff(np.append(starts, total_samples))
        states = labels[starts]
    else:
        # No samples: no segments, and nothing below may divide by zero.
        lengths = np.zeros(0, dtype=int)
        states = np.zeros(0, dtype=int)

    stats = {}
    for s in range(n_states):
        segs = lengths[states == s]
        stats[f"MS{s+1}_coverage"] = (
            np.sum(segs) / total_samples if total_samples else 0.0
        )
        if len(segs) > 0:
            stats[f"MS{s+1}_duration_ms"] = (segs.mean() / sfreq) * 1000
            stats[f"MS{s+1}_occurrence"] = len(segs) / (total_samples / sfreq)
        else:
            stats[f"MS{s+1}_duration_ms"] = 0
            stats[f"MS{s+1}_occurrence"] = 0

    # Transition matrix over consecutive samples.  ufunc.at accumulates
    # repeated (from, to) pairs, which plain fancy-index "+=" would not.
    T = np.zeros((n_states, n_states))
    if total_samples > 1:
        np.add.at(T, (labels[:-1], labels[1:]), 1)
    row_sums = T.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1  # keep rows of unseen states at zero
    T /= row_sums

    for i in range(n_states):
        for j in range(n_states):
            stats[f"T_{i+1}_{j+1}"] = T[i, j]

    return stats

# =========================
# MAIN LOOP
# =========================
# Per subject and frequency band: filter, cluster the GFP-peak maps into
# N_MICROSTATES templates, back-fit them to every sample and save the stats.
for SUBJ in SUBJECTS:
    SUBJ_DIR = os.path.join(BASE_DIR, SUBJ)
    # File name uses the last three characters of the subject ID,
    # e.g. sub-001 -> epo_001_raw.fif.
    EPO_FILE = os.path.join(SUBJ_DIR, f"epo_{SUBJ[-3:]}_raw.fif")
    DATA_DIR = os.path.join(SUBJ_DIR, "data")

    if not os.path.exists(EPO_FILE):
        # Report the skip so missing subjects are visible in the run log.
        print(f"{SUBJ}: epochs file missing, skipping")
        continue

    print(f"Processing {SUBJ}...")
    os.makedirs(DATA_DIR, exist_ok=True)
    epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)
    epochs.set_eeg_reference("average", projection=False, verbose=False)
    sfreq = epochs.info['sfreq']

    all_band_features = []

    for band_name, (fmin, fmax) in BANDS.items():
        # 1. Filter a copy so every band starts from the unfiltered epochs.
        ep_filt = epochs.copy().filter(fmin, fmax, fir_design='firwin', verbose=False)

        # 2. Reshape to (n_channels, n_epochs * n_times): epochs are
        #    concatenated along time.
        data = ep_filt.get_data()  # (n_epochs, n_channels, n_times)
        data = data.transpose(1, 0, 2).reshape(data.shape[1], -1)

        # 3. Extract GFP peak maps for clustering.
        gfp_maps = extract_gfp_maps(data, sfreq, MIN_PEAK_MS)

        if len(gfp_maps) < N_MICROSTATES:
            # K-means needs at least as many maps as clusters.
            print(
                f"{SUBJ} [{band_name}]: only {len(gfp_maps)} GFP peak maps, "
                f"need at least {N_MICROSTATES}; band skipped"
            )
            continue

        # 4. Cluster.
        # NOTE(review): plain k-means treats a map and its sign-inverted
        # copy as different, while the back-fitting below ignores polarity.
        km = KMeans(n_clusters=N_MICROSTATES, random_state=42, n_init=10)
        km.fit(gfp_maps)

        # 5. Backfit the templates to every sample, then smooth the labels.
        labels = backfit_microstates(data, km.cluster_centers_)
        labels = smooth_labels(labels, MIN_LABEL_SAMPLES)

        # 6. Compute stats and tag the row with band and subject.
        band_stats = compute_microstate_stats(labels, sfreq, N_MICROSTATES)
        band_stats["band"] = band_name
        band_stats["subject"] = SUBJ
        all_band_features.append(band_stats)

    # Save CSV for this subject (one row per successfully processed band).
    if all_band_features:
        df = pd.DataFrame(all_band_features)
        out_path = os.path.join(DATA_DIR, f"{SUBJ}_microstate_narrowband.csv")
        df.to_csv(out_path, index=False)
    else:
        print(f"{SUBJ}: no band produced features, nothing saved")

print("Done! Check individual 'data' folders for results.")

Processing sub-001...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-002...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-003...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-004...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-005...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-006...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-007...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-008...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-009...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-010...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-011...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-012...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-013...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-014...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-015...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-016...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-017...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)
  ep_filt = epochs.copy().filter(fmin, fmax, fir_design='firwin', verbose=False)
  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-018...




Processing sub-019...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-020...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-021...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-022...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-023...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-024...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-025...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-026...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-027...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-028...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-029...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-030...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-031...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-032...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-033...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-034...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-035...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-036...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-037...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-038...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-039...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-040...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-041...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-042...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-043...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-044...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-045...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)
  ep_filt = epochs.copy().filter(fmin, fmax, fir_design='firwin', verbose=False)


Processing sub-046...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-047...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-048...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)
  ep_filt = epochs.copy().filter(fmin, fmax, fir_design='firwin', verbose=False)


Processing sub-049...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-050...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-051...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-052...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-053...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-054...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-055...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-056...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-057...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-058...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-059...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-060...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-061...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-062...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-063...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-064...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-065...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-066...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-067...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-068...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-069...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-070...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-071...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-072...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-073...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-074...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-075...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-076...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-077...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-078...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-079...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-080...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-081...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-082...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-083...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-084...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-085...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-086...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-087...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-088...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-089...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-090...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-091...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-092...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-093...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-094...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-095...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-096...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-097...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-098...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-099...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-100...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-101...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-102...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-103...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-104...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-105...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-106...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-107...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-108...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-109...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-110...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-111...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-112...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-113...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-114...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-115...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-116...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-117...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-118...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-119...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-120...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-121...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-122...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-123...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-124...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-125...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-126...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-127...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-128...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-129...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-130...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-131...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-132...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-133...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-134...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-135...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-136...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-137...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-138...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-139...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-140...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-141...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-142...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-143...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-144...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-145...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-146...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-147...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-148...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Processing sub-149...


  epochs = mne.read_epochs(EPO_FILE, preload=True, verbose=False)


Done! Check individual 'data' folders for results.


In [None]:
import os
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, pearsonr

# -------------------------------
# Configuration
# -------------------------------
BASE_DIR = r"C:\Users\User\Documents\EEG_Project\rEEG"
ROI_FILE_SUFFIX = "_ROI_band_ratios.csv"
# Ratio under study, spelled "<numerator band>_<denominator band>".  It names
# the CSV column, the printed labels and the output file.
# NOTE(review): for ratios other than delta_beta this assumes the ROI files
# carry "<band>_power" columns like delta_power / beta_power -- confirm.
RATIO = "delta_beta"
NUM_BAND, DEN_BAND = RATIO.split("_", 1)
RATIO_LABEL = f"{NUM_BAND.capitalize()}/{DEN_BAND.capitalize()}"  # "Delta/Beta"

# Load subject info
meta = pd.read_csv(os.path.join(BASE_DIR, "participants.csv"))  # must have columns: participant_id, MOCA, GROUP

# Only PD subjects
PD_SUBS = meta.loc[meta['GROUP']=='PD', 'participant_id'].tolist()

# -------------------------------
# Collect ROI ratio values
# -------------------------------
roi_data = {}  # subj -> {roi: ratio}
for subj in PD_SUBS:
    roi_file = os.path.join(BASE_DIR, subj, "data", f"{subj}{ROI_FILE_SUFFIX}")
    if not os.path.exists(roi_file):
        print(f"{subj}: missing ROI file")
        continue
    df = pd.read_csv(roi_file)
    # Compute the ratio from the band powers if the file does not provide it;
    # the epsilon avoids division by zero.
    if RATIO not in df.columns:
        df[RATIO] = df[f"{NUM_BAND}_power"] / (df[f"{DEN_BAND}_power"] + 1e-12)
    roi_data[subj] = dict(zip(df['roi'], df[RATIO]))

# -------------------------------
# Aggregate for group analysis
# -------------------------------
agg_vals = []
labels = []
moca_vals_list = []

for subj, roi_ratios in roi_data.items():
    moca_vals = meta.loc[meta['participant_id'] == subj, 'MOCA'].values
    if len(moca_vals) == 0 or np.isnan(moca_vals[0]):
        print(f"{subj}: missing MoCA, skipping")
        continue
    moca = moca_vals[0]

    # Average the ratio across all ROIs -> one value per subject
    mean_ratio = np.mean(list(roi_ratios.values()))
    agg_vals.append(mean_ratio)
    labels.append(1 if moca < 26 else 0)  # 1 = impaired, 0 = unimpaired
    moca_vals_list.append(moca)

agg_vals = np.array(agg_vals)
labels = np.array(labels)
moca_vals_list = np.array(moca_vals_list)

# Split groups
impaired = agg_vals[labels == 1]
unimpaired = agg_vals[labels == 0]

# -------------------------------
# Stats
# -------------------------------
# Welch's t-test needs at least two values per group, and pearsonr raises for
# fewer than two pairs; report NaN instead of crashing on tiny cohorts.
if len(impaired) >= 2 and len(unimpaired) >= 2:
    t, p = ttest_ind(impaired, unimpaired, equal_var=False)
else:
    t, p = np.nan, np.nan

if len(agg_vals) >= 2:
    r, p_corr = pearsonr(agg_vals, moca_vals_list)
else:
    r, p_corr = np.nan, np.nan

print(f"{RATIO_LABEL} ratio")
print("Cohort counts (PD only):")
print(f"  Total PD with MoCA+ROI: {len(agg_vals)}")
print(f"  Impaired (MoCA <26): {len(impaired)}")
print(f"  Unimpaired (MoCA >=26): {len(unimpaired)}\n")

# Std is the population standard deviation (numpy default, ddof=0).
for group_name, values in (("Impaired", impaired), ("Unimpaired", unimpaired)):
    group_mean = values.mean() if len(values) else np.nan
    group_std = values.std() if len(values) else np.nan
    print(f"{group_name}: n={len(values)} | Mean={group_mean:.6f} | Std={group_std:.6f}")
print()

print("--- Group comparison (Impaired vs Unimpaired) ---")
print(f"t = {t:.3f}, p = {p:.6g}")

print(f"\n--- Correlation ({RATIO_LABEL} vs MoCA) across PD ---")
print(f"Pearson r = {r:.3f}, p = {p_corr:.6g}")

# -------------------------------
# Optional: Save aggregated ROI ratio data
# -------------------------------
# np.save pickles the dict into a 0-d object array; read it back with
# np.load(path, allow_pickle=True).item().  The file goes to the current
# working directory.
np.save(f"PD_{RATIO}_ratio.npy", roi_data)
