In [18]:
## Setup & config

# Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import mne
from scipy.stats import ttest_ind
import warnings

# Ignore noisy warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Project root. Override by setting the EEG_PROJ_DIR environment variable so
# the notebook can run on another machine; the default keeps the original path.
PROJECT_DIR = Path(
    os.environ.get("EEG_PROJ_DIR", "/Users/diarmuidlenihan/Documents/eeg_proj")
)

# Move to the project directory. os.chdir raises right away when the folder is
# missing, rather than letting every later relative path resolve against the
# wrong working directory.
os.chdir(PROJECT_DIR)

# EEG configuration
RAW_DIR = Path("data/raw")   # Folder where the Origin_*.txt files are stored (relative to PROJECT_DIR)
SFREQ = 250.0                # Sampling frequency in Hz
CH_NAMES = [
    'Fp1','Fp2','F3','F4','C3','C4','P3','P4','O1','O2',
    'F7','F8','T7','T8','P7','P8','Fz','Cz','Pz','Oz',
    'FC1','FC2','CP1','CP2','FC5','FC6','CP5','CP6',
    'TP9','TP10','POz'
]

# Confirm the working directory and that the raw-data folder is visible
print("CWD now:", Path().resolve())
print("data/raw exists ->", RAW_DIR.exists())

# Quick check that files are visible (only the first five names are listed)
files_check = sorted(RAW_DIR.glob("Origin_*.txt"))
print("Files found:", len(files_check))
for f in files_check[:5]:
    print(" -", f.name)

/Users/diarmuidlenihan/Documents/eeg_proj
CWD now: /Users/diarmuidlenihan/Documents/eeg_proj
data/raw exists -> True
Files found: 12
 - Origin_H1.txt
 - Origin_H2.txt
 - Origin_H3.txt
 - Origin_H4.txt
 - Origin_H5.txt


In [19]:
## Helper functions

def _read_eeg_table(path, header="infer"):
    """Read a delimited text table, sniffing the separator first."""
    try:
        # sep=None lets the python engine sniff the delimiter.
        return pd.read_csv(path, sep=None, engine="python", header=header)
    except Exception:
        # Sniffing/parsing failed: retry as whitespace-delimited. A regex
        # separator is used because delim_whitespace=True is deprecated in
        # recent pandas releases.
        return pd.read_csv(path, sep=r"\s+", engine="python", header=header)


def _is_numeric_label(label):
    """Return True when a column label parses as a number (i.e. looks like data)."""
    text = str(label).strip()
    # pandas de-duplicates repeated header cells by appending ".N"
    # (e.g. "0.5", "0.5.1"), so also try the label with that suffix removed.
    for candidate in (text, text.rsplit(".", 1)[0]):
        try:
            float(candidate)
        except ValueError:
            continue
        return True
    return False


def load_txt_eeg(path, ch_names=None):
    """
    Load EEG data from a delimited text file, with or without a header row.

    Parameters
    ----------
    path : str or Path
        Text file holding one column per channel (or one row per channel).
    ch_names : sequence of str, optional
        Expected channel names, in output order. Defaults to the
        notebook-level ``CH_NAMES``.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_channels, n_times). Values are scaled by 1e-6
        when the largest magnitude exceeds 1e-3 (treated as microvolts and
        converted to volts); otherwise they are returned unchanged.

    Raises
    ------
    ValueError
        If the file holds no samples, or neither axis of the table matches
        the number of expected channels.
    """
    names = list(CH_NAMES) if ch_names is None else list(ch_names)
    n_channels = len(names)

    df = _read_eeg_table(path)

    # Labels inferred from a file are always strings, so a headerless file
    # shows up as a header made of numbers. In that case the first sample was
    # consumed as column names: re-read without a header to keep it.
    # NOTE(review): a file whose real header consists only of numbers would
    # also be treated as headerless - confirm no such files exist.
    labels = [c for c in df.columns if not str(c).startswith("Unnamed")]
    if labels and all(_is_numeric_label(c) for c in labels):
        df = _read_eeg_table(path, header=None)
        # A trailing delimiter yields an all-NaN column when there is no header.
        df = df.dropna(axis=1, how="all")

    # Drop obvious non-signal columns (time/sample indices, unnamed fillers)
    non_signal = [
        c for c in df.columns
        if isinstance(c, str)
        and c.strip().lower().startswith(("time", "sample", "unnamed"))
    ]
    df = df.drop(columns=non_signal, errors="ignore")
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]

    if set(names).issubset(df.columns):
        # Every expected channel is present by name: select in canonical order
        # (this also discards any extra columns).
        X = df[names].to_numpy(dtype=float).T          # (n_channels, n_times)
    else:
        # No usable header: infer the orientation from the table shape.
        arr = df.to_numpy(dtype=float, copy=True)
        if arr.shape[1] == n_channels:                 # (n_times, n_channels)
            X = arr.T
        elif arr.shape[0] == n_channels:               # (n_channels, n_times)
            X = arr
        else:
            raise ValueError(f"{Path(path).name}: unexpected shape {arr.shape}")

    if X.size == 0:
        raise ValueError(f"{Path(path).name}: file contains no samples")

    # Convert microvolts to volts if needed (heuristic on the peak magnitude)
    if np.nanmax(np.abs(X)) > 1e-3:
        X = X * 1e-6

    return X


def band_power(raw, fmin, fmax, n_fft=2048):
    """
    Compute mean Welch power in a frequency band for an MNE Raw object.

    Parameters
    ----------
    raw : mne.io.Raw-like
        Recording to analyse; must expose ``n_times``.
    fmin, fmax : float
        Band edges in Hz passed to the PSD estimator.
    n_fft : int, optional
        Requested FFT length in samples (default 2048). It is clamped to the
        recording length, and the segment overlap is set to half of it.

    Returns
    -------
    float
        PSD averaged over the band's frequency bins, then over channels.
    """
    # Welch cannot use an FFT longer than the recording; clamp so short files
    # do not abort the whole batch.
    n_fft = int(min(n_fft, raw.n_times))
    n_overlap = n_fft // 2

    if hasattr(raw, "compute_psd"):
        # Current MNE API; the psd_welch function is no longer available there.
        spectrum = raw.compute_psd(
            method="welch", fmin=fmin, fmax=fmax,
            n_fft=n_fft, n_overlap=n_overlap, verbose=False,
        )
        psd = spectrum.get_data()
    else:
        # Legacy MNE releases that predate Raw.compute_psd.
        psd, _ = mne.time_frequency.psd_welch(
            raw, fmin=fmin, fmax=fmax,
            n_fft=n_fft, n_overlap=n_overlap, verbose=False,
        )

    return float(psd.mean(axis=1).mean())

In [20]:
## File processing

files = sorted(RAW_DIR.glob("Origin_*.txt"))
print("Processing", len(files), "files...")

# Fail with a clear message instead of an opaque KeyError from sort_values
# on an empty frame further down.
if not files:
    raise FileNotFoundError(f"No Origin_*.txt files found in {RAW_DIR.resolve()}")

rows = []

for f in files:
    # Group is derived from the file name: stems containing "_H" are labelled
    # HC, everything else PTSD.
    group = "HC" if "_H" in f.stem else "PTSD"
    subj = f.stem.replace("Origin_", "")

    # 1. Load data
    X = load_txt_eeg(f)

    # 2. Build MNE Raw object (a fresh Info per file, since set_montage
    #    modifies the Info attached to the Raw object)
    info = mne.create_info(CH_NAMES, SFREQ, ch_types="eeg")
    raw = mne.io.RawArray(X, info, verbose=False)
    raw.set_montage("standard_1020", on_missing='ignore')
    # NOTE: "firwin() got an unexpected keyword argument 'nyq'" raised here
    # points to an installed MNE that is too old for the installed SciPy;
    # presumably upgrading MNE (or pinning an older SciPy) resolves it - verify.
    raw.filter(1., 40., fir_design='firwin', verbose=False)

    # 3. Compute features
    alpha = band_power(raw, 8., 12.)
    beta = band_power(raw, 13., 30.)

    # 4. Store results
    rows.append({
        "subject": subj,
        "group": group,
        "alpha_power": alpha,
        "beta_power": beta
    })

print("Rows collected:", len(rows))
df = pd.DataFrame(rows).sort_values(["group","subject"])
display(df.head())

# Optional: save for later use. Create the output folder first so the write
# does not fail on a fresh checkout.
out_path = Path("outputs/results/band_power_2018.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path, index=False)

Processing 12 files...


TypeError: firwin() got an unexpected keyword argument 'nyq'

In [None]:
## Stats & plots

# Split the results once by diagnostic group; both the tests and the plot
# below read from these two subsets.
ptsd_rows = df[df.group == "PTSD"]
hc_rows = df[df.group == "HC"]

# Welch's t-test (unequal variances) for each band, PTSD vs. HC
for band_name in ("alpha_power", "beta_power"):
    t_stat, p_value = ttest_ind(
        ptsd_rows[band_name], hc_rows[band_name], equal_var=False
    )
    print(f"{band_name}: t={t_stat:.2f}, p={p_value:.4f}")

# Bar chart of the mean alpha power per group
group_labels = ["PTSD", "HC"]
alpha_means = [
    ptsd_rows["alpha_power"].mean(),
    hc_rows["alpha_power"].mean(),
]
plt.bar(group_labels, alpha_means)
plt.ylabel("Mean Alpha Power (a.u.)")
plt.title("Alpha Power by Group")
plt.show()