In [None]:
# Put the parent of the working directory on the import path so that
# project-local packages can be imported from this notebook.
import sys
from pathlib import Path

PROJECT_ROOT = Path("..").resolve()
# Prepend only when absent, so re-running this cell never adds a duplicate entry.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))


In [None]:
# Load the subject metadata from the dataset's Excel sheet.
from src.load_data import load_metadata

# Anchor the data directory on PROJECT_ROOT (resolved in the first cell) instead
# of repeating a second "../" literal: both paths then share a single anchor,
# and DATA_DIR is absolute, so it stays valid if the working directory changes.
DATA_DIR = PROJECT_ROOT / "data" / "MODMA"

meta = load_metadata(DATA_DIR / "subjects_information_EEG_128channels_resting_lanzhou_2015.xlsx")

# Show the available columns, then preview the first rows as the cell output.
print(meta.columns)
meta.head()


In [None]:
# Load a single subject's recording together with its label and loader info.
from src.load_data import load_subject

sample_mat = DATA_DIR.joinpath("02030020_rest 20151230 1416.mat")
raw, label, info = load_subject(sample_mat, meta)

# Quick look at what came back: loader info, label, data shape, sampling rate.
print(info)
print(f"label: {label!s}")
print(f"shape: {raw.get_data().shape}")
print(f"sfreq: {raw.info['sfreq']!s}")


In [None]:
# Preprocess the raw recording, then inspect the result visually.
from src.preprocess import preprocess_raw, epoch_raw

raw_clean = preprocess_raw(raw)

# Time-domain browser: 20 channels per view, 10-unit window (presumably seconds),
# automatic scaling.
raw_clean.plot(n_channels=20, duration=10, scalings="auto")
# Power spectral density with fmax=50 (presumably Hz). The trailing semicolon
# keeps the returned figure from also being echoed as the cell's output value,
# which would otherwise show a duplicate figure or a stray object repr.
raw_clean.compute_psd(fmax=50).plot();


In [None]:
# Segment the cleaned recording into epochs with epoch_len=2.0
# (presumably seconds - confirm against epoch_raw).
epochs = epoch_raw(raw_clean, epoch_len=2.0)

# Summary repr first, then the shape of the underlying data array.
print(epochs)
print(f"Epochs shape: {epochs.get_data().shape}")


In [None]:
# Reject on a copy so that `epochs` itself keeps every epoch and the
# before/after counts below can be compared.
epochs_clean = epochs.copy().drop_bad(reject={"eeg": 150e-6})  # conservative threshold

print(f"Before: {len(epochs)}")
print(f"After: {len(epochs_clean)}")


In [None]:
import numpy as np

# Amplitude range of the surviving epochs, as a sanity check on the rejection step.
data = epochs_clean.get_data()  # (n_epochs, n_ch, n_times)
ptp = data.max(axis=2) - data.min(axis=2)  # peak-to-peak (max - min) per epoch/channel

print(f"epochs_clean: {data.shape}")
# Values are scaled by 1e6 for display, matching the "(uV)" captions.
print(f"Median peak-to-peak (uV): {(np.median(ptp) * 1e6)!s}")
print(f"95th percentile peak-to-peak (uV): {(np.percentile(ptp, 95) * 1e6)!s}")


In [None]:
from src.features import featurize_subject, dict_to_row
import pandas as pd

# Compute this subject's feature mapping from the cleaned epochs.
feats = featurize_subject(epochs_clean)

print(f"num features: {len(feats)}")
# Preview the first ten (key, value) pairs as the cell output.
[pair for pair, _ in zip(feats.items(), range(10))]


In [None]:
# Build one record from the features, the subject id and the label, then wrap
# it in a single-row DataFrame for inspection.
row = dict_to_row(
    feats,
    subject_id=info["subject_id"],
    label=label,
)
df_one = pd.DataFrame(data=[row])

df_one.head()


In [None]:
# Sanity-check the feature row: it must contain no NaNs and only finite values.
import numpy as np

# Count missing values across the whole row (identifier columns included).
n_nans = int(df_one.isna().sum().sum())
print("NaNs:", n_nans)

# Check finiteness on the feature columns only. Coerce to float explicitly:
# np.isfinite raises TypeError on an object-dtype array, which is what a frame
# with mixed column types would otherwise produce.
feat_only = df_one.drop(columns=["subject_id","label"])
all_finite = bool(np.isfinite(feat_only.to_numpy(dtype=float)).all())
print("Finite:", all_finite)

# Actually enforce the checks (after printing both diagnostics) so that a
# Restart & Run All stops here instead of silently continuing with bad features.
assert n_nans == 0, f"feature row contains {n_nans} NaN value(s)"
assert all_finite, "feature row contains non-finite values (inf or NaN)"

