In [1]:
import mne
import numpy as np
from mne.preprocessing import ICA

In [2]:
import pandas as pd
# Recording IDs per cohort, built from the numeric suffix of each ID.
subject_pd = [
    f"PD{number}"
    for number in (1001, 1021, 1031, 1061, 1091, 1101, 1151,
                   1201, 1251, 1261, 1311, 1571, 1661, 1681)
]
subject_hc = [
    f"Control{number}"
    for number in (1021, 1041, 1061, 1081, 1101, 1111, 1191,
                   1201, 1211, 1231, 1291, 1351, 1381, 1411)
]


In [3]:
# Map every subject ID to the path of its .vhdr header file.
subject_files_pd = [f"../IowaDataset/Raw data/{subject}.vhdr" for subject in subject_pd]
subject_files_hc = [f"../IowaDataset/Raw data/{subject}.vhdr" for subject in subject_hc]

print(subject_files_pd)
print(subject_files_hc)


['../IowaDataset/Raw data/PD1001.vhdr', '../IowaDataset/Raw data/PD1021.vhdr', '../IowaDataset/Raw data/PD1031.vhdr', '../IowaDataset/Raw data/PD1061.vhdr', '../IowaDataset/Raw data/PD1091.vhdr', '../IowaDataset/Raw data/PD1101.vhdr', '../IowaDataset/Raw data/PD1151.vhdr', '../IowaDataset/Raw data/PD1201.vhdr', '../IowaDataset/Raw data/PD1251.vhdr', '../IowaDataset/Raw data/PD1261.vhdr', '../IowaDataset/Raw data/PD1311.vhdr', '../IowaDataset/Raw data/PD1571.vhdr', '../IowaDataset/Raw data/PD1661.vhdr', '../IowaDataset/Raw data/PD1681.vhdr']
['../IowaDataset/Raw data/Control1021.vhdr', '../IowaDataset/Raw data/Control1041.vhdr', '../IowaDataset/Raw data/Control1061.vhdr', '../IowaDataset/Raw data/Control1081.vhdr', '../IowaDataset/Raw data/Control1101.vhdr', '../IowaDataset/Raw data/Control1111.vhdr', '../IowaDataset/Raw data/Control1191.vhdr', '../IowaDataset/Raw data/Control1201.vhdr', '../IowaDataset/Raw data/Control1211.vhdr', '../IowaDataset/Raw data/Control1231.vhdr', '../IowaData

In [4]:
# Sanity check: number of recordings in each cohort, one count per line.
print(len(subject_files_pd), len(subject_files_hc), sep="\n")

14
14


In [5]:
def set_montage(raw_data):
    """Assign MNE's standard ``'biosemi32'`` montage to a recording.

    The montage is applied with ``on_missing='warn'``; per MNE's documentation
    of that option, channels the montage cannot place produce a warning
    instead of an error.

    Args:
        raw_data: Object exposing ``set_montage(montage, on_missing=...)``;
            presumably an ``mne.io.Raw`` instance.

    Returns:
        The same ``raw_data`` object that was passed in. The return value of
        ``raw_data.set_montage`` itself is discarded.
    """
    raw_data.set_montage(
        mne.channels.make_standard_montage('biosemi32'),
        on_missing='warn',
    )
    return raw_data

In [6]:
def bandpass_filter(raw_data, l_freq=0.5, h_freq=50.0):
    """Band-pass filter a recording by delegating to ``raw_data.filter``.

    Args:
        raw_data: Object exposing ``filter(l_freq=..., h_freq=...)``;
            presumably an ``mne.io.Raw`` instance.
        l_freq: Lower pass-band edge forwarded to ``filter`` (default 0.5).
        h_freq: Upper pass-band edge forwarded to ``filter`` (default 50.0).

    Returns:
        The same ``raw_data`` object that was passed in. Whatever ``filter``
        returns is ignored, so this relies on ``filter`` working in place.
    """
    passband = {"l_freq": l_freq, "h_freq": h_freq}
    raw_data.filter(**passband)
    return raw_data

In [7]:
def find_ecg_via_temporal_channels(ica, raw_data):
    """Flag ECG-like ICA components, using channel ``'T7'`` as the reference.

    Experimental: the recording is not given a dedicated ECG channel here, so
    ``ica.find_bads_ecg`` is pointed at the temporal electrode ``'T7'``.

    Args:
        ica: Object exposing ``find_bads_ecg(raw, ch_name=...)`` and an
            ``exclude`` attribute; presumably a fitted
            ``mne.preprocessing.ICA``.
        raw_data: Recording handed to ``find_bads_ecg``.

    Returns:
        The same ``ica`` object, with the detected component indices merged
        into ``ica.exclude``.
    """
    ecg_indices, _scores = ica.find_bads_ecg(raw_data, ch_name='T7')

    # Assign a fresh, de-duplicated list instead of using `+=`: the in-place
    # form also mutates whatever list the caller assigned to `ica.exclude`
    # and records a component twice when it is flagged by both detectors.
    # dict.fromkeys keeps first-seen order.
    ica.exclude = list(dict.fromkeys([*ica.exclude, *ecg_indices]))
    return ica

In [8]:
def segment_data(raw_data, duration=1.0):
    """Cut a continuous recording into fixed-length windows.

    Events are generated every ``duration`` seconds with
    ``mne.make_fixed_length_events`` and epoched from ``tmin=0`` to
    ``tmax=duration`` without baseline correction.

    Args:
        raw_data: Continuous recording; presumably an ``mne.io.Raw`` instance.
        duration: Window length in seconds (default 1.0).

    Returns:
        The array returned by ``Epochs.get_data()``; per MNE's documentation
        this is shaped (n_epochs, n_channels, n_times).

    NOTE(review): MNE documents ``tmax`` as inclusive, so each window
    presumably holds one sample more than ``duration * sfreq``. The caller
    in this notebook trims the last axis to 500 samples. Confirm before
    relying on the exact length.
    """
    window_onsets = mne.make_fixed_length_events(raw_data, duration=duration)
    windows = mne.Epochs(
        raw_data, window_onsets, tmin=0, tmax=duration, baseline=None, preload=True
    )
    return windows.get_data()

In [9]:
def apply_ica(raw_data, n_components=29):
    """Fit an ICA on the recording and remove EOG- and ECG-like components.

    Steps:
      1. Fit ``ICA(n_components, random_state=97, max_iter="auto")``; the
         fixed seed makes the decomposition repeatable.
      2. Flag eye-related components with ``find_bads_eog`` using channels
         ``'Fp2'`` and ``'F8'`` and ``threshold=1.96``.
      3. Add ECG-like components via ``find_ecg_via_temporal_channels``.
      4. Apply the ICA with those components excluded.

    Args:
        raw_data: Recording to clean; presumably an ``mne.io.Raw`` instance.
        n_components: Number of ICA components to fit (default 29).

    Returns:
        Whatever ``ica.apply(raw_data)`` returns, i.e. the cleaned recording.
    """
    decomposition = ICA(n_components=n_components, random_state=97, max_iter="auto")
    decomposition.fit(raw_data)

    # Eye-blink detection; the scores are not needed here.
    blink_components, _ = decomposition.find_bads_eog(
        raw_data, ch_name=['Fp2', 'F8'], threshold=1.96
    )
    decomposition.exclude = blink_components

    # Experimental ECG detection extends the exclusion list.
    decomposition = find_ecg_via_temporal_channels(decomposition, raw_data)

    return decomposition.apply(raw_data)

In [10]:
# Frequency bands as (lower, upper) edges in Hz, keyed by band name.
# Iteration order (delta -> gamma) is the order in which features are computed.
freq_bands = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 12),
    beta=(13, 30),
    gamma=(30, 48),
)

In [11]:
def compute_psd(eeg_data, sfreq):
    """Compute mean multitaper band power for every band in ``freq_bands``.

    For each band, ``mne.time_frequency.psd_array_multitaper`` is run with
    that band's ``fmin``/``fmax`` (adaptive weighting, ``'full'``
    normalization) and the result is averaged over axis 2.

    Args:
        eeg_data: Array of signals. Assumes a 3-D (epochs, channels, samples)
            layout, as passed by ``process_subject``, so that axis 2 of the
            PSD is the frequency axis (MNE returns frequencies on the last
            axis).
        sfreq: Sampling rate in Hz of ``eeg_data``.

    Returns:
        Dict mapping band name to the band-averaged PSD, one value per
        (epoch, channel) under the layout assumed above.
    """
    def _mean_band_power(fmin, fmax):
        # Only the spectrum is needed; the frequency vector is dropped.
        spectrum, _freqs = mne.time_frequency.psd_array_multitaper(
            eeg_data, sfreq=sfreq, fmin=fmin, fmax=fmax, adaptive=True, normalization='full'
        )
        return spectrum.mean(axis=2)

    return {
        band_name: _mean_band_power(lower, upper)
        for band_name, (lower, upper) in freq_bands.items()
    }

In [12]:
# Define a function to process a single subject and extract PSD features
def process_subject(raw_data, sfreq):
    """Run the full preprocessing chain on one recording and return band power.

    Pipeline: montage -> band-pass filter -> ICA artifact removal ->
    fixed-length segmentation -> trim -> per-band PSD.

    Args:
        raw_data: Recording for one subject; presumably an ``mne.io.Raw``
            instance already restricted to the channels of interest.
        sfreq: Sampling rate in Hz forwarded to ``compute_psd``. It is not
            checked against the recording here.

    Returns:
        The dict produced by ``compute_psd`` (band name -> averaged PSD).
    """
    set_montage(raw_data)
    cleaned = apply_ica(bandpass_filter(raw_data))
    windows = segment_data(cleaned)

    # Keep at most the first 29 channels and the first 500 samples per window.
    trimmed = windows[:, :29, :500]

    return compute_psd(trimmed, sfreq)

In [13]:
# Module-level accumulator: one {'data': [...], 'label': [...]} entry per band.
# collect_psd_features appends to these lists on every call.
all_psd_features = {
    band: {'data': [], 'label': []}
    for band in ('delta', 'theta', 'alpha', 'beta', 'gamma')
}

def collect_psd_features(subject_files, sfreq,label):
    """Extract band-power features for each recording and accumulate them.

    Every file is loaded with ``mne.io.read_raw_brainvision``, cropped with
    ``tmax=120.``, reduced to 29 channels and passed to ``process_subject``.
    The per-band results are appended to the module-level
    ``all_psd_features`` dict together with ``label``.

    Args:
        subject_files: Iterable of BrainVision ``.vhdr`` paths.
        sfreq: Sampling rate in Hz forwarded to ``process_subject``. A
            ``RuntimeWarning`` is emitted when a file reports a different
            rate, because the PSD frequency axis would then be mis-scaled.
            Processing still continues with the value given here.
        label: Value stored once per subject next to its features.

    Returns:
        The module-level ``all_psd_features`` dict. It is the same object on
        every call, so calling this again (or re-running the cell) keeps
        appending to the existing lists.
    """
    import warnings

    # Loop-invariant: the same 29 channels are kept for every subject.
    # NOTE(review): the channel order of the result may depend on the MNE
    # version's `ordered` default for pick_channels; confirm if column order matters.
    channels_to_keep = ['Fp1', 'AF3', 'F7', 'F3', 'FC1', 'FC5', 'T7', 'C3',
                        'CP1', 'CP5', 'P7', 'P3', 'O1', 'Oz', 'O2',
                        'P4', 'P8', 'CP6', 'CP2', 'C4', 'T8', 'FC6', 'FC2',
                        'F4', 'F8', 'AF4', 'Fp2', 'Fz', 'Cz']

    for subject_file in subject_files:
        # Load the subject's data and crop it with tmax=120.
        raw_data = mne.io.read_raw_brainvision(subject_file, preload=True)
        raw_data.crop(tmax=120.)

        # Surface a sampling-rate mismatch instead of silently mis-scaling the PSD.
        file_sfreq = raw_data.info['sfreq']
        if file_sfreq != sfreq:
            warnings.warn(
                f"{subject_file}: file sampling rate {file_sfreq} Hz differs from "
                f"sfreq={sfreq} Hz passed to collect_psd_features",
                RuntimeWarning,
                stacklevel=2,
            )

        raw_data = raw_data.pick_channels(channels_to_keep)

        # Process the subject's data to extract PSD features.
        psd_features = process_subject(raw_data, sfreq)
        print("Shape of psd_features[delta] in each sub: ",np.asarray(psd_features['delta']).shape)

        # Append the features and the matching label for each frequency band.
        for band in all_psd_features.keys():
            all_psd_features[band]['data'].append(psd_features[band])  # one array per subject
            all_psd_features[band]['label'].append(label)  # one label per subject

    return all_psd_features

# Structure of all_psd_features (illustrated after two subjects with label 1):
# all_psd_features = {
#     'alpha': {
#         'data': [array(120x29), array(120x29)],  # one (epochs x channels) array per subject
#         'label': [1, 1]  # one label per subject
#     },
#     'delta': {
#         'data': [array(120x29), array(120x29)],
#         'label': [1, 1]
#     },
#     ...
# }

In [None]:
# Sampling rate in Hz handed down to the PSD computation (hard-coded, not read from the files).
sfreq = 500

# Process the PD cohort with label 1. The return value is the shared
# all_psd_features accumulator, not a PD-only copy.
psd_features_pd = collect_psd_features(subject_files=subject_files_pd, sfreq=sfreq, label=1)

In [15]:
# Inspect the accumulated alpha entry: number of subjects, the first two
# dimensions of the first subject's array, then the stored labels.
alpha_entry = all_psd_features["alpha"]
first_subject = alpha_entry['data'][0]
print(len(alpha_entry['data']))
print(len(first_subject))
print(len(first_subject[0]))
print(alpha_entry['label'])

14
120
29
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [None]:
# Process the control cohort with label 0. The returned object is the same
# module-level accumulator that already holds the PD results.
psd_features_hc = collect_psd_features(subject_files=subject_files_hc, sfreq=sfreq, label=0)

In [17]:
# Re-inspect the alpha entry now that both cohorts have been collected:
# number of subjects, first two dimensions of the first array, and all labels.
alpha_entry = all_psd_features["alpha"]
first_subject = alpha_entry['data'][0]
print(len(alpha_entry['data']))
print(len(first_subject))
print(len(first_subject[0]))
print(alpha_entry['label'])

28
120
29
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [21]:
# all_psd_features = {
#     'alpha': {
#         'data': [array(120x29), array(120x29),.....],  # 28 subjects' data (PD_ON vs HC)
#         'label': [1, 1, ...., 0]  # 28 corresponding labels
#     },
#     'delta': {
#         'data': [array(120x29), array(120x29), .....],
#         'label': [1, 1, ....]
#     },
#     ...
# }

In [18]:
import pickle

def save_psd_features(all_psd_features, output_dir="new_data/pd vs hc"):
    """Pickle every band of ``all_psd_features`` into its own file.

    Each value of the mapping is written to ``<output_dir>/<band>.pkl`` and a
    confirmation line is printed per file. The directory is created when it
    does not exist yet.

    Args:
        all_psd_features: Mapping of band name to the object to pickle (in
            this notebook a ``{'data': [...], 'label': [...]}`` dict).
        output_dir: Destination directory (default ``"new_data/pd vs hc"``).

    Returns:
        None.
    """
    import os

    # Make sure the destination exists before any file is opened.
    os.makedirs(output_dir, exist_ok=True)

    for band in all_psd_features:
        file_path = os.path.join(output_dir, f"{band}.pkl")

        with open(file_path, 'wb') as sink:
            pickle.dump(all_psd_features[band], sink)

        print(f"Saved {band} features to {file_path}")

# alpha.pkl contains:
# {
#     "data": [array(120x29), array(120x29), ...],  # 28 subjects' data
#     "label": [1, 1, ..., 0]  # 28 corresponding labels
# }


In [19]:
# Write one pickle per frequency band into the default output directory.
save_psd_features(all_psd_features=all_psd_features)

Saved delta features to new_data/pd vs hc\delta.pkl
Saved theta features to new_data/pd vs hc\theta.pkl
Saved alpha features to new_data/pd vs hc\alpha.pkl
Saved beta features to new_data/pd vs hc\beta.pkl
Saved gamma features to new_data/pd vs hc\gamma.pkl


In [21]:
import os
def load_psd_features(band_name, input_dir="new_data/pd vs hc"):
    """Load the pickled features of one frequency band.

    Args:
        band_name: Band whose file ``<input_dir>/<band_name>.pkl`` is read.
        input_dir: Directory holding the pickles (default
            ``"new_data/pd vs hc"``).

    Returns:
        The unpickled object stored in that file.

    Note:
        ``pickle.load`` can execute arbitrary code, so only point this at
        files produced by this notebook or another trusted source.
    """
    pickle_path = os.path.join(input_dir, f"{band_name}.pkl")

    with open(pickle_path, 'rb') as source:
        return pickle.load(source)

# Example usage: reload one band and check it against what was collected.
# The band now matches the variable name (it previously loaded "delta" into alpha_data).
alpha_data = load_psd_features("alpha")
print(len(alpha_data["data"]))  # Number of subjects: 28 (14 PD + 14 HC)
print(alpha_data["data"][0].shape)  # (epochs, channels) for the first subject: (120, 29)
print(alpha_data["label"])  # 1 for each PD subject followed by 0 for each HC subject


28
(120, 29)
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
