In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import mne

In [3]:

def load_eeg_data(patient_folder):
    """
    Loads EEG data from the specified patient folder.

    Every sub-directory of ``patient_folder`` is treated as a state folder
    (ictal, interictal, etc.). Each ``.fif`` file found directly inside a state
    folder is opened with ``mne.io.read_raw_fif(..., preload=False)``.
    Entries of ``patient_folder`` that are not directories are ignored.

    Parameters:
    - patient_folder: Path to the folder containing the patient's EEG files.
      A trailing path separator is tolerated.

    Returns:
    - data_dict: Dictionary with (subject, state, file_name) as keys and raw EEG data as values.
      Keys are inserted in sorted (state, file_name) order.
    """
    data_dict = {}

    # The subject name is the last component of the patient folder. normpath
    # drops a trailing separator first; without it basename() would return ''.
    subject = os.path.basename(os.path.normpath(patient_folder))

    # os.listdir() returns entries in arbitrary order; sort so that repeated
    # runs build the dictionary in the same order.
    for state_folder in sorted(os.listdir(patient_folder)):
        full_state_path = os.path.join(patient_folder, state_folder)

        # Ensure it's a directory before processing
        if not os.path.isdir(full_state_path):
            continue

        fif_files = sorted(f for f in os.listdir(full_state_path) if f.endswith('.fif'))
        for fif_file in fif_files:
            file_path = os.path.join(full_state_path, fif_file)
            raw = mne.io.read_raw_fif(file_path, preload=False)
            print(f"Loaded {file_path} with info: {raw.info}")

            data_dict[(subject, state_folder, fif_file)] = raw

    return data_dict

def _overlap_fraction_for_state(state, ictal_overlap, preictal_overlap, post_ictal):
    """Return the overlap fraction configured for a seizure state (0.0 for any other state)."""
    return {
        'ictal': ictal_overlap,
        'preictal': preictal_overlap,
        'postictal': post_ictal,
    }.get(state, 0.0)  # anything else is treated as interictal: no overlap


def _write_epoch_summary(summary_path, state_counts, banner):
    """Write one '<state>: <count> events' line per state, framed by two banner lines."""
    with open(summary_path, 'w') as f:
        f.write(banner + '\n')
        for state, count in state_counts.items():
            f.write(f"{state}: {count} events\n")
        f.write(banner + '\n')


def process_eeg_data(data_dict, target_sfreq=128, epoch_duration=5.0, ictal_overlap=0.99, preictal_overlap=0.5, post_ictal=0.80, save_dir=r'D:\preprocessed EEG'):
    """
    Processes EEG data by downsampling, creating epochs with overlap,
    and saving the processed epochs into folders based on subject and state.
    Also prints the number of epochs for each state.

    For every processed recording the epochs are saved to
    ``<save_dir>/<subject>/<state>/<file name without ".fif">_epo.fif``.
    For every subject that had at least one recording processed, a summary
    ``<save_dir>/<subject>/<subject>_epoch_info.txt`` with that subject's
    per-state epoch counts is written. Recordings whose last time stamp is
    below ``epoch_duration`` are skipped with a warning. Nothing is written
    when no recording is processed.

    Note: each raw object in ``data_dict`` is resampled in place.

    Parameters:
    - data_dict: Dictionary with (subject, state, file_name) as keys and raw EEG data as values.
    - target_sfreq: Target sampling frequency for downsampling.
    - epoch_duration: Duration of each epoch in seconds.
    - ictal_overlap: Overlap between consecutive epochs, as a fraction of
      epoch_duration, used when state is 'ictal'.
    - preictal_overlap: Same, used when state is 'preictal'.
    - post_ictal: Same, used when state is 'postictal'. Any other state uses no overlap.
    - save_dir: Root directory to save processed data (default: 'D:\\preprocessed EEG')

    Returns:
    - None
    """
    banner = '<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'

    epoch_counts = {}    # state -> epochs over everything processed (printed at the end)
    subject_dirs = {}    # subject_name -> output directory of that subject
    subject_counts = {}  # subject_name -> {state -> epochs} (written to the summary file)

    for (subject, state, file_name), raw in data_dict.items():

        if raw.times[-1] < epoch_duration:
            print(f"Warning: Data length is shorter than the epoch duration for {file_name}.")
            continue

        # Overlap in seconds, derived from the per-state fraction
        overlap = epoch_duration * _overlap_fraction_for_state(
            state, ictal_overlap, preictal_overlap, post_ictal
        )

        # Downsample the data
        raw.resample(target_sfreq)

        # Create fixed-length epochs with overlap
        epochs = mne.make_fixed_length_epochs(raw, duration=epoch_duration, preload=True, overlap=overlap)
        n_epochs = len(epochs)

        # Count the number of epochs for this state
        epoch_counts[state] = epoch_counts.get(state, 0) + n_epochs

        # Keep only the last path component of the subject, accepting both
        # '\\' and '/' separators so that a full path can never reach
        # os.path.join (an absolute component would discard save_dir).
        subject_name = subject.replace('\\', '/').rstrip('/').split('/')[-1]
        subject_dir = os.path.join(save_dir, subject_name)
        state_dir = os.path.join(subject_dir, state)
        os.makedirs(state_dir, exist_ok=True)  # Ensure directories are created

        subject_dirs[subject_name] = subject_dir
        per_state = subject_counts.setdefault(subject_name, {})
        per_state[state] = per_state.get(state, 0) + n_epochs

        # Save the entire epochs object to a single file
        epoch_file = f'{file_name.replace(".fif", "")}_epo.fif'
        epoch_path = os.path.join(state_dir, epoch_file)
        epochs.save(epoch_path, overwrite=True)

        print(f"Saved epochs for {file_name} in {state_dir}")

    # Save epoch count information to a text file in each patient folder.
    # Iterating over the collected subjects (instead of reusing the loop
    # variables) keeps this safe when nothing was processed.
    for subject_name, subject_dir in subject_dirs.items():
        epoch_info_path = os.path.join(subject_dir, f'{subject_name}_epoch_info.txt')
        _write_epoch_summary(epoch_info_path, subject_counts[subject_name], banner)

    print(banner)
    # Print the number of epochs for each state
    for state, count in epoch_counts.items():
        print(f"{state}: {count} events")
    print(banner)



# Enter the folder location and the parameters

In [4]:
import gc
import os

# Define the folder where all the patients' data is stored
folder = r'D:\CHB_MIT'

# Root directory that receives the processed epochs
EPOCH_SAVE_DIR = r'D:\Epilepsy\S1 ES'

# Epoching parameters forwarded to process_eeg_data for every patient
EPOCHING_PARAMS = dict(
    target_sfreq=128,
    epoch_duration=1.0,
    ictal_overlap=0.95,
    preictal_overlap=0.50,
    post_ictal=0.50,
)

# Create a list of all patient folders. os.listdir() returns entries in
# arbitrary order, so sort to process patients in the same order on every run.
file_paths = sorted(
    os.path.join(folder, entry)
    for entry in os.listdir(folder)
    if os.path.isdir(os.path.join(folder, entry))
)

# To process a single patient instead, call load_eeg_data / process_eeg_data
# on one entry of file_paths.
for patient_folder in file_paths:
    data_dict = load_eeg_data(patient_folder)
    process_eeg_data(data_dict, save_dir=EPOCH_SAVE_DIR, **EPOCHING_PARAMS)


Opening raw data file D:\CHB_MIT\chb01\ictal\chb01_03.edf_ictal_1_raw.fif...
Isotrak not found
    Range : 766976 ... 777216 =   2996.000 ...  3036.000 secs
Ready.
Loaded D:\CHB_MIT\chb01\ictal\chb01_03.edf_ictal_1_raw.fif with info: <Info | 10 non-empty values
 bads: []
 ch_names: FP1-F7, F7-T7, T7-P7, P7-O1, FP1-F3, F3-C3, C3-P3, P3-O1, ...
 chs: 21 EEG
 custom_ref_applied: False
 file_id: 4 items (dict)
 highpass: 0.5 Hz
 lowpass: 100.0 Hz
 meas_date: unspecified
 meas_id: 4 items (dict)
 nchan: 21
 projs: []
 sfreq: 256.0 Hz
 subject_info: 1 item (dict)
>
Opening raw data file D:\CHB_MIT\chb01\ictal\chb01_04.edf_ictal_1_raw.fif...
Isotrak not found
    Range : 375552 ... 382464 =   1467.000 ...  1494.000 secs
Ready.
Loaded D:\CHB_MIT\chb01\ictal\chb01_04.edf_ictal_1_raw.fif with info: <Info | 10 non-empty values
 bads: []
 ch_names: FP1-F7, F7-T7, T7-P7, P7-O1, FP1-F3, F3-C3, C3-P3, P3-O1, ...
 chs: 21 EEG
 custom_ref_applied: False
 file_id: 4 items (dict)
 highpass: 0.5 Hz
 lowpa