In [1]:
import os
import numpy as np
from datetime import datetime
import mne
import torch
from torch.utils.data import TensorDataset
import sys
import logging

In [2]:
import numpy as np
import pywt

def z_score(epoch):
    """
    Apply z-score normalization to each channel of the EEG data.
    
    Arguments:
        - Epoch (numpy.ndarray): EEG data to be normalized.
        
    Returns:
        - Z-scored epoch (numpy.ndarray): Normalized EEG data.
    """    
    # Apply z-score normalization to each channel, saved in epoch
    for i in range(epoch.shape[0]):
        channel_epoch = epoch[i, :]
        mean = np.mean(channel_epoch)
        std = np.std(channel_epoch)
        z_scored_epoch = (channel_epoch - mean) / std
        epoch[i, :] = z_scored_epoch
    
    return epoch

def frequency_to_scale(freq, wavelet='morl', sampling_rate=250):
    """
    Convert frequency values to scales for continuous wavelet transform (CWT).

    Arguments:
        = freq (array): Array of frequency values.
        wavelet (str, optional): Type of wavelet to use. Defaults to 'morl'.
        sampling_rate (int): Sampling rate of the EEG data. Defaults to 250 Hz.

    Returns:
        - scales (array): Array of scales corresponding to the input frequencies.
    """
    # For the Morlet wavelet, scales are inversely proportional to frequency
    center_freq = pywt.central_frequency(wavelet)
    return center_freq / (freq / sampling_rate)

def apply_wavelet_transform(data_norm, wavelet='morl', freq_range=(8, 30), sampling_rate=250):
    """
    Apply wavelet transform to EEG data.
    
    Arguments:
        - data_norm (ndarray): 2D array with shape (n_channels, n_time_points)
        - wavelet (str): Wavelet type (default 'morl')
        - freq_range (tuple): Frequency range for the CWT (default (8, 30) Hz)
        - sampling_rate (int): Sampling rate of the EEG data (default 250 Hz)
    
    Returns:
    ndarray: 3D array with shape (n_channels, n_scales, n_time_points)
    """
    n_channels, n_times = data_norm.shape
    # Define scales based on the desired frequency range
    scales = frequency_to_scale(np.arange(freq_range[0], freq_range[1]+1), wavelet=wavelet, sampling_rate=sampling_rate)
    
    coeffs = []
    for i in range(n_channels):
        # Compute the wavelet transform coefficients
        coef, _ = pywt.cwt(data_norm[i], scales=scales, wavelet=wavelet)
        coeffs.append(coef)
    
    # Stack coefficients to form a 3D tensor
    coeffs_done = np.stack(coeffs, axis=0)
    
    return coeffs_done


In [6]:
"""
    This module contains functions to process EEG data from OpenVibe measurements.
    This saves the filtered files and the combined TensorDataset to the specified output folders.
"""

import os
import numpy as np
from datetime import datetime
import mne
import torch
import logging

# Configure logger for this module
logger = logging.getLogger(__name__)

def filter_eeg_files(input_folder):
    """
    Process all EDF files ending with 'R03', 'R07', or 'R11' in the input folder.
    Apply a 5th order IIR Butterworth filter, resample the data to 250 Hz, rename the channels to standard names,
    and return the filtered raw objects.

    Arguments:
        - input_folder (str): Path to the folder containing the EDF files to be filtered.

    Returns:
        - filtered_raw_list (list): List of filtered raw MNE objects.
    """
    # Define the filter parameters
    l_freq = 8
    h_freq = 30
    order = 5

    # Define IIR filter parameters
    iir_params = dict(order=order, ftype='butter')

    # List to collect the filtered raw objects
    filtered_raw_list = []

    # Iterate through all subfolders in the output folder
    for root, _, files in os.walk(input_folder):
        for file_name in files:
            if file_name.endswith('.edf'):
                edf_file = os.path.join(root, file_name)

                # Load the raw data from the EDF file
                raw = mne.io.read_raw_edf(edf_file, preload=True)

                # Filter the raw data
                raw_filtered = raw.copy().filter(l_freq=l_freq, h_freq=h_freq, method='iir', iir_params=iir_params)

                # Resample the data
                raw_filtered.resample(250)

                # Append the filtered raw object to the list
                filtered_raw_list.append(raw_filtered)

    return filtered_raw_list

def filter_eeg_data(raw):
    """
    Apply a 5th order IIR Butterworth filter and resample the data to 250 Hz.
    Rename the channels to standard names.

    Arguments:
        - raw (mne.io.Raw): Raw EEG data.

    Returns:
        - raw_filtered (mne.io.Raw): Filtered and resampled EEG data.
    """
    l_freq = 8
    h_freq = 30
    order = 5

    # Define IIR filter parameters
    iir_params = dict(order=order, ftype='butter')

    # Filter the raw data
    raw_filtered = raw.copy().filter(l_freq=l_freq, h_freq=h_freq, method='iir', iir_params=iir_params)

    # Resample the data
    raw_filtered.resample(250)

    return raw_filtered

def get_epochs_and_labels_openvibe(edf_file):
    """
    Load the EEG data from a FIF file, extract the epochs and labels, and return them.

    Arguments:
        - FIF_file (str): Path to the FIF file containing the EEG data.

    Returns:
        - Epochs (mne.Epochs): EEG epochs extracted from the FIF file.
        - Labels (numpy.ndarray): Labels corresponding to the epochs.
    """
    # Load your EEG data
    raw = mne.io.read_raw_edf(edf_file, preload=True)
    
    # Get the events from the annotations
    events, _ = mne.events_from_annotations(raw)

    # T1 is left hand, T2 is right hand
    event_id = {'OVTK_GDF_Left': 5, 'OVTK_GDF_Right': 6}

    # Epochs start 0s before the trigger and end 0.5s after
    epochs = mne.Epochs(raw, events, event_id, tmin=0, tmax=0.5, baseline=None, preload=True)

    # Get the labels of the epochs
    labels = epochs.events[:, -1]

    # Change the labels to 0 and 1
    labels[labels == 5] = 0
    labels[labels == 6] = 1
    
    return epochs, labels

def file_to_tensor(input_folder, output_folder_tensor_dataset):
    """
    Process all EDF files in the input folder and create tensor datasets.

    Arguments:
        - input_folder (str): Path to the folder containing the EDF files.
        - output_folder_tensor_dataset (str): Path to the folder where the tensor dataset will be saved.

    Returns:
        - tensor_dataset (torch.Tensor): Combined dataset tensor.
        - labels_tensor (torch.Tensor): Labels tensor.
    """
    all_transformed_epochs = []
    all_labels = []

    # Filter the EEG files and get the filtered raw objects
    filtered_raw_list = filter_eeg_files(input_folder)

    # Process each filtered raw object
    for raw_filtered in filtered_raw_list:
        # Extract epochs and labels from the filtered data
        epochs, labels = get_epochs_and_labels_openvibe(raw_filtered)

        # Process each epoch
        for epoch, label in zip(epochs, labels):
            # Z-score each epoch
            epoch_norm = z_score(epoch)

            # Apply wavelet transformation
            epoch_wavelet = apply_wavelet_transform(epoch_norm)

            # Append the transformed epoch and its label to the lists
            all_transformed_epochs.append(epoch_wavelet)
            all_labels.append(label)

    # Convert lists to NumPy arrays before creating tensors
    all_transformed_epochs = np.array(all_transformed_epochs)
    all_labels = np.array(all_labels)

    # Convert the NumPy arrays to tensors
    tensor_dataset = torch.tensor(all_transformed_epochs, dtype=torch.float)
    labels_tensor = torch.tensor(all_labels, dtype=torch.long)

    # Get the current date and time
    current_datetime = datetime.now()

    # Format the date and time as a string for the dataset name
    dataset_name = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")

    # Make a subfolder for the tensor dataset in the output folder, name it with the current date and time
    subfolder = os.path.join(output_folder_tensor_dataset, dataset_name)

    # Save the combined dataset to the subfolder with the specified name
    os.makedirs(subfolder, exist_ok=True)
    tensor_dataset_file_name = os.path.join(subfolder, f"dataset_{dataset_name}.pt")
    torch.save(tensor_dataset, tensor_dataset_file_name)

    labels_tensor_file_name = os.path.join(subfolder, f"labels_{dataset_name}.pt")
    torch.save(labels_tensor, labels_tensor_file_name)

    return tensor_dataset, labels_tensor

In [7]:
# Example usage (assuming the script is run with an input folder containing EDF files):
input_folder = r"C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\raw"
output_folder_tensor_dataset = r"C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\processed"
tensor_dataset, labels_tensor = file_to_tensor(input_folder, output_folder_tensor_dataset)

Extracting EDF parameters from C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\raw\record-[2024.05.29-15.06.48]_FILTERED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 36749  =      0.000 ...   146.996 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 8 - 30 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 20 (effective, after forward-backward)
- Cutoffs at 8.00, 30.00 Hz: -6.02, -6.02 dB

Sampling frequency of the instance is already 250.0, returning unmodified.
Extracting EDF parameters from C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\raw\record-[2024.05.29-15.11.42]_FILTERED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 36749  =      0.000 ...   146.996 secs...
Filtering raw data in 1 contiguous segment
Setting up band-p

  raw = mne.io.read_raw_edf(edf_file, preload=True)
  raw = mne.io.read_raw_edf(edf_file, preload=True)
  raw = mne.io.read_raw_edf(edf_file, preload=True)
  raw = mne.io.read_raw_edf(edf_file, preload=True)
  raw = mne.io.read_raw_edf(edf_file, preload=True)


TypeError: _getfullpathname: path should be string, bytes or os.PathLike, not RawEDF

: 