In [1]:
import numpy as np
import pandas as pd
import os
import mne
import pywt
import torch
from datetime import datetime
import torch
from torch.utils.data import TensorDataset

In [8]:
def filter_eeg_files(input_folder, output_folder):
    """
    Process all EDF files ending with 'R03', 'R07', or 'R11' in the input folder.
    Apply a 5th order IIR Butterworth filter, resample the data to 250 Hz, rename the channels to standard names,
    and save each filtered file with a '_filtered' suffix in the output folder.

    Arguments:
        - Input_folder (str): Path to the folder containing the EDF files to be filtered.
        - Output_folder (str): Path to the folder where the filtered files will be saved.
        
    Returns:
        - FIF files: Filtered EEG data files saved in the output folder.
        - filtered_files_list (list): List of paths to the filtered files.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)
    
    # Define the filter parameters
    l_freq=8 
    h_freq=30
    order=5

    # Define IIR filter parameters
    iir_params = dict(order=order, ftype='butter')
    
    # List to collect paths of the filtered files
    filtered_files_list = []

    # Iterate through all subfolders in the output folder
    for root, _, files in os.walk(input_folder):
        for file_name in files:
            if file_name.endswith('.edf'):
                # Construct full file path
                file_path = os.path.join(root, file_name)
                
                # Load the EDF file
                raw = mne.io.read_raw_edf(file_path, preload=True)                
                
                # Apply the 5th order IIR Butterworth filter
                raw.filter(l_freq=l_freq, h_freq=h_freq, method='iir', iir_params=iir_params)
                
                # Define the output file path
                output_file_name = f"{os.path.splitext(file_name)[0]}_filtered.fif"
                output_file_path = os.path.join(output_folder, output_file_name)
                
                # Save the filtered data
                raw.save(output_file_path, overwrite=True)  
                
                # Add the output file path to the list, for return
                filtered_files_list.append(output_file_path)
    
    # Return the list of filtered file paths
    return filtered_files_list     

In [9]:
def get_epochs_and_labels_openvibe(fif_file):
    """
    Load the EEG data from a FIF file, extract the epochs and labels, and return them.

    Arguments:
        - FIF_file (str): Path to the FIF file containing the EEG data.

    Returns:
        - Epochs (mne.Epochs): EEG epochs extracted from the FIF file.
        - Labels (numpy.ndarray): Labels corresponding to the epochs.
    """
    # Load your EEG data
    raw = mne.io.read_raw_fif(fif_file, preload=True)
    
    # Get the events from the annotations
    events, _ = mne.events_from_annotations(raw)

    # T1 is left hand, T2 is right hand
    event_id = {'OVTK_GDF_Left': 5, 'OVTK_GDF_Right': 6}

    # Epochs start 0s before the trigger and end 0.5s after
    epochs = mne.Epochs(raw, events, event_id, tmin=0, tmax=0.5, baseline=None, preload=True)

    # Get the labels of the epochs
    labels = epochs.events[:, -1]

    # Change the labels to 0 and 1
    labels[labels == 5] = 0
    labels[labels == 6] = 1
    
    return epochs, labels

In [4]:
def z_score(epoch):
    """
    Apply z-score normalization to each channel of the EEG data.
    
    Arguments:
        - Epoch (numpy.ndarray): EEG data to be normalized.
        
    Returns:
        - Z-scored epoch (numpy.ndarray): Normalized EEG data.
    """    
    # Apply z-score normalization to each channel, saved in epoch
    for i in range(epoch.shape[0]):
        channel_epoch = epoch[i, :]
        mean = np.mean(channel_epoch)
        std = np.std(channel_epoch)
        z_scored_epoch = (channel_epoch - mean) / std
        epoch[i, :] = z_scored_epoch
    
    return epoch

In [5]:
def frequency_to_scale(freq, wavelet='morl', sampling_rate=250):
    """
    Convert frequency values to scales for continuous wavelet transform (CWT).

    Arguments:
        = freq (array): Array of frequency values.
        wavelet (str, optional): Type of wavelet to use. Defaults to 'morl'.
        sampling_rate (int): Sampling rate of the EEG data. Defaults to 250 Hz.

    Returns:
        - scales (array): Array of scales corresponding to the input frequencies.
    """
    # For the Morlet wavelet, scales are inversely proportional to frequency
    center_freq = pywt.central_frequency(wavelet)
    return center_freq / (freq / sampling_rate)

In [6]:
def apply_wavelet_transform(data_norm, wavelet='morl', freq_range=(8, 30), sampling_rate=250):
    """
    Apply wavelet transform to EEG data.
    
    Arguments:
        - data_norm (ndarray): 2D array with shape (n_channels, n_time_points)
        - wavelet (str): Wavelet type (default 'morl')
        - freq_range (tuple): Frequency range for the CWT (default (8, 30) Hz)
        - sampling_rate (int): Sampling rate of the EEG data (default 250 Hz)
    
    Returns:
    ndarray: 3D array with shape (n_channels, n_scales, n_time_points)
    """
    n_channels, n_times = data_norm.shape
    # Define scales based on the desired frequency range
    scales = frequency_to_scale(np.arange(freq_range[0], freq_range[1]+1), wavelet=wavelet, sampling_rate=sampling_rate)
    
    coeffs = []
    for i in range(n_channels):
        # Compute the wavelet transform coefficients
        coef, _ = pywt.cwt(data_norm[i], scales=scales, wavelet=wavelet)
        coeffs.append(coef)
    
    # Stack coefficients to form a 3D tensor
    coeffs_done = np.stack(coeffs, axis=0)
    
    return coeffs_done

In [11]:
def file_to_tensor(input_folder, output_folder_filtered_files, output_folder_tensor_dataset):
    """
    Load the EEG data from a list of FIF files, apply z-score normalization and wavelet transform to each epoch,
    and return the transformed epochs and their labels as a combined TensorDataset.

    Arguments:
        - input_folder (str): Path to the input folder containing the FIF files.
        - output_folder_filtered_files (str): Path to the output folder for filtered files.
        - output_folder_tensor_dataset (str): Path to the output folder for the combined TensorDataset.

    Returns:
        - Saves filtered files to the output folder filtered, and the combined TensorDataset to the output folder tensor.
    """
    # Create a list to hold the transformed epochs for all files
    all_transformed_epochs = []
    
    # Create a list to hold the corresponding labels for all epochs
    all_labels = []
    
    # Filter the EEG files
    filtered_files = filter_eeg_files(input_folder, output_folder_filtered_files)
    
    # Process each filtered file
    for fif_file in filtered_files:
        # Load the EEG data from the FIF file and extract the epochs and labels
        epochs, labels = get_epochs_and_labels_openvibe(fif_file)
        
        # Process each epoch
        for epoch, label in zip(epochs, labels):
            # Z-score each epoch
            epoch_norm = z_score(epoch)
            
            # Apply wavelet transformation
            epoch_wavelet = apply_wavelet_transform(epoch_norm)
            
            # Append the transformed epoch and its label to the lists
            all_transformed_epochs.append(epoch_wavelet)
            all_labels.append(label)

    # Convert the list of all transformed epochs into a single tensor dataset
    tensor_dataset = torch.tensor(all_transformed_epochs, dtype=torch.float)
    
    # Convert the list of all labels into a tensor
    labels_tensor = torch.tensor(all_labels, dtype=torch.long)
    
    # Combine the tensor dataset and labels tensor into a TensorDataset
    dataset = TensorDataset(tensor_dataset, labels_tensor)
    
    # Get the current date and time
    current_datetime = datetime.now()
    
    # Format the date and time as a string for the dataset name
    dataset_name = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
    
    # Save the combined dataset to the output folder with the specified name
    output_file = os.path.join(output_folder_tensor_dataset, f"baseline_dataset_{dataset_name}.pt")
    torch.save(dataset, output_file)
    
    return tensor_dataset, labels_tensor, dataset

In [12]:
input_folder = r"C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\raw"
output_folder_filtered_files = r"C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\processed"
output_folder_tensor_dataset = r"C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\interim"
tensor_dataset, labels_tensor, dataset = file_to_tensor(input_folder)

Extracting EDF parameters from C:\School\EE_Y3\Q4\BAP\eeg_thesis_cnn_repo\data\openvibe\raw\record-[2024.05.29-15.06.48]_FILTERED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 36749  =      0.000 ...   146.996 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 8 - 30 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 20 (effective, after forward-backward)
- Cutoffs at 8.00, 30.00 Hz: -6.02, -6.02 dB



  raw = mne.io.read_raw_edf(file_path, preload=True)


NameError: name 'output_file_path' is not defined

In [13]:
tensor_dataset.shape, labels_tensor.shape, dataset.tensors[0].shape, dataset.tensors[1].shape

(torch.Size([50, 8, 23, 126]),
 torch.Size([50]),
 torch.Size([50, 8, 23, 126]),
 torch.Size([50]))