In [1]:
import numpy as np
import pandas as pd
from numba import jit

# Optimized Approximate Entropy function with Numba JIT and sliding window
@jit(nopython=True)
def approximate_entropy(U, m, r):
    """
    Compute Approximate Entropy (ApEn) of a time series in a memory-efficient way.
    
    Parameters:
    U : array-like
        The input signal.
    m : int
        The length of compared run of data.
    r : float
        The filtering level (standard deviation * r).
    
    Returns:
    float
        Approximate entropy of the input signal.
    """
    def _phi(m):
        N = len(U)
        count = 0
        for i in range(N - m):
            template = U[i:i + m]
            matches = 0
            for j in range(N - m):
                candidate = U[j:j + m]
                if np.max(np.abs(template - candidate)) <= r:
                    matches += 1
            count += np.log(matches / (N - m + 1))
        return count / (N - m + 1)

    return abs(_phi(m) - _phi(m + 1))

# Sliding window function for Approximate Entropy
def calculate_apen_sliding_window(signal, m=2, r_factor=0.2, window_size=128, step_size=64):
    """
    Compute Approximate Entropy using a sliding window approach.
    
    Parameters:
    signal : array-like
        The input signal (EEG data).
    m : int
        The embedding dimension.
    r_factor : float
        The factor to calculate the tolerance level (r = r_factor * std).
    window_size : int
        Size of the sliding window (number of samples).
    step_size : int
        Step size for the sliding window (how many samples to shift).
    
    Returns:
    array
        Approximate entropy for each window.
    """
    r = r_factor * np.std(signal)  # Set r based on the standard deviation of the signal
    apen_values = []

    # Slide the window across the signal
    for start in range(0, len(signal) - window_size + 1, step_size):
        window = signal[start:start + window_size]
        apen_value = approximate_entropy(window, m=m, r=r)
        apen_values.append(apen_value)

    return np.array(apen_values)

# Function to read and process large EEG data in chunks if needed
def calculate_apen_for_large_data(preprocessed_file, eeg_channel, m=2, r_factor=0.2, window_size=128, step_size=64, chunksize=100000):
    """
    Compute Approximate Entropy for large EEG data using sliding windows and chunk processing.
    
    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file.
    eeg_channel : str
        Name of the EEG channel column in the CSV file.
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor.
    window_size : int
        Size of sliding window.
    step_size : int
        Step size for sliding window.
    chunksize : int
        Number of samples to read at a time from the file.
    
    Returns:
    array
        Approximate entropy for each window in the large dataset.
    """
    apen_values = []

    # Load data in chunks to avoid memory overload
    for chunk in pd.read_csv(preprocessed_file, chunksize=chunksize):
        signal = chunk[eeg_channel].values
        apen_chunk_values = calculate_apen_sliding_window(signal, m, r_factor, window_size, step_size)
        apen_values.extend(apen_chunk_values)

    return np.array(apen_values)




In [2]:
# Example usage
if __name__ == "__main__":
    apen_values = calculate_apen_for_large_data('preprocessed_eeg_data.csv', 'Fz', window_size=128, step_size=64)
    print("Approximate Entropy values:", apen_values)

Approximate Entropy values: [0.6438188  0.60599725 0.71156773 ... 0.38065699 0.32595    0.50583927]
