In [3]:
import numpy as np
import pandas as pd
from numba import jit
import os

# Optimized Approximate Entropy function with Numba JIT and sliding window
@jit(nopython=True)
def approximate_entropy(U, m, r):
    """
    Compute the Approximate Entropy (ApEn) of a one-dimensional time series.

    For an embedding dimension d, every run of d consecutive samples is a
    template; a series of N samples contains N - d + 1 of them. Two templates
    match when their largest element-wise absolute difference is at most r
    (a template always matches itself). phi(d) is the mean, over all
    templates, of the log of the fraction of templates that match it, and
    the result is |phi(m) - phi(m + 1)|.

    Parameters:
    U : 1-D array
        The input signal.
    m : int
        The length of compared run of data (embedding dimension).
    r : float
        Absolute tolerance for a match, in the same units as U. It is used
        exactly as given; any scaling (e.g. by a standard deviation) must be
        applied by the caller.

    Returns:
    float
        Approximate entropy of the input signal.

    Raises:
    ValueError
        If U has m or fewer samples, so no template of length m + 1 exists.
    """
    def _phi(dim):
        # All N - dim + 1 templates take part, both as reference and as
        # candidate, so the loop bounds agree with the normalisation below.
        n_templates = len(U) - dim + 1
        log_sum = 0.0
        for i in range(n_templates):
            matches = 0
            for j in range(n_templates):
                is_match = True
                for k in range(dim):
                    # Written as "not (<=)" so that a NaN difference counts
                    # as a non-match, exactly like max(|a - b|) <= r would.
                    if not (abs(U[i + k] - U[j + k]) <= r):
                        is_match = False
                        break
                if is_match:
                    matches += 1
            log_sum += np.log(matches / n_templates)
        return log_sum / n_templates

    if len(U) <= m:
        raise ValueError("U must contain more than m samples")

    return abs(_phi(m) - _phi(m + 1))

# Sliding window function for Approximate Entropy
def calculate_apen_sliding_window(signal, m=2, r_factor=0.2, window_size=128, step_size=64):
    """
    Evaluate Approximate Entropy on successive windows of a signal.

    Windows start at sample 0 and advance by step_size; only windows that fit
    entirely inside the signal are evaluated. A single tolerance, derived from
    the standard deviation of the whole input signal, is shared by all windows.

    Parameters:
    signal : array-like
        The input signal (EEG data).
    m : int
        The embedding dimension.
    r_factor : float
        Multiplier applied to the signal's standard deviation to obtain the
        tolerance (r = r_factor * std(signal)).
    window_size : int
        Number of samples in each window.
    step_size : int
        Number of samples between the starts of consecutive windows.

    Returns:
    array
        One Approximate Entropy value per window, in window order (empty when
        the signal is shorter than window_size).
    """
    # Tolerance is fixed once from the full signal, not recomputed per window
    tolerance = r_factor * np.std(signal)

    # Start offsets of every window that fits completely inside the signal
    last_start = len(signal) - window_size
    window_starts = range(0, last_start + 1, step_size)

    return np.array([
        approximate_entropy(signal[begin:begin + window_size], m=m, r=tolerance)
        for begin in window_starts
    ])

# Function to read and process large EEG data in chunks if needed
def calculate_apen_for_large_data(preprocessed_file, eeg_channel, output_directory, m=2, r_factor=0.2, window_size=128, step_size=64, chunksize=100000):
    """
    Compute sliding-window Approximate Entropy for one EEG channel of a CSV file.

    The CSV is read in chunks so the full multi-column table is never held in
    memory; only the requested channel's samples are retained. The windows are
    then evaluated over the complete channel signal, so no window is lost at a
    chunk boundary and the result does not depend on chunksize.

    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file. It must contain the columns
        'Patient_ID', 'Label' and the column named by eeg_channel.
    eeg_channel : str
        Name of the EEG channel column in the CSV file.
    output_directory : str
        Directory to save the output CSV file (created if missing).
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor.
    window_size : int
        Size of sliding window.
    step_size : int
        Step size for sliding window.
    chunksize : int
        Number of rows to read at a time from the file.

    Returns:
    array
        Approximate entropy for each window of the channel. The same values
        are written, with the patient id and label, to
        'approx_entropy_<Patient_ID>.csv' in output_directory.

    Raises:
    ValueError
        If the file contains no data rows.
    """
    channel_parts = []
    patient_id = None
    label = None

    # Read chunk by chunk, keeping only the samples of the requested channel
    for chunk in pd.read_csv(preprocessed_file, chunksize=chunksize):
        if chunk.empty:
            continue
        channel_parts.append(chunk[eeg_channel].values)

        # Capture Patient_ID and Label only once, from the first data row.
        # NOTE(review): every window is attributed to this first patient/label;
        # this presumably assumes a single-patient file -- confirm with callers.
        if patient_id is None:
            patient_id = chunk['Patient_ID'].iloc[0]
            label = chunk['Label'].iloc[0]

    if not channel_parts:
        raise ValueError(f"No data rows found in {preprocessed_file}")

    # Window the whole channel at once so windows can span chunk boundaries
    # and a single tolerance is used for the entire signal.
    signal = np.concatenate(channel_parts)
    apen_values = calculate_apen_sliding_window(signal, m, r_factor, window_size, step_size)

    # Prepare the results in a DataFrame
    apen_df = pd.DataFrame({
        'Patient_ID': [patient_id] * len(apen_values),
        'Label': [label] * len(apen_values),
        'ApEn_Values': apen_values
    })

    # Create the output directory if it does not exist
    os.makedirs(output_directory, exist_ok=True)

    # Save to CSV in the specified output directory
    output_file = os.path.join(output_directory, f'approx_entropy_{patient_id}.csv')
    apen_df.to_csv(output_file, index=False)
    print(f"Approximate Entropy values saved to {output_file}")

    return apen_values



In [4]:
# Example usage
if __name__ == "__main__":
    # Input CSV, the single EEG channel to analyse (e.g. 'Cz', 'Fz'),
    # and the directory that receives the result file
    preprocessed_file = 'preprocessed_eeg_data.csv'
    eeg_channel = 'Fz'
    output_directory = 'ApproximatedForSingle'

    calculate_apen_for_large_data(
        preprocessed_file=preprocessed_file,
        eeg_channel=eeg_channel,
        output_directory=output_directory,
        m=2,
        r_factor=0.2,
        window_size=128,
        step_size=64,
    )


Approximate Entropy values saved to ApproximatedForSingle\approx_entropy_v10p_v10p.csv.csv


In [5]:

# Function to calculate Approximate Entropy for all patients
def calculate_apen_for_all_patients(preprocessed_file, output_directory, m=2, r_factor=0.2, window_size=128, step_size=64):
    """
    Compute sliding-window Approximate Entropy for every patient and channel.

    The CSV is loaded in full and split by 'Patient_ID' (in order of first
    appearance; rows without a Patient_ID are skipped). Every column other
    than 'Label' and 'Patient_ID' is treated as an EEG channel. For each
    patient one file 'approx_entropy_patient_<Patient_ID>.csv' is written to
    output_directory with one row per (channel, window).

    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file.
    output_directory : str
        Directory to save the per-patient CSV files (created if missing).
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor.
    window_size : int
        Size of sliding window.
    step_size : int
        Step size for sliding window.

    Output columns:
    Patient_ID, Label, ApEn, Channel
        'Label' is the label of the patient's first row; 'Channel' names the
        column the ApEn value was computed from, so values from different
        channels stay distinguishable.
    """
    output_columns = ['Patient_ID', 'Label', 'ApEn', 'Channel']

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Load the preprocessed EEG data
    df = pd.read_csv(preprocessed_file)

    # Assuming all EEG channels are columns except 'Label' and 'Patient_ID';
    # the column set is the same for every patient, so it is derived once.
    eeg_columns = [col for col in df.columns if col not in ('Label', 'Patient_ID')]

    # One pass over the frame; sort=False keeps patients in file order
    for patient_id, patient_data in df.groupby('Patient_ID', sort=False):
        label = patient_data['Label'].iloc[0]

        # Calculate ApEn for each EEG channel and store results
        results = []
        for col in eeg_columns:
            signal = patient_data[col].values
            apen_values = calculate_apen_sliding_window(signal, m=m, r_factor=r_factor, window_size=window_size, step_size=step_size)
            results.extend(
                {'Patient_ID': patient_id, 'Label': label, 'ApEn': apen_value, 'Channel': col}
                for apen_value in apen_values
            )

        # Explicit columns keep the header even when no window was produced
        results_df = pd.DataFrame(results, columns=output_columns)
        output_file = os.path.join(output_directory, f'approx_entropy_patient_{patient_id}.csv')
        results_df.to_csv(output_file, index=False)
        print(f"Approximate Entropy results for Patient {patient_id} saved to {output_file}")



In [6]:
# Example usage
if __name__ == "__main__":
    # Input CSV and the directory that receives one result file per patient
    preprocessed_file = 'preprocessed_eeg_data.csv'
    output_directory = 'ApproximatedForSingle'

    calculate_apen_for_all_patients(
        preprocessed_file=preprocessed_file,
        output_directory=output_directory,
        window_size=128,
        step_size=64,
    )


Approximate Entropy results for Patient v10p_v10p.csv saved to ApproximatedForSingle\approx_entropy_patient_v10p_v10p.csv.csv
Approximate Entropy results for Patient v12p_v12p.csv saved to ApproximatedForSingle\approx_entropy_patient_v12p_v12p.csv.csv
Approximate Entropy results for Patient v14p_v14p.csv saved to ApproximatedForSingle\approx_entropy_patient_v14p_v14p.csv.csv
Approximate Entropy results for Patient v15p_v15p.csv saved to ApproximatedForSingle\approx_entropy_patient_v15p_v15p.csv.csv
Approximate Entropy results for Patient v173_v173.csv saved to ApproximatedForSingle\approx_entropy_patient_v173_v173.csv.csv
Approximate Entropy results for Patient v177_v177.csv saved to ApproximatedForSingle\approx_entropy_patient_v177_v177.csv.csv
Approximate Entropy results for Patient v179_v179.csv saved to ApproximatedForSingle\approx_entropy_patient_v179_v179.csv.csv
Approximate Entropy results for Patient v181_v181.csv saved to ApproximatedForSingle\approx_entropy_patient_v181_v181.