In [1]:
import numpy as np
import pandas as pd
from numba import jit

# Optimized Approximate Entropy function with Numba JIT and sliding window
@jit(nopython=True)
def approximate_entropy(U, m, r):
    """
    Compute Approximate Entropy (ApEn) of a time series in a memory-efficient way.
    
    Parameters:
    U : array-like
        The input signal.
    m : int
        The length of compared run of data.
    r : float
        The filtering level (standard deviation * r).
    
    Returns:
    float
        Approximate entropy of the input signal.
    """
    def _phi(m):
        N = len(U)
        count = 0
        for i in range(N - m):
            template = U[i:i + m]
            matches = 0
            for j in range(N - m):
                candidate = U[j:j + m]
                if np.max(np.abs(template - candidate)) <= r:
                    matches += 1
            count += np.log(matches / (N - m + 1))
        return count / (N - m + 1)

    return abs(_phi(m) - _phi(m + 1))

# Sliding window function for Approximate Entropy
def calculate_apen_sliding_window(signal, m=2, r_factor=0.2, window_size=128, step_size=64):
    """
    Compute Approximate Entropy using a sliding window approach.
    
    Parameters:
    signal : array-like
        The input signal (EEG data).
    m : int
        The embedding dimension.
    r_factor : float
        The factor to calculate the tolerance level (r = r_factor * std).
    window_size : int
        Size of the sliding window (number of samples).
    step_size : int
        Step size for the sliding window (how many samples to shift).
    
    Returns:
    array
        Approximate entropy for each window.
    """
    r = r_factor * np.std(signal)  # Set r based on the standard deviation of the signal
    apen_values = []

    # Slide the window across the signal
    for start in range(0, len(signal) - window_size + 1, step_size):
        window = signal[start:start + window_size]
        apen_value = approximate_entropy(window, m=m, r=r)
        apen_values.append(apen_value)

    return np.array(apen_values)

# Function to read and process large EEG data in chunks if needed
def calculate_apen_for_large_data(preprocessed_file, eeg_channel, m=2, r_factor=0.2, window_size=128, step_size=64, chunksize=100000):
    """
    Compute Approximate Entropy for large EEG data using sliding windows and chunk processing.
    
    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file.
    eeg_channel : str
        Name of the EEG channel column in the CSV file.
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor.
    window_size : int
        Size of sliding window.
    step_size : int
        Step size for sliding window.
    chunksize : int
        Number of samples to read at a time from the file.
    
    Returns:
    array
        Approximate entropy for each window in the large dataset.
    """
    apen_values = []

    # Load data in chunks to avoid memory overload
    for chunk in pd.read_csv(preprocessed_file, chunksize=chunksize):
        signal = chunk[eeg_channel].values
        apen_chunk_values = calculate_apen_sliding_window(signal, m, r_factor, window_size, step_size)
        apen_values.extend(apen_chunk_values)

    return np.array(apen_values)




In [2]:
# Example usage
if __name__ == "__main__":
    apen_values = calculate_apen_for_large_data('preprocessed_eeg_data.csv', 'Fz', window_size=128, step_size=64)
    print("Approximate Entropy values:", apen_values)

Approximate Entropy values: [0.6438188  0.60599725 0.71156773 ... 0.38065699 0.32595    0.50583927]


In [2]:
import numpy as np
import pandas as pd
from numba import jit
import os

# Optimized Approximate Entropy function with Numba JIT
@jit(nopython=True)
def approximate_entropy(U, m, r):
    """
    Compute Approximate Entropy (ApEn) of a time series.
    
    Parameters:
    U : array-like
        The input signal.
    m : int
        The length of compared run of data.
    r : float
        The filtering level (standard deviation * r).
    
    Returns:
    float
        Approximate entropy of the input signal.
    """
    def _phi(m):
        N = len(U)
        count = 0
        for i in range(N - m):
            template = U[i:i + m]
            matches = 0
            for j in range(N - m):
                candidate = U[j:j + m]
                if np.max(np.abs(template - candidate)) <= r:
                    matches += 1
            count += np.log(matches / (N - m + 1))
        return count / (N - m + 1)

    return abs(_phi(m) - _phi(m + 1))

# Function to calculate ApEn for each feature of a single patient
def calculate_apen_for_patient(eeg_data, patient_id, m=2, r_factor=0.2):
    """
    Calculate Approximate Entropy for all features of a single patient.
    
    Parameters:
    eeg_data : DataFrame
        EEG data of a single patient with 19 features (channels).
    patient_id : str
        Identifier for the patient (Patient_ID).
    m : int
        The embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor for ApEn calculation.
        
    Returns:
    DataFrame
        A DataFrame with Patient_ID and ApEn values for each channel.
    """
    apen_results = {'Patient_ID': patient_id}
    
    for channel in eeg_data.columns:
        signal = eeg_data[channel].values
        r = r_factor * np.std(signal)  # Set tolerance based on signal's standard deviation
        apen_value = approximate_entropy(signal, m=m, r=r)
        apen_results[channel] = apen_value
    
    # Convert the results to a DataFrame
    apen_df = pd.DataFrame([apen_results])
    
    return apen_df

# Function to calculate ApEn for each patient in a preprocessed file
def process_patients_in_file(preprocessed_file,output_directory, m=2, r_factor=0.2):
    """
    Calculate Approximate Entropy for all patients in the preprocessed file.
    
    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file containing multiple patients.
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor for ApEn calculation.
        
    Returns:
    None
    """
    # Load preprocessed EEG data
    df = pd.read_csv(preprocessed_file)
    os.makedirs(output_directory, exist_ok=True)
    # Group by Patient_ID and process each patient's data separately
    patient_groups = df.groupby('Patient_ID')
    
    # Loop through each patient group and calculate ApEn for all 19 features
    for patient_id, patient_data in patient_groups:
        # Drop metadata columns (Label, Patient_ID) to focus only on EEG features
        eeg_data = patient_data.drop(columns=['Label', 'Patient_ID'])
        
        # Calculate ApEn for the patient
        apen_df = calculate_apen_for_patient(eeg_data, patient_id, m=m, r_factor=r_factor)
        
        # Save the result to a CSV file named after the patient
        output_file = os.path.join(output_directory, f'approx_entropy_patient_{patient_id}.csv')
        apen_df.to_csv(output_file, index=False)
        print(f"Saved Approximate Entropy results for patient {patient_id} to {output_file}")
        


In [5]:
# Example usage
if __name__ == "__main__":
    # Path to the preprocessed EEG file (containing multiple patients)
    preprocessed_file = 'preprocessed_eeg_data.csv'
    output_directory = 'ApproximatedData'
    # Process all patients in the file
    process_patients_in_file(preprocessed_file,output_directory, m=2, r_factor=0.2)


Saved Approximate Entropy results for patient v107_v107.csv to ApproximatedData\approx_entropy_patient_v107_v107.csv.csv
Saved Approximate Entropy results for patient v108_v108.csv to ApproximatedData\approx_entropy_patient_v108_v108.csv.csv
Saved Approximate Entropy results for patient v109_v109.csv to ApproximatedData\approx_entropy_patient_v109_v109.csv.csv
Saved Approximate Entropy results for patient v10p_v10p.csv to ApproximatedData\approx_entropy_patient_v10p_v10p.csv.csv
Saved Approximate Entropy results for patient v110_v110.csv to ApproximatedData\approx_entropy_patient_v110_v110.csv.csv
Saved Approximate Entropy results for patient v111_v111.csv to ApproximatedData\approx_entropy_patient_v111_v111.csv.csv
Saved Approximate Entropy results for patient v112_v112.csv to ApproximatedData\approx_entropy_patient_v112_v112.csv.csv
Saved Approximate Entropy results for patient v113_v113.csv to ApproximatedData\approx_entropy_patient_v113_v113.csv.csv
Saved Approximate Entropy result

In [1]:
import os
import numpy as np
import pandas as pd
from numba import jit

# Optimized Approximate Entropy function with Numba JIT
@jit(nopython=True)
def approximate_entropy(U, m, r):
    """
    Compute Approximate Entropy (ApEn) of a time series.
    
    Parameters:
    U : array-like
        The input signal.
    m : int
        The length of compared run of data.
    r : float
        The filtering level (standard deviation * r).
    
    Returns:
    float
        Approximate entropy of the input signal.
    """
    def _phi(m):
        N = len(U)
        count = 0
        for i in range(N - m):
            template = U[i:i + m]
            matches = 0
            for j in range(N - m):
                candidate = U[j:j + m]
                if np.max(np.abs(template - candidate)) <= r:
                    matches += 1
            count += np.log(matches / (N - m + 1))
        return count / (N - m + 1)

    return abs(_phi(m) - _phi(m + 1))

# Function to calculate ApEn for each feature of a single patient
def calculate_apen_for_patient(eeg_data, patient_id, m=2, r_factor=0.2):
    """
    Calculate Approximate Entropy for all features of a single patient.
    
    Parameters:
    eeg_data : DataFrame
        EEG data of a single patient with 19 features (channels).
    patient_id : str
        Identifier for the patient (Patient_ID).
    m : int
        The embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor for ApEn calculation.
        
    Returns:
    DataFrame
        A DataFrame with Patient_ID and ApEn values for each channel.
    """
    apen_results = {'Patient_ID': patient_id}
    
    for channel in eeg_data.columns:
        signal = eeg_data[channel].values
        r = r_factor * np.std(signal)  # Set tolerance based on signal's standard deviation
        apen_value = approximate_entropy(signal, m=m, r=r)
        apen_results[channel] = apen_value
    
    # Convert the results to a DataFrame
    apen_df = pd.DataFrame([apen_results])
    
    return apen_df

# Function to calculate ApEn for each patient and skip already processed patients
def process_patients_in_file(preprocessed_file, output_directory, m=2, r_factor=0.2):
    """
    Calculate Approximate Entropy for all patients in the preprocessed file,
    skipping patients that have already been processed.
    
    Parameters:
    preprocessed_file : str
        Path to the preprocessed EEG CSV file containing multiple patients.
    m : int
        Embedding dimension for ApEn calculation.
    r_factor : float
        Tolerance factor for ApEn calculation.
        
    Returns:
    None
    """
    # Load preprocessed EEG data
    df = pd.read_csv(preprocessed_file)
    os.makedirs(output_directory, exist_ok=True)
    
    # Group by Patient_ID and process each patient's data
    patient_groups = df.groupby('Patient_ID')

    # Get list of already processed patient files in the output directory
    processed_patients = {
        filename.replace('approx_entropy_patient_', '').replace('.csv', '')
        for filename in os.listdir(output_directory)
        if filename.startswith('approx_entropy_patient_')
    }

    # Process only patients that haven't been processed yet
    for patient_id, patient_data in patient_groups:
        # Convert patient_id to string and clean it
        patient_id_str = str(patient_id).replace('.csv', '')

        # Skip if this patient's file is already processed
        if patient_id_str in processed_patients:
            print(f"Skipping patient {patient_id_str} (already processed)")
            continue

        # Drop metadata columns (Label, Patient_ID) to focus only on EEG features
        eeg_data = patient_data.drop(columns=['Label', 'Patient_ID'])
        
        # Calculate ApEn for the patient
        apen_df = calculate_apen_for_patient(eeg_data, patient_id_str, m=m, r_factor=r_factor)
        
        # Save the result to a CSV file named after the patient
        output_file = os.path.join(output_directory, f'approx_entropy_patient_{patient_id_str}.csv')
        apen_df.to_csv(output_file, index=False)
        print(f"Saved Approximate Entropy results for patient {patient_id_str} to {output_file}")

#


In [3]:
#  Example usage
if __name__ == "__main__":
    # Path to the preprocessed EEG file (containing multiple patients)
    preprocessed_file = 'preprocessed_eeg_data.csv'
    output_directory = 'ApproximatedData'
    
    # Process all patients in the file, skipping already processed patients
    process_patients_in_file(preprocessed_file, output_directory, m=2, r_factor=0.2)

Skipping patient v107_v107 (already processed)
Skipping patient v108_v108 (already processed)
Skipping patient v109_v109 (already processed)
Skipping patient v10p_v10p (already processed)
Skipping patient v110_v110 (already processed)
Skipping patient v111_v111 (already processed)
Skipping patient v112_v112 (already processed)
Skipping patient v113_v113 (already processed)
Skipping patient v114_v114 (already processed)
Skipping patient v115_v115 (already processed)
Skipping patient v116_v116 (already processed)
Skipping patient v117_v117 (already processed)
Skipping patient v118_v118 (already processed)
Skipping patient v120_v120 (already processed)
Skipping patient v121_v121 (already processed)
Skipping patient v123_v123 (already processed)
Skipping patient v125_v125 (already processed)
Skipping patient v127_v127 (already processed)
Skipping patient v129_v129 (already processed)
Skipping patient v12p_v12p (already processed)
Skipping patient v131_v131 (already processed)
Skipping pati

In [1]:
import os
import pandas as pd

# Function to load class labels from the preprocessed EEG data
def load_class_labels(preprocessed_file):
   
    # Load the preprocessed EEG data
    df = pd.read_csv(preprocessed_file)
    
    # Create a dictionary with Patient_ID as keys and class labels as values
    class_labels = df[['Patient_ID', 'Label']].drop_duplicates()
    class_label_dict = dict(zip(class_labels['Patient_ID'], class_labels['Label']))
    
    return class_label_dict

# Function to combine Approximate Entropy values with class labels
def combine_entropy_with_class(approx_entropy_folder, class_label_dict, output_file):
    
    combined_data = []  # List to store combined data for all patients
    
    # Loop through each Approximate Entropy CSV file
    for file_name in os.listdir(approx_entropy_folder):
        if file_name.endswith('.csv'):  # Ensure we're processing CSV files
            file_path = os.path.join(approx_entropy_folder, file_name)
            # Load the Approximate Entropy data
            df = pd.read_csv(file_path)
            patient_id = df['Patient_ID'].iloc[0]
            
            # Get the class label for the patient
            class_label = class_label_dict.get(patient_id, 'Unknown')
            
            # Add the class label to the Approximate Entropy DataFrame
            df['Class_Label'] = class_label
            
            # Append the DataFrame to the combined data list
            combined_data.append(df)
    
    # Concatenate all data into a single DataFrame
    combined_df = pd.concat(combined_data, ignore_index=True)
    
    # Save the combined data to a new CSV file
    combined_df.to_csv(output_file, index=False)
    print(f"Combined data saved to {output_file}")

# Main function to process everything
def process_approx_entropy_with_labels(preprocessed_file, approx_entropy_folder, output_file):
  
    # Load class labels from the preprocessed EEG data
    class_label_dict = load_class_labels(preprocessed_file)
    
    # Combine Approximate Entropy values with class labels
    combine_entropy_with_class(approx_entropy_folder, class_label_dict, output_file)




In [2]:

if __name__ == "__main__":
   
    preprocessed_file = 'preprocessed_eeg_data.csv'
    approx_entropy_folder = 'ApproximatedData'
    output_file = 'combined_entropy_with_class.csv'

    process_approx_entropy_with_labels(preprocessed_file, approx_entropy_folder, output_file)

Combined data saved to combined_entropy_with_class.csv
