In [1]:
import pandas as pd
import numpy as np

# Load the preprocessed EEG data
preprocessed_eeg_df = pd.read_csv('preprocessed_eeg_data.csv')

# Define the channels based on regions for entropy ratios
frontal_channels = ['Fp1', 'Fp2', 'F3', 'F4', 'F7', 'F8', 'Fz']
central_channels = ['C3', 'C4', 'Cz']
parietal_channels = ['P3', 'P4', 'Pz']
occipital_channels = ['O1', 'O2']
temporal_channels = ['T3', 'T4', 'T5', 'T6']
all_channels = (
    frontal_channels
    + central_channels
    + parietal_channels
    + occipital_channels
    + temporal_channels
)

# Define a high entropy threshold (e.g., 0.8 - adjust based on data)
entropy_threshold = 0.8

# Every group shares the frame's columns, so channel availability is
# resolved once here instead of once per patient and per statistic.
available_columns = set(preprocessed_eeg_df.columns)
present_channels = [channel for channel in all_channels if channel in available_columns]

# Per-channel statistics as (column suffix, function) pairs. The outer loop
# below walks this list first so the output keeps its original column layout:
# all averages, then all variances, then all high-entropy counts.
channel_statistics = [
    ('avg_entropy', lambda values: values.mean()),
    ('entropy_variance', lambda values: values.var()),
    ('high_entropy_count', lambda values: (values > entropy_threshold).sum()),
]


def _region_average(group, channels):
    """Return the mean of the per-channel means for the region's channels.

    Only channels that exist as columns of ``group`` are used, mirroring the
    ``if channel in group.columns`` guard of the per-channel statistics, so a
    missing channel no longer raises ``KeyError``. Returns NaN when none of
    the region's channels is present.
    """
    columns = [channel for channel in channels if channel in group.columns]
    if not columns:
        return np.nan
    return group[columns].mean().mean()


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is 0.

    Writing NaN explicitly (instead of omitting the key) keeps the set of
    feature columns independent of the data.
    """
    if denominator != 0:
        return numerator / denominator
    return np.nan


# Group data by Patient_ID and Label for processing per patient
grouped = preprocessed_eeg_df.groupby(['Patient_ID', 'Label'])

# Data structure to collect results (one dict per patient)
results = []

# Loop through each patient
for (patient_id, label), group in grouped:
    patient_data = {'Patient_ID': patient_id, 'Label': label}

    # Average, variance and high-entropy instance count per channel
    for suffix, statistic in channel_statistics:
        for channel in present_channels:
            patient_data[f'{channel}_{suffix}'] = statistic(group[channel])

    # Entropy averages per region
    frontal_avg = _region_average(group, frontal_channels)
    parietal_avg = _region_average(group, parietal_channels)
    central_avg = _region_average(group, central_channels)
    occipital_avg = _region_average(group, occipital_channels)
    temporal_avg = _region_average(group, temporal_channels)

    # Ratios across regions (NaN when the denominator is zero)
    patient_data['frontal_parietal_ratio'] = _safe_ratio(frontal_avg, parietal_avg)
    patient_data['central_occipital_ratio'] = _safe_ratio(central_avg, occipital_avg)
    patient_data['frontal_temporal_ratio'] = _safe_ratio(frontal_avg, temporal_avg)

    # Append results
    results.append(patient_data)

# Convert results to DataFrame
features_df = pd.DataFrame(results)

# Save to CSV for further use or model training
features_df.to_csv('eeg_entropy_features.csv', index=False)

# Display the feature DataFrame
print(features_df.head())


      Patient_ID  Label  Fp1_avg_entropy  Fp2_avg_entropy  F3_avg_entropy  \
0  v107_v107.csv      0         0.028860        -0.082564       -0.078179   
1  v108_v108.csv      0        -0.079129         0.108437        0.183502   
2  v109_v109.csv      0        -0.029349        -0.081038       -0.146145   
3  v10p_v10p.csv      1         0.255510         0.268580       -0.045112   
4  v110_v110.csv      0        -0.031864        -0.023616       -0.045815   

   F4_avg_entropy  F7_avg_entropy  F8_avg_entropy  Fz_avg_entropy  \
0       -0.024073       -0.003916       -0.010968        0.017927   
1        0.060840        0.032656       -0.038391        0.016858   
2       -0.083420       -0.123330       -0.022056       -0.064505   
3        0.116788       -0.030212        0.203764        0.156493   
4       -0.010767        0.045998        0.000575        0.020589   

   C3_avg_entropy  ...  Pz_high_entropy_count  O1_high_entropy_count  \
0       -0.047716  ...                   3922     

In [2]:
# Class balance check: number of feature rows per Label value
label_counts = features_df['Label'].value_counts()
print(label_counts)

Label
1    61
0    60
Name: count, dtype: int64


In [None]:
import numpy as np
import pandas as pd
from scipy.signal import welch

# Frequency bands as (lower edge, upper edge), expressed in the same units
# as `sampling_freq` below (presumably Hz -- confirm against the recording setup)
bands = dict(
    delta=(1, 4),
    theta=(4, 8),
    alpha=(8, 13),
    beta=(13, 30),
    gamma=(30, 45),
)

# Channel names belonging to each scalp region
frontal_channels = [
    'Fp1', 'Fp2',
    'F3', 'F4', 'F7', 'F8', 'Fz',
]
central_channels = ['C3', 'C4', 'Cz']
parietal_channels = ['P3', 'P4', 'Pz']
occipital_channels = ['O1', 'O2']
temporal_channels = ['T3', 'T4', 'T5', 'T6']

# Sampling frequency of the EEG data
sampling_freq = 128  # Update if different

# Read the preprocessed EEG table from disk
eeg_data = pd.read_csv('preprocessed_eeg_data.csv')

# One group per (Patient_ID, Label) pair, so features are computed per patient
grouped = eeg_data.groupby(by=['Patient_ID', 'Label'])
# Function to calculate band power
def bandpower(data, sf, band):
    """Estimate the power of a 1-D signal inside one frequency band.

    The power spectral density (PSD) is estimated with Welch's method using
    segments of at most ``2 * sf`` samples. The PSD bins that fall inside the
    band are then summed and multiplied by the bin width, i.e. a
    rectangle-rule approximation of the integral of the PSD over the band.

    Parameters
    ----------
    data : array-like
        1-D sequence of samples.
    sf : int or float
        Sampling frequency of ``data``. ``band`` must use the same units.
    band : tuple
        ``(fmin, fmax)`` band edges. Both edges are inclusive, so a PSD bin
        lying exactly on an edge shared by two adjacent bands contributes to
        both of them.

    Returns
    -------
    float
        The band power, or NaN when the input is too short (fewer than two
        samples / fewer than two PSD bins) for a frequency resolution to exist.
    """
    fmin, fmax = band
    samples = np.asarray(data)

    # With fewer than two samples there is no second frequency bin, so
    # `freqs[1] - freqs[0]` below would raise IndexError.
    if samples.size < 2:
        return float('nan')

    # Clamp the segment length to the input length. scipy falls back to the
    # same value itself, but emits a UserWarning each time it has to.
    nperseg = min(int(sf * 2), samples.size)
    freqs, psd = welch(samples, sf, nperseg=nperseg)
    if freqs.size < 2:
        return float('nan')

    # Frequency resolution (width of one PSD bin)
    freq_res = freqs[1] - freqs[0]
    # Power in the specific band
    in_band = (freqs >= fmin) & (freqs <= fmax)
    band_power = np.sum(psd[in_band]) * freq_res
    return band_power

# One feature record (dict) per patient is accumulated here
results = []

# Lookups that are identical for every patient
all_channels = (
    frontal_channels
    + central_channels
    + parietal_channels
    + occipital_channels
    + temporal_channels
)
region_names = ['frontal', 'central', 'parietal', 'occipital', 'temporal']
region_channel_lists = [
    frontal_channels,
    central_channels,
    parietal_channels,
    occipital_channels,
    temporal_channels,
]

# Build the feature record of each (Patient_ID, Label) group
for group_key, group in grouped:
    patient_id, label = group_key
    patient_data = {'Patient_ID': patient_id, 'Label': label}

    # Per-channel band powers and within-channel ratios
    for channel in all_channels:
        if channel not in group.columns:
            continue

        # 1-D array of the channel's samples, as required for the PSD estimate
        channel_data = group[channel].values.flatten()
        for band_name, band_range in bands.items():
            patient_data[f'{channel}_{band_name}_power'] = bandpower(channel_data, sampling_freq, band_range)

        # theta/beta and alpha/beta ratios; 0 is stored when beta power is zero or absent
        theta_power = patient_data.get(f'{channel}_theta_power', 0)
        beta_power = patient_data.get(f'{channel}_beta_power', 0)
        alpha_power = patient_data.get(f'{channel}_alpha_power', 0)
        if beta_power:
            theta_beta = theta_power / beta_power
            alpha_beta = alpha_power / beta_power
        else:
            theta_beta = 0
            alpha_beta = 0
        patient_data[f'{channel}_theta_beta_ratio'] = theta_beta
        patient_data[f'{channel}_alpha_beta_ratio'] = alpha_beta

    # Region-level average band power over the channels that were computed
    for region, channels in zip(region_names, region_channel_lists):
        for band_name in bands:
            band_powers = [
                patient_data[f'{channel}_{band_name}_power']
                for channel in channels
                if f'{channel}_{band_name}_power' in patient_data
            ]
            if band_powers:
                region_average = np.mean(band_powers)
            else:
                region_average = 0
            patient_data[f'{region}_{band_name}_avg_power'] = region_average

    # Frontal-to-parietal theta ratio, only when the parietal average is non-zero
    parietal_theta = patient_data['parietal_theta_avg_power']
    if parietal_theta:
        patient_data['frontal_parietal_theta_ratio'] = patient_data['frontal_theta_avg_power'] / parietal_theta

    results.append(patient_data)

# Assemble all patient records into a single table
frequency_features_df = pd.DataFrame(results)

# Persist the features for later analysis or model training
frequency_features_df.to_csv('eeg_frequency_features.csv', index=False)

# Show the first rows and the label distribution
print(frequency_features_df.head())
print(frequency_features_df['Label'].value_counts())


      Patient_ID  Label  Fp1_delta_power  Fp1_theta_power  Fp1_alpha_power  \
0  v107_v107.csv      0      1539.260878       647.897061       471.335050   
1  v108_v108.csv      0      2310.407244       645.555106       375.233599   
2  v109_v109.csv      0      1791.616374       397.226841       162.698976   
3  v10p_v10p.csv      1       836.477757       385.366247       249.167116   
4  v110_v110.csv      0      1095.072758       386.386770       234.786637   

   Fp1_beta_power  Fp1_gamma_power  Fp1_theta_beta_ratio  \
0      320.292596        72.602806              2.022829   
1      234.805055        62.895223              2.749324   
2      215.229198        52.582005              1.845599   
3      417.249998       238.687758              0.923586   
4      427.818918       239.815063              0.903155   

   Fp1_alpha_beta_ratio  Fp2_delta_power  ...  occipital_theta_avg_power  \
0              1.471576      1278.199096  ...                 625.574419   
1              1.5

: 