In [2]:
#!pip install mne scipy
#!pip install pandas numpy openpyxl
#!pip install tsfresh
#!pip install PyWavelets

In [1]:
import os
import numpy as np
import scipy.signal as signal
import mne

def process_all_eeg_data() -> dict:
    """
    Process all .bdf EEG files in the current directory, applying filters and extracting data from
    channels A15 (O1), A16 (Oz), and A17 (O2).

    Returns
    -------
    dict
        A dictionary containing processed EEG data and header information for each file.
    """
    # Get a list of all .bdf files in the current directory
    files = [f for f in os.listdir('.') if f.endswith('.bdf')]
    if not files:
        raise FileNotFoundError("No BDF files found in the current directory")
    
    # Initialize the results dictionary
    results = {}
    
    # Loop over each file
    for filename in files:
        full_file_path = os.path.join(os.getcwd(), filename)
        
        # Read the raw EEG data using MNE
        raw = mne.io.read_raw_bdf(full_file_path, preload=True)
        hdr = raw.info
        
        # Select data from channels A15 (O1), A16 (Oz), and A17 (O2)
        channels_select = ['A15', 'A16', 'A17']
        missing_channels = [ch for ch in channels_select if ch not in hdr['ch_names']]
        if missing_channels:
            raise ValueError(f"Selected channels {missing_channels} not found in the data")
        
        channel_indices = [hdr['ch_names'].index(ch) for ch in channels_select]
        EEG_data = raw.get_data(picks=channel_indices).T  # Shape: (n_samples, n_channels)
        
        # Filter EEG Data
        Fs = hdr['sfreq']  # Sampling frequency
        
        # Bandpass filter parameters (2 to 80 Hz)
        Fc_BP = [2, 80]  # Bandpass frequency range
        Wn_BP = [f / (Fs / 2) for f in Fc_BP]  # Normalize by Nyquist frequency
        
        # Create and apply bandpass filter (6th order zero-phase Butterworth IIR)
        B_BP, A_BP = signal.butter(3, Wn_BP, btype='bandpass')
        EEG_filtered_BP = signal.filtfilt(B_BP, A_BP, EEG_data, axis=0)
        
        # Band stop filter parameters (48 to 52 Hz)
        Fc_BS = [48, 52]  # Band stop frequency range
        Wn_BS = [f / (Fs / 2) for f in Fc_BS]  # Normalize by Nyquist frequency
        
        # Create and apply band stop filter (6th order zero-phase Butterworth IIR)
        B_BS, A_BS = signal.butter(3, Wn_BS, btype='bandstop')
        EEG_filtered = signal.filtfilt(B_BS, A_BS, EEG_filtered_BP, axis=0)
        
        # Extract prefix before underscore from the filename
        underscore_index = filename.find('_')
        if underscore_index == -1:
            raise ValueError(f"Filename format error, no underscore found in {filename}")
        key = filename[:underscore_index]
        
        # Store results in the dictionary
        results[key] = {
            'data': EEG_filtered,      # Filtered data for channels A15, A16, A17
            'channels': channels_select,  # List of channel names
            'header': hdr
        }
        
        # Display a message indicating successful processing
        print(f"Data for file {filename} processed successfully")
    
    return results


In [2]:
results = process_all_eeg_data()

Extracting EDF parameters from c:\Users\WERPELGA\OneDrive - Danone\Desktop\UoA\2024.1&2\Python Gabe\A1_Full_Block.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 739327  =      0.000 ...   361.000 secs...
Data for file A1_Full_Block.bdf processed successfully
Extracting EDF parameters from c:\Users\WERPELGA\OneDrive - Danone\Desktop\UoA\2024.1&2\Python Gabe\A3_Full_Block.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 757759  =      0.000 ...   370.000 secs...
Data for file A3_Full_Block.bdf processed successfully
Extracting EDF parameters from c:\Users\WERPELGA\OneDrive - Danone\Desktop\UoA\2024.1&2\Python Gabe\A4_Full_Block.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 782335  =      0.000 ...   382.000 secs...
Data for file A4_Full_Block.bdf processed successfully
Extracting EDF parameters from c:\Users\WERPELGA\One

In [3]:
import numpy as np
import pandas as pd

def segment_eeg_data_new(results: dict, cohort_file: str = 'Cohort.xlsx') -> dict:
    """
    Segments EEG data into predefined sections (EC, EO, LC, RC, DEC, NDEC) based on cohort information,
    removing the first 2 seconds from each section.

    Parameters
    ----------
    results : dict
        Dictionary containing the raw EEG data and header information for each key (participant).
    cohort_file : str, optional
        Path to the Excel file containing cohort information (default is 'Cohort.xlsx').

    Returns
    -------
    dict
        Dictionary containing segmented EEG data for each participant.
    """
    # Read the cohort information from an Excel file
    cohort_table = pd.read_excel(cohort_file)
    # Segment Duration (in seconds)
    segment_duration = 10  # Original segment duration in seconds
    skip_duration = 2      # Duration to skip at the start of each segment (2 seconds)

    # Initialize the segmented results dictionary
    segmented_data = {}

    # Iterate through each key in the results dictionary
    for key, result in results.items():
        data = result['data']  # Data shape: (n_samples, n_channels)
        hdr = result['header']

        # Find the matching row in the cohort table
        cohort_row = cohort_table[cohort_table['Cohort'] == key]
        
        if cohort_row.empty:
            raise ValueError(f"Cohort information not found for {key}")

        # Define the sample rate and calculate sample counts
        Fs = hdr['sfreq']  # Sampling frequency
        samples_per_segment = int(segment_duration * Fs)
        samples_to_skip = int(skip_duration * Fs)
        effective_samples_per_segment = samples_per_segment - samples_to_skip
        n_channels = data.shape[1]  # Number of channels (should be 3: O1, Oz, O2)

        # Initialize segments with zeros
        EC = np.zeros((effective_samples_per_segment, n_channels))
        EO = np.zeros((effective_samples_per_segment, n_channels))
        LC = np.zeros((effective_samples_per_segment, n_channels))
        RC = np.zeros((effective_samples_per_segment, n_channels))
        DEC = np.zeros((effective_samples_per_segment, n_channels))
        NDEC = np.zeros((effective_samples_per_segment, n_channels))

        # Fill segments with data if available, skipping the first 2 seconds
        # EC segment
        segment_start = 0
        segment_end = samples_per_segment
        if data.shape[0] >= segment_end:
            EC = data[segment_start + samples_to_skip : 0, :]
        else:
            print(f"Not enough data for EC segment in {key}")

        # EO segment
        segment_start = samples_per_segment
        segment_end = 2 * samples_per_segment
        if data.shape[0] >= segment_end:
            EO = data[segment_start + samples_to_skip : segment_end, :]
        else:
            print(f"Not enough data for EO segment in {key}")

        # LC segment
        segment_start = 2 * samples_per_segment
        segment_end = 3 * samples_per_segment
        if data.shape[0] >= segment_end:
            LC = data[segment_start + samples_to_skip : segment_end, :]
        else:
            print(f"Not enough data for LC segment in {key}")

        # RC segment
        segment_start = 3 * samples_per_segment
        segment_end = 4 * samples_per_segment
        if data.shape[0] >= segment_end:
            RC = data[segment_start + samples_to_skip : segment_end, :]
        else:
            print(f"Not enough data for RC segment in {key}")

        # Apply conditions based on cohort table
        if cohort_row['LC'].values[0] == 'DEC':
            # Assign 'DEC' to LC and 'NDEC' to RC
            DEC = LC
            NDEC = RC
        elif cohort_row['RC'].values[0] == 'DEC':
            # Assign 'DEC' to RC and 'NDEC' to LC
            DEC = RC
            NDEC = LC
        else:
            # If neither LC nor RC is 'DEC', assign NDEC accordingly
            NDEC = LC
            # Optionally handle cases where DEC is not specified
            DEC = RC  # Or set DEC to zeros if appropriate

        # Store the segmented data and 'LinesDifference' in the results dictionary
        segmented_data[key] = {
            'header': hdr,
            'EC': EC,
            'EO': EO,
            'DEC': DEC,
            'NDEC': NDEC,
            'LinesDifference': cohort_row['LinesDifference'].values[0]
        }

    return segmented_data


In [4]:
segmented_data = segment_eeg_data_new(results)

In [5]:
import pandas as pd
import numpy as np
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

def prepare_time_series_by_section(segmented_data, cohort_table):
    """
    Prepares a DataFrame suitable for tsfresh from segmented EEG data for all sections (EC, EO, DEC, NDEC).

    Parameters
    ----------
    segmented_data : dict
        The dictionary containing segmented EEG data for each participant.
    cohort_table : pd.DataFrame
        DataFrame containing cohort information (including labels for Amblyopia/Control).

    Returns
    -------
    pd.DataFrame, pd.Series
        A DataFrame where each row represents a time-series sample with columns 'id', 'time', 'O1', 'Oz', 'O2',
        and a Series with group labels indexed by 'id'.
    """
    data_list = []
    labels_list = []

    # Loop through each participant's data
    for key, value in segmented_data.items():
        # Find the matching cohort row
        cohort_row = cohort_table[cohort_table['Cohort'] == key]
        if cohort_row.empty:
            continue

        # Assign label based on the first letter of the 'Cohort' column (Amblyopia = 1, Control = 0)
        label = 1 if key.startswith('A') else 0

        # Get channel names; default to ['O1', 'Oz', 'O2'] if not available
        channels = value.get('channels', ['O1', 'Oz', 'O2'])

        # For each section (EC, EO, DEC, NDEC)
        for section in ['EC', 'EO', 'DEC', 'NDEC']:
            section_data = value[section]  # Shape: (n_samples, n_channels)

            # Create a DataFrame for this section
            n_samples = section_data.shape[0]
            df = pd.DataFrame({
                'id': f"{key}_{section}",
                'time': np.arange(n_samples)
            })

            # Add each channel's data as a column
            for idx, channel_name in enumerate(channels):
                df[channel_name] = section_data[:, idx]

            # Append to data list
            data_list.append(df)

            # Append label for this 'id' (participant_section)
            labels_list.append({'id': f"{key}_{section}", 'label': label})

    # Concatenate all data into a single DataFrame
    time_series_df = pd.concat(data_list, ignore_index=True)

    # Create a labels DataFrame and convert to a Series indexed by 'id'
    labels_df = pd.DataFrame(labels_list).drop_duplicates(subset='id')
    labels_series = labels_df.set_index('id')['label']

    # Return the time-series data and corresponding labels
    return time_series_df, labels_series

# Load your cohort table (must include 'Cohort' column)
cohort_table = pd.read_excel('Cohort.xlsx')

# Prepare the time series DataFrame and labels
time_series_df, labels = prepare_time_series_by_section(segmented_data, cohort_table)


In [6]:
time_series_df

Unnamed: 0,id,time,O1,Oz,O2
0,A1_EO,0,-0.000013,-0.000026,-0.000036
1,A1_EO,1,-0.000016,-0.000028,-0.000038
2,A1_EO,2,-0.000019,-0.000031,-0.000040
3,A1_EO,3,-0.000022,-0.000034,-0.000042
4,A1_EO,4,-0.000025,-0.000036,-0.000043
...,...,...,...,...,...
638971,C1_NDEC,16379,-0.000003,-0.000004,-0.000004
638972,C1_NDEC,16380,-0.000003,-0.000003,-0.000004
638973,C1_NDEC,16381,-0.000003,-0.000002,-0.000003
638974,C1_NDEC,16382,-0.000003,-0.000002,-0.000003


In [9]:
import pandas as pd

# Save time_series_df as CSV
time_series_df.to_csv('time_series_df_full.csv', index=False)

# Save labels as CSV
labels.to_csv('labels_full.csv', index=False, header=True)

# Optionally, save labels as Pickle (preserves Python object types)
# labels.to_pickle('labels.pkl')

In [10]:
# import pandas as pd

# # Read time_series_df from CSV
# time_series_df = pd.read_csv('time_series_df_full.csv')

# # Read labels from CSV
# labels = pd.read_csv('labels_full.csv')



In [7]:
# labels.index = time_series_df['id'].unique()

In [7]:
# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier  # Import XGBoost classifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
import gc
from tsfresh import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters, MinimalFCParameters
from tsfresh.utilities.dataframe_functions import impute

# Define the function to process data in chunks using ComprehensiveFCParameters
def process_in_chunks(time_series_df, N):
    # Get unique IDs
    unique_ids = time_series_df['id'].unique()
    
    # Split the unique IDs into chunks of size N
    chunks = [unique_ids[i:i + N] for i in range(0, len(unique_ids), N)]
    
    # Initialize an empty list to store the results
    results = []
    
    # Process each chunk
    for chunk in chunks:
        # Filter the DataFrame to include only the IDs in the current chunk
        chunk_df = time_series_df[time_series_df['id'].isin(chunk)]
        
        # Extract features for the current chunk using ComprehensiveFCParameters
        extracted_features_chunk = extract_features(
            chunk_df,
            column_id='id',
            column_sort='time',
            default_fc_parameters=MinimalFCParameters(),  # Use ComprehensiveFCParameters() for more features
            n_jobs=4,  # Adjust based on your CPU cores
            # Since data is in wide format, we do not need to specify column_kind and column_value
        )
        
        # Impute missing values in the extracted features
        impute(extracted_features_chunk)
        
        # Append the extracted features to the results list
        results.append(extracted_features_chunk)
        
        # Clear memory
        del chunk_df, extracted_features_chunk
        gc.collect()
    
    # Concatenate all the results into a single DataFrame
    final_result = pd.concat(results)
    
    return final_result

# Set the chunk size N (adjust based on your memory constraints)
N = 10  # Smaller chunk size to manage memory usage

# Extract features using the process_in_chunks function
extracted_features = process_in_chunks(time_series_df, N)

# Drop any columns with NaN or infinite values
extracted_features_clean = extracted_features.replace([np.inf, -np.inf], np.nan).dropna(axis=1)

# Ensure that the labels are aligned with the extracted features
# Assuming 'labels' is a Series with 'id' as the index
labels_aligned = labels.loc[extracted_features_clean.index]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    extracted_features_clean,
    labels_aligned,
    test_size=0.2,
    random_state=42,
    stratify=labels_aligned  # Ensure stratified sampling
)

# Select the most important features using ANOVA F-test
selector = SelectKBest(f_classif, k=10)  # Adjust 'k' as needed
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)

# Get the names of the selected features
selected_feature_names = extracted_features_clean.columns[selector.get_support()]

# Initialize an empty dictionary to store classifiers and their parameter grids
classifiers = {
    'Random Forest': {
        'model': RandomForestClassifier(random_state=42),
        'param_grid': {
            'n_estimators': [100, 200, 500],
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
        }
    },
    'Logistic Regression': {
        'model': LogisticRegression(random_state=42, max_iter=5000),
        'param_grid': {
            'penalty': ['l1', 'l2'],
            'C': [0.01, 0.1, 1, 10, 100],
            'solver': ['liblinear'],
        }
    },
    'Support Vector Machine': {
        'model': SVC(random_state=42),
        'param_grid': [
            {'kernel': ['linear'], 'C': [0.1, 1, 10, 100]},
            {'kernel': ['rbf'], 'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto']},
            {'kernel': ['poly'], 'C': [0.1, 1, 10], 'degree': [2, 3], 'gamma': ['scale', 'auto']}
        ]
    },
    'Gradient Boosting': {
        'model': GradientBoostingClassifier(random_state=42),
        'param_grid': {
            'n_estimators': [100, 200],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5],
            'min_samples_split': [2, 5],
        }
    },
    'Neural Network': {
        'model': Pipeline([
            ('scaler', StandardScaler()),
            ('classifier', MLPClassifier(random_state=42, max_iter=500))
        ]),
        'param_grid': {
            'classifier__hidden_layer_sizes': [(50,), (100,), (50, 50)],
            'classifier__activation': ['tanh', 'relu'],
            'classifier__solver': ['adam', 'sgd'],
            'classifier__alpha': [0.0001, 0.001],
            'classifier__learning_rate': ['constant', 'adaptive'],
        }
    },
    'XGBoost': {
        'model': XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
        'param_grid': {
            'n_estimators': [50, 100, 200, 300, 500],
            'max_depth': [3, 5, 7, 9],
            'learning_rate': [0.01, 0.1, 0.2, 0.3, 0.5, 0.9],
            'subsample': [0.5, 0.7, 0.8, 1.0],
            'colsample_bytree': [0.5, 0.7, 0.8, 1.0],
        }
    }
}

# Loop through each classifier, perform grid search, and evaluate
for name, classifier_info in classifiers.items():
    print(f"\nTraining and evaluating {name}...")
    model = classifier_info['model']
    param_grid = classifier_info['param_grid']
    
    # For classifiers that include the feature selection or scaling in a pipeline, use X_train and X_test directly
    if name == 'Neural Network':
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_clf = grid_search.best_estimator_
        y_pred = best_clf.predict(X_test)
    else:
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
        grid_search.fit(X_train_selected, y_train)
        best_clf = grid_search.best_estimator_
        y_pred = best_clf.predict(X_test_selected)
    
    # Print the best parameters found by GridSearchCV
    print(f"Best parameters for {name}: {grid_search.best_params_}")
    
    # Evaluate the model
    print(f"Classification Report for {name}:")
    print(classification_report(y_test, y_pred))
    
    # For models that provide feature importances, display them
    if hasattr(best_clf, 'feature_importances_'):
        important_features = pd.DataFrame({
            'Feature': selected_feature_names,
            'Importance': best_clf.feature_importances_
        }).sort_values(by='Importance', ascending=False)
        print(f"Important features for {name}:")
        print(important_features)
    elif hasattr(best_clf, 'coef_'):
        # For linear models like Logistic Regression
        importance = np.abs(best_clf.coef_[0])
        important_features = pd.DataFrame({
            'Feature': selected_feature_names,
            'Importance': importance
        }).sort_values(by='Importance', ascending=False)
        print(f"Important features for {name}:")
        print(important_features)
    else:
        print(f"{name} does not provide feature importances directly.")


Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.58it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.64it/s]
Feature Extraction: 100%|██████████| 15/15 [00:03<00:00,  4.49it/s]
Feature Extraction: 100%|██████████| 14/14 [00:03<00:00,  4.30it/s]
  f = msb / msw



Training and evaluating Random Forest...
Best parameters for Random Forest: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 500}
Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       0.60      0.75      0.67         4

    accuracy                           0.62         8
   macro avg       0.63      0.62      0.62         8
weighted avg       0.63      0.62      0.62         8

Important features for Random Forest:
                  Feature  Importance
8    O2__absolute_maximum    0.244365
0          Oz__sum_values    0.134510
7             O2__maximum    0.131653
4  O2__standard_deviation    0.130236
9             O2__minimum    0.129713
6    O2__root_mean_square    0.126302
2          O2__sum_values    0.103221
1                Oz__mean    0.000000
3                O2__mean    0.000000
5            O2__variance    0.000000

Training and evaluating Logistic

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.50      1.00      0.67         4

    accuracy                           0.50         8
   macro avg       0.25      0.50      0.33         8
weighted avg       0.25      0.50      0.33         8

Important features for Logistic Regression:
                  Feature    Importance
0          Oz__sum_values  3.034310e-05
2          O2__sum_values  2.794456e-05
8    O2__absolute_maximum  1.114411e-06
9             O2__minimum  1.031037e-06
7             O2__maximum  8.252159e-07
6    O2__root_mean_square  1.613520e-07
4  O2__standard_deviation  1.613487e-07
1                Oz__mean  1.851996e-09
3                O2__mean  1.705601e-09
5            O2__variance  1.585562e-12

Training and evaluating Support Vector Machine...
Best parameters for Support Vector Machine: {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
Classification Report for Support 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 200}
Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       0.60      0.75      0.67         4

    accuracy                           0.62         8
   macro avg       0.63      0.62      0.62         8
weighted avg       0.63      0.62      0.62         8

Important features for Gradient Boosting:
                  Feature  Importance
8    O2__absolute_maximum    0.409995
0          Oz__sum_values    0.116827
7             O2__maximum    0.109700
6    O2__root_mean_square    0.102658
2          O2__sum_values    0.089874
9             O2__minimum    0.088785
4  O2__standard_deviation    0.082161
1                Oz__mean    0.000000
3                O2__mean    0.000000
5            O2__variance    0.000000

Training and evaluating Neural Network...




Best parameters for Neural Network: {'classifier__activation': 'relu', 'classifier__alpha': 0.0001, 'classifier__hidden_layer_sizes': (50, 50), 'classifier__learning_rate': 'constant', 'classifier__solver': 'sgd'}
Classification Report for Neural Network:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         4
           1       0.67      1.00      0.80         4

    accuracy                           0.75         8
   macro avg       0.83      0.75      0.73         8
weighted avg       0.83      0.75      0.73         8

Neural Network does not provide feature importances directly.

Training and evaluating XGBoost...
Best parameters for XGBoost: {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.7}
Classification Report for XGBoost:
              precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.75      0.75      0.75

Parameters: { "use_label_encoder" } are not used.



In [15]:
# Assuming 'best_clf' is your best XGBoost classifier from GridSearchCV
# if name == 'XGBoost':
#     # Save the best XGBoost model using joblib
#     joblib.dump(best_clf, 'best_xgboost_model.pkl')
#     print("XGBoost model saved to 'best_xgboost_model.pkl'")


In [16]:
# import joblib

# # Save the trained classifier
# joblib.dump(best_clf, 'trained_random_forest.pkl')

# # Save the feature selector
# joblib.dump(selector, 'feature_selector.pkl')
