<a href="https://colab.research.google.com/github/May-Xiaoting-Zhou/MX-Project/blob/main/MX_2_Feature_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Frequency bands: band name -> (low edge, high edge).
# calculate_band_power compares these edges against the bin frequencies from
# np.fft.fftfreq(n, 1/fs), so they are expressed in the same unit as `fs`
# (presumably Hz — TODO confirm). Both edges are inclusive there.
# Several entries deliberately overlap (e.g. 'alpha/mu' covers 'low_alpha' plus
# 'high_alpha', 'beta' covers the three *_beta sub-bands), so the resulting
# band powers are not additive.
# The insertion order of this dict determines the order of the band-power columns.
freq_bands = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha/mu': (8, 12),
    'low_alpha': (8, 10),
    'high_alpha': (10, 12),
    'mu': (9, 12),
    'sigma': (12, 16),
    'beta': (13, 30),
    'low_beta': (13, 15),
    'mid_beta': (15, 20),
    'high_beta': (20, 30),
    'low_gamma': (30, 50)
}

# Sampling rate handed to the band-power functions (adjust as needed).
# It is used as the sample spacing 1/fs when building the FFT frequency axis.
fs = 125

def calculate_band_power(eeg_signal, freq_bands, fs):
    """Sum the FFT power of a signal inside each frequency band.

    Parameters
    ----------
    eeg_signal : array-like
        Real-valued samples (list, ndarray or pandas Series). It is treated as
        a one-dimensional sequence: its length is the number of FFT points.
    freq_bands : dict
        Maps a band name to a ``(low_freq, high_freq)`` pair. Both edges are
        inclusive, so a bin lying exactly on an edge shared by two bands is
        counted in both.
    fs : float
        Sampling rate; the bin frequencies are computed with spacing ``1/fs``
        and are therefore in the same unit as ``fs``.

    Returns
    -------
    dict
        ``{band_name: power}`` in the iteration order of ``freq_bands``. Power
        is the unnormalised sum of ``|FFT|**2`` over the bins whose frequency
        falls inside the band. A band containing no bin yields 0.0, and an
        empty signal yields 0.0 for every band.
    """
    # Coerce once to a float array so that Series rows with object dtype
    # (what DataFrame.iterrows yields for mixed-type frames) are handled
    # numerically.
    samples = np.asarray(eeg_signal, dtype=float)
    if samples.size == 0:
        # np.fft.fft rejects zero-length input; an empty signal carries no power.
        return {band_name: 0.0 for band_name in freq_bands}

    # The spectrum and its frequency axis depend only on the signal, not on the
    # band, so compute them once instead of once per band.
    spectrum_power = np.abs(np.fft.fft(samples)) ** 2
    frequencies = np.fft.fftfreq(len(samples), 1 / fs)

    band_powers = {}
    for band_name, (low_freq, high_freq) in freq_bands.items():
        # Only non-negative bins can match when the band edges are positive,
        # so the mirrored negative-frequency half is not double counted.
        in_band = (frequencies >= low_freq) & (frequencies <= high_freq)
        band_powers[band_name] = np.sum(spectrum_power[in_band])
    return band_powers


def calculate_band_powers_for_dataframe(df, freq_bands, fs):
  """Append one band-power column per frequency band to ``df``.

  For every row, all values except the 'Time' entry are passed as the signal
  to ``calculate_band_power``; the resulting powers are joined back onto the
  row they were computed from.

  NOTE(review): the signal handed to the FFT is a single row (i.e. the values
  of the non-'Time' columns at one time stamp), not a sequence of samples
  along the Time axis — confirm this is the intended layout of the input.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain a 'Time' column; every other column is treated as part of
      the signal.
  freq_bands : dict
      Band name -> (low_freq, high_freq), forwarded to ``calculate_band_power``.
  fs : float
      Sampling rate, forwarded to ``calculate_band_power``.

  Returns
  -------
  pandas.DataFrame
      A new frame with the original columns followed by one column per band.
      Rows that could not be processed are reported on stdout and get NaN in
      the band columns. If no row could be processed, a copy of ``df`` is
      returned without band columns.
  """
  band_power_results = []
  kept_positions = []  # positional indices of the rows that produced a result
  for position, (index, row) in enumerate(df.iterrows()):
    try:
      eeg_signal = row.drop('Time')
    except (ValueError, KeyError):
      print(f"Error processing row: {index}. Skipping")
      continue

    band_power_results.append(calculate_band_power(eeg_signal, freq_bands, fs))
    kept_positions.append(position)

  if not band_power_results:
    return df.copy()

  # Give the band powers the index labels of the rows they came from.
  # pd.concat(axis=1) aligns on index labels, so a fresh 0..n-1 index would
  # misalign whenever df does not have a default RangeIndex or a row was skipped.
  df_band_powers = pd.DataFrame(band_power_results, index=df.index[kept_positions])

  return pd.concat([df, df_band_powers], axis=1)


In [2]:

def extract_features(df_bands, window_size):
  """Aggregate every column over fixed-size windows of the 'Time' column.

  Rows are bucketed by ``Time // window_size`` and each remaining column is
  summarised per bucket with min, max, std and mean.

  Parameters
  ----------
  df_bands : pandas.DataFrame
      Must contain a 'Time' column. The frame is not modified.
  window_size : int or float
      Width of one window, in the same unit as the 'Time' column.

  Returns
  -------
  pandas.DataFrame
      One row per window. The first column is 'Time' (the window number);
      the others are named ``<column>_<stat>`` with stat in
      min / max / std / mean, in that order for each source column.
  """
  # assign() returns a new frame, so the caller's 'Time' column keeps its
  # original values and the function can safely be re-run on the same input.
  windowed = df_bands.assign(Time=df_bands['Time'] // window_size)

  agg_results = windowed.groupby('Time').agg(['min', 'max', 'std', 'mean'])

  # Flatten the (column, stat) MultiIndex directly instead of parsing the
  # string repr of the tuples, which breaks on names containing ", " or quotes.
  agg_results.columns = [f"{column}_{stat}" for column, stat in agg_results.columns]

  return agg_results.reset_index()

In [19]:
def extract_features_for_all_subjects(window_size,
                                      data_dir='/content/drive/MyDrive/MX_2/EEG/3. Features',
                                      subjects=range(1, 39),
                                      exam_tag='Exam_Impasse_2'):
  """Build windowed band-power features for every subject and save them as CSV.

  For each subject, the baseline and exam CSV files are read, band powers are
  added with ``calculate_band_powers_for_dataframe`` (using the module-level
  ``freq_bands`` and ``fs``), and the result is aggregated per window with
  ``extract_features``. Exam rows are labelled 1 and baseline rows 0.

  Parameters
  ----------
  window_size : int or float
      Window width forwarded to ``extract_features``; also embedded in the
      output file names.
  data_dir : str, optional
      Directory holding the per-subject input files; outputs are written there
      too.
  subjects : iterable of int, optional
      Subject numbers to try. Subjects with a missing input file are reported
      on stdout and skipped.
  exam_tag : str, optional
      Selects the exam file ``Subject_<n>_EEG_<exam_tag>_No_Features.csv``.
      Variants previously used in this notebook: 'Exam', 'Exam_Impasse',
      'Exam_Impasse_2'.

  Returns
  -------
  None
      Two files are written to ``data_dir``:
      ``1_Impasse_BT_Features_Window_<window_size>.csv`` and
      ``0_Non_Impasse_Baseline_Features_Window_<window_size>.csv``.

  Raises
  ------
  ValueError
      If none of the requested subjects has both input files.
  """
  df_exams, df_baselines = [], []
  for sub in subjects:
    # Only the reads can legitimately hit a missing file; keeping the try
    # narrow avoids hiding unrelated errors raised during feature extraction.
    try:
      df_baseline = pd.read_csv(f'{data_dir}/Subject_{sub}_EEG_Baseline_No_Features.csv')
      df_exam = pd.read_csv(f'{data_dir}/Subject_{sub}_EEG_{exam_tag}_No_Features.csv')
    except FileNotFoundError:
      print(f"Subject {sub} not found")
      continue

    df_bands_baseline = calculate_band_powers_for_dataframe(df_baseline, freq_bands, fs)
    df_features_baseline = extract_features(df_bands_baseline, window_size)

    df_bands_exam = calculate_band_powers_for_dataframe(df_exam, freq_bands, fs)
    df_features_exam = extract_features(df_bands_exam, window_size)

    df_features_baseline['Subject'] = sub
    df_features_exam['Subject'] = sub

    df_features_exam['Label'] = 1 # Aha, Impasse
    df_features_baseline['Label'] = 0 # Non-Aha, Non-Impasse

    df_exams.append(df_features_exam)
    df_baselines.append(df_features_baseline)

  if not df_exams:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise ValueError(f"No subject data found in {data_dir!r}; nothing to export")

  df_exams = pd.concat(df_exams)
  df_baselines = pd.concat(df_baselines)

  df_exams.to_csv(f'{data_dir}/1_Impasse_BT_Features_Window_{window_size}.csv', index=False)
  df_baselines.to_csv(f'{data_dir}/0_Non_Impasse_Baseline_Features_Window_{window_size}.csv', index=False)

In [20]:
# Export features aggregated over windows of 1000 Time units (1 s per the original note).
window_size = 1000
extract_features_for_all_subjects(window_size=window_size)


Subject 1 not found
Subject 2 not found
Subject 3 not found
Subject 4 not found
Subject 5 not found
Subject 6 not found
Subject 7 not found
Subject 8 not found
Subject 9 not found
Subject 10 not found
Subject 11 not found
Subject 12 not found
Subject 13 not found
Subject 14 not found
Subject 15 not found
Subject 16 not found
Subject 17 not found
Subject 18 not found
Subject 20 not found
Subject 21 not found
Subject 22 not found
Subject 23 not found
Subject 24 not found
Subject 25 not found
Subject 29 not found
Subject 31 not found
Subject 32 not found
Subject 33 not found
Subject 34 not found
Subject 36 not found
Subject 37 not found
Subject 38 not found


In [21]:
# Export features aggregated over windows of 3000 Time units (3 s per the original note).
window_size = 3000
extract_features_for_all_subjects(window_size=window_size)

Subject 1 not found
Subject 2 not found
Subject 3 not found
Subject 4 not found
Subject 5 not found
Subject 6 not found
Subject 7 not found
Subject 8 not found
Subject 9 not found
Subject 10 not found
Subject 11 not found
Subject 12 not found
Subject 13 not found
Subject 14 not found
Subject 15 not found
Subject 16 not found
Subject 17 not found
Subject 18 not found
Subject 20 not found
Subject 21 not found
Subject 22 not found
Subject 23 not found
Subject 24 not found
Subject 25 not found
Subject 29 not found
Subject 31 not found
Subject 32 not found
Subject 33 not found
Subject 34 not found
Subject 36 not found
Subject 37 not found
Subject 38 not found


In [22]:
# Export features aggregated over windows of 5000 Time units (5 s per the original note).
window_size = 5000
extract_features_for_all_subjects(window_size=window_size)

Subject 1 not found
Subject 2 not found
Subject 3 not found
Subject 4 not found
Subject 5 not found
Subject 6 not found
Subject 7 not found
Subject 8 not found
Subject 9 not found
Subject 10 not found
Subject 11 not found
Subject 12 not found
Subject 13 not found
Subject 14 not found
Subject 15 not found
Subject 16 not found
Subject 17 not found
Subject 18 not found
Subject 20 not found
Subject 21 not found
Subject 22 not found
Subject 23 not found
Subject 24 not found
Subject 25 not found
Subject 29 not found
Subject 31 not found
Subject 32 not found
Subject 33 not found
Subject 34 not found
Subject 36 not found
Subject 37 not found
Subject 38 not found


In [23]:
# Export features aggregated over windows of 8000 Time units (8 s per the original note).
window_size = 8000
extract_features_for_all_subjects(window_size=window_size)

Subject 1 not found
Subject 2 not found
Subject 3 not found
Subject 4 not found
Subject 5 not found
Subject 6 not found
Subject 7 not found
Subject 8 not found
Subject 9 not found
Subject 10 not found
Subject 11 not found
Subject 12 not found
Subject 13 not found
Subject 14 not found
Subject 15 not found
Subject 16 not found
Subject 17 not found
Subject 18 not found
Subject 20 not found
Subject 21 not found
Subject 22 not found
Subject 23 not found
Subject 24 not found
Subject 25 not found
Subject 29 not found
Subject 31 not found
Subject 32 not found
Subject 33 not found
Subject 34 not found
Subject 36 not found
Subject 37 not found
Subject 38 not found
