In [1]:

import pandas as pd
import numpy as np
import os
from tqdm import tqdm

# --- Configuration ---
# All paths are relative to the notebook's working directory.
PROCESSED_DATA_DIR = '../data/processed/'
# Input: pickled collection of task windows (the error message below points to
# '01_preprocessing.ipynb' as the notebook that is expected to create it).
INPUT_FILE = os.path.join(PROCESSED_DATA_DIR, 'task_windows.pkl')
# Output: one CSV row of engineered features per task window.
OUTPUT_FILE = os.path.join(PROCESSED_DATA_DIR, 'features_dataset.csv')

# --- Load the preprocessed data ---
# SECURITY: unpickling can execute arbitrary code stored in the file, so only
# load a task_windows.pkl that comes from a trusted preprocessing run.
try:
    task_windows = pd.read_pickle(INPUT_FILE)
except FileNotFoundError:
    print(f"Error: Input file not found at {INPUT_FILE}")
    print("Please run the '01_preprocessing.ipynb' notebook first.")
    # Stop execution if the input file doesn't exist. Re-raise instead of
    # calling exit(): exit() is an interactive-shell helper that reports a
    # successful exit status from a script and acts on the kernel/session in a
    # notebook, whereas a raised exception fails this cell and halts "Run All".
    raise
else:
    print(f"Successfully loaded {len(task_windows)} task windows from {INPUT_FILE}")

# --- Feature Extraction Loop ---
# Builds one flat feature dict per task window: identifying fields, per-band
# EEG mean/variance, two EEG band ratios, and GSR mean/variance.

# Loop-invariant settings, defined once instead of on every iteration.
# Band order must match the per-electrode column order of the EEG array
# (Delta_TP9, Theta_TP9, ..., Gamma_TP10).
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
# 5 bands x 4 electrodes = 20 EEG channels in total.
n_eeg_channels = 20
epsilon = 1e-9 # To prevent division by zero

all_task_features = []

for window in tqdm(task_windows, desc="Engineering Features"):
    eeg_data = window['EEG_Data']
    gsr_data = window['GSR_Data']

    # Initialize a dictionary to store features for this window
    features = {
        'Participant': window['Participant'],
        'TaskKey': window['TaskKey'],
        'CognitiveLoad': window['CognitiveLoad']
    }

    # --- EEG Feature Engineering ---
    # The EEG data is structured as (time_steps, channels).
    # For the band at position `offset`, the slice offset:20:5 picks that
    # band's column from every electrode (e.g. Delta -> columns 0, 5, 10, 15).
    band_columns = {
        band: eeg_data[:, offset:n_eeg_channels:len(bands)]
        for offset, band in enumerate(bands)
    }

    # Mean power per band, pooled over all time steps and electrodes.
    band_means = {band: np.mean(columns) for band, columns in band_columns.items()}

    # Means are inserted before variances so the column order of the
    # resulting dataset stays: all *_Mean columns, then all *_Var columns.
    for band in bands:
        features[f'EEG_{band}_Mean'] = band_means[band]

    # Variance per band, pooled the same way as the mean.
    for band in bands:
        features[f'EEG_{band}_Var'] = np.var(band_columns[band])

    # Workload ratios, reusing the means computed above.
    # NOTE(review): band means are signed in this dataset (the gamma mean shown
    # in this notebook's output is negative), so adding epsilon only guards
    # against an exactly-zero denominator -- confirm the alpha/beta means can
    # never approach -epsilon.
    features['EEG_Theta_Alpha_Ratio'] = band_means['Theta'] / (band_means['Alpha'] + epsilon)
    features['EEG_Theta_Beta_Ratio'] = band_means['Theta'] / (band_means['Beta'] + epsilon)

    # --- GSR Feature Engineering ---
    # Mean and variance over every sample in the window's GSR array.
    features['GSR_Mean'] = np.mean(gsr_data)
    features['GSR_Var'] = np.var(gsr_data)

    all_task_features.append(features)

# --- Create and Save the Final DataFrame ---
# Each feature dict becomes one row; the CSV is written without the row index.
features_df = pd.DataFrame(all_task_features)
features_df.to_csv(OUTPUT_FILE, index=False)

# Report what was produced and where it went, then preview the first rows.
summary_lines = [
    "\nFeature engineering complete.",
    f"Shape of the final feature dataset: {features_df.shape}",
    f"Feature dataset saved to: {OUTPUT_FILE}",
    "\nFirst 5 rows of the dataset:",
]
print(*summary_lines, sep="\n")

preview = features_df.head()
print(preview)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_lo

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\ayush\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_lo

AttributeError: _ARRAY_API not found

Successfully loaded 1364 task windows from ../data/processed/task_windows.pkl


Engineering Features: 100%|██████████| 1364/1364 [00:00<00:00, 2840.65it/s]



Feature engineering complete.
Shape of the final feature dataset: (1364, 17)
Feature dataset saved to: ../data/processed/features_dataset.csv

First 5 rows of the dataset:
   Participant TaskKey  CognitiveLoad  EEG_Delta_Mean  EEG_Theta_Mean  \
0            1   1spl1              0        0.239537        0.176614   
1            1   1spl2              0        0.725022        0.417921   
2            1  1Item1              0        0.842528        0.515361   
3            1  1Item2              0        0.843536        0.521588   
4            1  1Item3              0        0.797471        0.449036   

   EEG_Alpha_Mean  EEG_Beta_Mean  EEG_Gamma_Mean  EEG_Delta_Var  \
0        0.414501       0.267745       -0.207820       0.200958   
1        0.597913       0.326772       -0.053515       0.170056   
2        0.674078       0.363511       -0.104016       0.188123   
3        0.717120       0.298959       -0.142325       0.284534   
4        0.638049       0.308217       -0.156758     