In [4]:
import pandas as pd
import numpy as np
from scipy.signal import welch
from scipy.stats import linregress

# Sampling rate of the recording in Hz (assumed to be 250 Hz)
fs = 250

# EEG columns to analyse: 'channel_1' through 'channel_8' (assumed to be the relevant channels)
eeg_channels = ['channel_' + str(idx) for idx in range(1, 9)]

# Sliding-window geometry, converted from seconds to samples
window_size = int(fs * 0.5)  # 500 ms window -> 125 samples at 250 Hz
step_size = int(fs * 0.1)    # 100 ms step   -> 25 samples at 250 Hz

# Step 1: Load the EEG recording from its CSV file
df = pd.read_csv('full_dataset.csv')

# Step 2: Define a function to extract features from each window
def extract_features_with_sliding_window(df, eeg_channels, fs=250, window_size=125, step_size=25):
    """Extract per-window EEG features using a sliding window.

    Windows are taken inside each contiguous run of rows that share the same
    ``participant_id``, so a window never mixes samples from two participants.
    Within a run, windows start every ``step_size`` rows and only full-length
    windows are emitted.

    Args:
        df: DataFrame holding the columns ``participant_id``, ``timestep``,
            ``trigger`` and every column named in ``eeg_channels``.
        eeg_channels: Sequence of column names to extract features from.
        fs: Sampling rate in Hz, passed to ``scipy.signal.welch``.
        window_size: Window length in rows (samples).
        step_size: Offset in rows between consecutive window starts.

    Returns:
        A DataFrame with one row per window. Columns, in order:
        ``participant_id``, ``timestep_start``, ``timestep_end``; per channel
        ``_max``, ``_min``, ``_mean``, ``_amplitude_range``; per channel
        ``_alpha_power``, ``_beta_power``, ``_gamma_power``; per channel
        ``_slope``; per channel pair ``<a>_<b>_diff``; and ``trigger`` (value
        on the last row of the window). Empty if no full window fits.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    # Pull every needed column out as a NumPy array once; slicing arrays per
    # window is far cheaper than repeated DataFrame.iloc / column lookups.
    participant_ids = df['participant_id'].to_numpy()
    timesteps = df['timestep'].to_numpy()
    triggers = df['trigger'].to_numpy()
    signals = {channel: df[channel].to_numpy() for channel in eeg_channels}

    # Row positions where participant_id changes delimit the segments that
    # windows are allowed to slide over.
    boundaries = (np.flatnonzero(participant_ids[1:] != participant_ids[:-1]) + 1).tolist()
    segments = zip([0, *boundaries], [*boundaries, len(df)])

    # Welch segments cannot be longer than the window itself. Clamping here
    # yields the same PSD SciPy would fall back to, without the per-call
    # "nperseg ... is greater than input length" warning.
    nperseg = min(int(fs), window_size)

    sample_index = np.arange(window_size)  # x-axis for the slope regression
    channel_pairs = [
        (first, second)
        for pos, first in enumerate(eeg_channels)
        for second in eeg_channels[pos + 1:]
    ]

    feature_list = []
    for seg_start, seg_stop in segments:
        for start in range(seg_start, seg_stop - window_size + 1, step_size):
            stop = start + window_size
            window = {channel: values[start:stop] for channel, values in signals.items()}

            # Identification taken from the first/last row of the window
            features = {
                'participant_id': participant_ids[start],
                'timestep_start': timesteps[start],
                'timestep_end': timesteps[stop - 1],
            }

            # 1. Time-domain features (max, min, mean, amplitude range)
            for channel in eeg_channels:
                values = window[channel]
                highest = np.max(values)
                lowest = np.min(values)
                features[f'{channel}_max'] = highest
                features[f'{channel}_min'] = lowest
                features[f'{channel}_mean'] = np.mean(values)
                features[f'{channel}_amplitude_range'] = highest - lowest

            # 2. Frequency-domain features (summed Welch PSD per band).
            # NOTE: both the beta and gamma ranges include 30 Hz, so that bin
            # is counted in both; kept to preserve existing feature values.
            for channel in eeg_channels:
                freqs, psd = welch(window[channel], fs=fs, nperseg=nperseg)
                features[f'{channel}_alpha_power'] = np.sum(psd[(freqs >= 8) & (freqs <= 12)])
                features[f'{channel}_beta_power'] = np.sum(psd[(freqs >= 13) & (freqs <= 30)])
                features[f'{channel}_gamma_power'] = np.sum(psd[(freqs >= 30) & (freqs <= 50)])

            # 3. Signal slope: linear-regression slope against sample index
            for channel in eeg_channels:
                features[f'{channel}_slope'] = linregress(sample_index, window[channel]).slope

            # 4. Inter-channel differences: mean of the sample-wise difference
            for first, second in channel_pairs:
                features[f'{first}_{second}_diff'] = np.mean(window[first] - window[second])

            # Label: trigger value on the last row of the window
            features['trigger'] = triggers[stop - 1]

            feature_list.append(features)

    # Convert list of feature dicts into a DataFrame
    return pd.DataFrame(feature_list)

# Step 3: Run the sliding-window feature extraction over the loaded recording
features_df = extract_features_with_sliding_window(
    df,
    eeg_channels,
    fs,
    window_size,
    step_size,
)

# Step 4: Persist the feature table as CSV (row index omitted)
features_df.to_csv('extracted_features.csv', index=False)

# Step 5: Optional sanity check - show the first five feature rows
preview = features_df.head(5)
print(preview)

  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,


   participant_id  timestep_start  timestep_end  channel_1_max  channel_1_min  \
0               1               1           125       9.456022     -28.242964   
1               1              26           150       5.230984     -29.759351   
2               1              51           175      21.718463     -29.759351   
3               1              76           200      69.702880     -29.759351   
4               1             101           225      69.989725     -29.759351   

   channel_1_mean  channel_1_amplitude_range  channel_2_max  channel_2_min  \
0       -9.618033                  37.698986       3.100745     -21.342123   
1      -13.197389                  34.990335       5.314342     -21.342123   
2      -11.357816                  51.477814      13.265564     -21.342123   
3       -5.071852                  99.462231      36.655799     -21.342123   
4        7.432910                  99.749076      36.982369     -21.342123   

   channel_2_mean  ...  channel_4_channel_6_