In [60]:
import pandas as pd
import numpy as np
import mne
import os
from mne.time_frequency import psd_array_welch
from scipy.signal import welch

# Directory of the training-split EEG recordings (one `<id>_eeg.fif` per trial).
# Relative path: it is resolved against the notebook's current working directory.
EEG_DATA_PATH = "Dataset/eremus_dataset/pruned/train"
# Load the CSV file into a Pandas DataFrame
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run on another machine until it is pointed at a local copy of the CSV.
# The 'id' column of this table is what later cells use to locate each trial's file.
df = pd.read_csv('/Users/harsh_dadwal/Harsh/coding/ml/music_eeg/Emotions_Train.csv')

In [61]:
def compute_features(eeg_data, sampling_rate=128, n_fft=2048):
    """Compute per-row band powers, variance and mean of an EEG array.

    Parameters
    ----------
    eeg_data : numpy.ndarray
        2-D array reduced along axis 1 (presumably ``(n_channels, n_samples)``).
    sampling_rate : float, optional
        Sampling frequency in Hz passed to Welch's method (default 128).
    n_fft : int, optional
        Requested FFT length for Welch's method (default 2048). It is clamped
        to the number of samples so that short recordings do not fail.

    Returns
    -------
    dict
        Keys ``'alpha_power'``, ``'beta_power'``, ``'delta_power'``,
        ``'theta_power'``, ``'variance'`` and ``'mean'``; each value is a 1-D
        array with one entry per row of ``eeg_data``.
    """
    # (feature name, low edge in Hz, high edge in Hz). Both edges are inclusive,
    # so a frequency bin lying exactly on a shared edge (4, 8 or 12 Hz)
    # contributes to both neighbouring bands.
    bands = (
        ('alpha_power', 8, 12),    # Alpha (8-12 Hz)
        ('beta_power', 12, 30),    # Beta (12-30 Hz)
        ('delta_power', 0.5, 4),   # Delta (0.5-4 Hz)
        ('theta_power', 4, 8),     # Theta (4-8 Hz)
    )

    # psd_array_welch rejects an n_fft longer than the signal when n_per_seg is
    # left unset, so never ask for more points than the recording contains.
    n_times = np.shape(eeg_data)[-1]
    effective_n_fft = min(n_fft, n_times)

    # Compute Power Spectral Density (PSD) using Welch's method
    psds, freqs = psd_array_welch(
        eeg_data, sfreq=sampling_rate, fmin=0.5, fmax=40, n_fft=effective_n_fft
    )

    features = {}
    for name, low, high in bands:
        in_band = (freqs >= low) & (freqs <= high)
        # Average the PSD over the band's frequency bins, one value per row.
        features[name] = np.mean(psds[:, in_band], axis=1)

    # Time-domain statistics of the raw signal, one value per row.
    features['variance'] = np.var(eeg_data, axis=1)
    features['mean'] = np.mean(eeg_data, axis=1)

    return features

In [84]:
class ResidualNan(Exception):
    """Signals that EEG data still contains NaN values after interpolation."""

def interpolate_nans(data):
    """Fill NaN samples of a 2-D EEG array by per-channel linear interpolation.

    Each row of ``data`` is treated as one channel. NaNs lying between two
    valid samples are filled by linear interpolation across the whole gap, so
    runs of several consecutive NaNs are repaired (a single missing sample
    gets the midpoint of its two neighbours). NaNs located before the first
    or after the last valid sample of a channel cannot be interpolated and are
    set to 0.

    Parameters
    ----------
    data : numpy.ndarray
        2-D float array indexed as ``data[channel, timepoint]``. It is
        modified in place.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in, with NaNs filled.

    Raises
    ------
    ResidualNan
        If NaNs remain after filling, e.g. a channel with no valid sample.
    """
    nan_mask = np.isnan(data)
    if not nan_mask.any():
        # Nothing to repair.
        return data

    timepoints = np.arange(data.shape[1])
    # Only visit the channels that actually contain NaNs.
    for channel in np.where(nan_mask.any(axis=1))[0]:
        missing = nan_mask[channel]
        valid = ~missing
        if not valid.any():
            # No anchor samples at all: leave the NaNs for the final check.
            continue
        # left/right=0 keeps the convention that edge NaNs, which have no
        # neighbour on one side, are replaced by 0 rather than extrapolated.
        data[channel, missing] = np.interp(
            timepoints[missing],
            timepoints[valid],
            data[channel, valid],
            left=0,
            right=0,
        )

    # Check if any NaNs remain
    if np.isnan(data).any():
        raise ResidualNan("Data still contains NaNs after interpolation")

    return data

def load_eeg_data(trial_id, data_dir=None):
    """Load one trial's EEG recording and repair NaN samples.

    Parameters
    ----------
    trial_id
        Identifier used to build the file name ``<trial_id>_eeg.fif``.
    data_dir : str, optional
        Directory containing the ``.fif`` files. Defaults to the notebook-level
        ``EEG_DATA_PATH`` constant instead of a second hard-coded copy of it.

    Returns
    -------
    tuple
        ``(eeg_data, sfreq)`` where ``eeg_data`` is the array returned by
        ``raw.get_data()`` after NaN interpolation and ``sfreq`` is
        ``raw.info['sfreq']``; or ``(None, None)`` when NaNs could not be
        removed, so callers must check for ``None`` before using the result.
    """
    if data_dir is None:
        data_dir = EEG_DATA_PATH

    # Path to the EEG .fif file
    eeg_file_path = f'{data_dir}/{trial_id}_eeg.fif'

    # Load the EEG data from the .fif file
    raw = mne.io.read_raw_fif(eeg_file_path, preload=True)

    # Extract the samples as a NumPy array
    eeg_data = raw.get_data()

    # Handle NaN values by interpolation
    try:
        eeg_data = interpolate_nans(eeg_data)
    except ResidualNan:
        print(f"Residual NaNs in {trial_id}_eeg.fif")
        return None, None  # Skip this trial if NaNs cannot be handled

    return eeg_data, raw.info['sfreq']

In [85]:
# Feature columns written for every trial; each is the across-row average of
# the per-row values returned by compute_features.
feature_columns = ('alpha_power', 'beta_power', 'delta_power', 'theta_power', 'variance', 'mean')

for index, row in df.iterrows():
    trial_id = row['id']  # Assuming 'id' corresponds to the trial_id in the .fif filename

    # Load the corresponding EEG data
    eeg_data, sfreq = load_eeg_data(trial_id)

    if eeg_data is None:
        # load_eeg_data returns (None, None) when NaNs could not be repaired.
        # Mark the trial's features as missing instead of crashing inside
        # compute_features, and make sure no stale value from a previous run
        # of this cell survives.
        for column in feature_columns:
            df.loc[index, column] = np.nan
        continue

    # Compute the features
    features = compute_features(eeg_data, sampling_rate=sfreq)

    # Add the features to the DataFrame, collapsing each to a single scalar
    for column in feature_columns:
        df.loc[index, column] = np.mean(features[column])


Opening raw data file Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif...
    Range : 9361 ... 19600 =     73.133 ...   153.125 secs
Ready.
Reading 0 ... 10239  =      0.000 ...    79.992 secs...
Effective window size : 16.000 (s)
Opening raw data file Dataset/eremus_dataset/pruned/train/2395445698_eeg.fif...
    Range : 20939 ... 31178 =    163.586 ...   243.578 secs
Ready.
Reading 0 ... 10239  =      0.000 ...    79.992 secs...
Effective window size : 16.000 (s)
Opening raw data file Dataset/eremus_dataset/pruned/train/3049220457_eeg.fif...
    Range : 32286 ... 42525 =    252.234 ...   332.227 secs
Ready.
Reading 0 ... 10239  =      0.000 ...    79.992 secs...
Effective window size : 16.000 (s)
Opening raw data file Dataset/eremus_dataset/pruned/train/2667905835_eeg.fif...
    Range : 44256 ... 54495 =    345.750 ...   425.742 secs
Ready.
Reading 0 ... 10239  =      0.000 ...    79.992 secs...
Effective window size : 16.000 (s)
Opening raw data file Dataset/eremus_dataset/prun

In [93]:
# Preview the first rows to confirm the feature columns were added to df.
df.head()

Unnamed: 0.1,Unnamed: 0,spotify_track_id,song_title,label,emotion,session_type,subject_id,id,song_author,alpha_power,beta_power,delta_power,theta_power,variance,mean
0,0,06s6aloy62vytl3MnT6gfl,Times Like These - Live at the Pantages Theatr...,0,Amusement,personal,23,3784258358,['Foo Fighters'],2.258961e-12,1.019803e-12,4.672743e-11,4.411881e-12,2.553564e-10,2.346306e-09
1,1,0h9fnCSnbUgOEgibnQByFv,Everyday Life,2,Disappointment,personal,23,2395445698,['Coldplay'],2.269191e-12,7.160127e-13,2.629792e-11,3.541972e-12,2.081659e-10,1.011783e-08
2,2,1iArQTuOzxvrtniGmkyy92,Burden In My Hand,3,Anger,personal,23,3049220457,['Soundgarden'],3.825635e-12,1.182833e-12,6.786013e-11,5.654127e-12,5.708281e-10,-1.731397e-07
3,3,3A9vIxzGBjEfqmDK7H9exS,Pyramid Song,3,Fear,personal,23,2667905835,['Radiohead'],3.852573e-12,1.220438e-12,5.783148e-11,6.408991e-12,3.996517e-10,1.924873e-07
4,4,39kHMfF3dBMZMbOtoit1XF,On The Mend,0,Joy,personal,23,1281311748,['Foo Fighters'],3.885899e-12,1.389423e-12,8.96001e-11,7.100184e-12,4.647378e-10,6.473157e-08


In [94]:
# Save the DataFrame with the new features
# The file is written relative to the current working directory; index=False
# keeps the DataFrame's row index out of the CSV.
df.to_csv('train_with_features.csv', index=False)

# Confirmation message for the notebook output.
print("Features added to the DataFrame and saved to 'train_with_features.csv'")

Features added to the DataFrame and saved to 'train_with_features.csv'


In [57]:
import os

# Report the on-disk size of every entry in the training directory, in the
# order os.listdir returns them.
files = os.listdir("Dataset/eremus_dataset/pruned/train")
for file in files:
    size = os.stat(f"Dataset/eremus_dataset/pruned/train/{file}").st_size
    print(f"{file}: {size} bytes")

8269497269_eeg.fif: 1493932 bytes
1743279177_eeg.fif: 1480092 bytes
8066471705_eeg.fif: 1493420 bytes
9187213890_eeg.fif: 1486764 bytes
5905614682_eeg.fif: 1493804 bytes
9195527806_eeg.fif: 1447292 bytes
6331565359_eeg.fif: 1493676 bytes
4824313470_eeg.fif: 1493804 bytes
1671587129_eeg.fif: 1493420 bytes
8026221597_eeg.fif: 1503932 bytes
4060619220_eeg.fif: 1493548 bytes
7933831782_eeg.fif: 1493676 bytes
9903556545_eeg.fif: 1493804 bytes
2395445698_eeg.fif: 1317372 bytes
3558627660_eeg.fif: 1493676 bytes
3784258358_eeg.fif: 1317372 bytes
7418038546_eeg.fif: 1444348 bytes
3455241290_eeg.fif: 1493420 bytes
5455527567_eeg.fif: 1493548 bytes
4274886008_eeg.fif: 1423084 bytes
6023281672_eeg.fif: 1493548 bytes
1415214270_eeg.fif: 1494188 bytes
5898249712_eeg.fif: 1493804 bytes
2644432196_eeg.fif: 1493804 bytes
3359788067_eeg.fif: 1493804 bytes
7962692464_eeg.fif: 1494572 bytes
7079957449_eeg.fif: 1520204 bytes
7833179198_eeg.fif: 1493548 bytes
5568667703_eeg.fif: 1493548 bytes
1831156008_eeg