In [31]:
import pandas as pd
import numpy as np
import mne
import os
from mne.time_frequency import psd_array_welch
from scipy.signal import welch

EEG_DATA_PATH = "Dataset/eremus_dataset/pruned/train"
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv('/Users/harsh_dadwal/Harsh/coding/ml/music_eeg/Emotions_Train.csv')

In [32]:
def compute_features(eeg_data, sampling_rate=128):
    features = {}
    
    # Compute Power Spectral Density (PSD) using Welch's method
    psds, freqs = psd_array_welch(eeg_data, sfreq=sampling_rate, fmin=0.5, fmax=40, n_fft=2048)

    # Extract PSD for different frequency bands (Alpha, Beta, Delta, Theta)
    features['alpha_power'] = np.mean(psds[:, (freqs >= 8) & (freqs <= 12)], axis=1)  # Alpha (8-12 Hz)
    features['beta_power'] = np.mean(psds[:, (freqs >= 12) & (freqs <= 30)], axis=1)  # Beta (12-30 Hz)
    features['delta_power'] = np.mean(psds[:, (freqs >= 0.5) & (freqs <= 4)], axis=1)  # Delta (0.5-4 Hz)
    features['theta_power'] = np.mean(psds[:, (freqs >= 4) & (freqs <= 8)], axis=1)    # Theta (4-8 Hz)
    
    # Variance of the EEG data
    features['variance'] = np.var(eeg_data, axis=1)

    # Mean of the EEG data
    features['mean'] = np.mean(eeg_data, axis=1)

    return features

In [33]:
import os
import mne
import numpy as np

class ResidualNan(Exception):
    pass

def interpolate(raw_data):
    """
    Interpolate NaN values in the EEG data. Replace large values (artifacts) with NaNs first.
    """
    # Replace values greater than a threshold (e.g., 1e2) with NaNs
    raw_data[abs(raw_data) > 1e2] = np.nan

    # Find NaN indices
    nan_indices = np.where(np.isnan(raw_data))
    nan_indices = np.vstack(nan_indices).transpose()

    # Interpolate NaN values
    for channel, timepoint in nan_indices:
        if timepoint == 0 or timepoint == raw_data.shape[1] - 1:
            # Cannot interpolate if NaN is at the start or end, just skip this case
            continue

        # Get values before and after the NaN
        before = raw_data[channel, timepoint - 1]
        after = raw_data[channel, timepoint + 1]

        # Interpolate the NaN by averaging the surrounding values
        raw_data[channel, timepoint] = (before + after) / 2

    # Check if any NaNs remain
    nan_indices = np.where(np.isnan(raw_data))
    nan_indices = np.vstack(nan_indices).transpose()
    any_nan = nan_indices.shape[0] != 0

    if any_nan:
        raise ResidualNan("Data still contain NaNs after interpolation")

    return raw_data

def open_and_interpolate(file):
    """
    Open the .fif file and perform interpolation for NaN values.
    """
    try:
        raw_file = mne.io.read_raw_fif(file, preload=True)
        raw_data = raw_file.get_data()
        raw_data = interpolate(raw_data)  # Handle NaN values
    except ResidualNan as e:
        print(f"Residual NaNs in {file}")
        return None
    return raw_data, raw_file.info['sfreq']  # Also return the sampling frequency

def load_eeg_data(trial_id, eeg_data_path):
    """
    Load EEG data from a .fif file and handle NaN values.

    Parameters:
        trial_id (str): ID of the trial (used to construct the file path).
        eeg_data_path (str): The path to the directory containing the .fif files.

    Returns:
        tuple: EEG data array and the sampling frequency.
    """
    # Construct the file path based on trial ID
    eeg_file_path = os.path.join(eeg_data_path, f'{trial_id}_eeg.fif')

    # Load the EEG data and interpolate NaN values
    eeg_data, sfreq = open_and_interpolate(eeg_file_path)
    
    if eeg_data is None:
        raise ValueError(f"Unable to load or preprocess EEG data from file: {eeg_file_path}")

    return eeg_data, sfreq

In [36]:
for index, row in df.iterrows():
    trial_id = row['id']  # Assuming 'id' corresponds to the trial_id in the .fif filename
    
    # Load the corresponding EEG data
    eeg_data, sfreq = load_eeg_data(trial_id, EEG_DATA_PATH)
    
    # Compute the features
    features = compute_features(eeg_data, sampling_rate=sfreq)
    
    # Add the features to the DataFrame
    df.loc[index, 'alpha_power'] = np.mean(features['alpha_power'])
    df.loc[index, 'beta_power'] = np.mean(features['beta_power'])
    df.loc[index, 'delta_power'] = np.mean(features['delta_power'])
    df.loc[index, 'theta_power'] = np.mean(features['theta_power'])
    df.loc[index, 'variance'] = np.mean(features['variance'])
    df.loc[index, 'mean'] = np.mean(features['mean'])


Opening raw data file Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif...


  raw_file = mne.io.read_raw_fif(file, preload=True)


AttributeError: 'NoneType' object has no attribute 'kind'

In [None]:
# Save the DataFrame with the new features
df.to_csv('train_with_features.csv', index=False)

print("Features added to the DataFrame and saved to 'train_with_features.csv'")

In [39]:
raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif", preload=True)

Opening raw data file Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif...


  raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif", preload=True)


AttributeError: 'NoneType' object has no attribute 'kind'

In [41]:
try:
    raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif", preload=False)
except Exception as e:
    print(f"Error reading the file: {e}")

Opening raw data file Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif...
Error reading the file: 'NoneType' object has no attribute 'kind'


  raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/3784258358_eeg.fif", preload=False)


In [55]:
try:
    raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/1036331628_eeg.fif", preload=True)
except Exception as e:
    print(f"Error reading the file: {e}")

Opening raw data file Dataset/eremus_dataset/pruned/train/1036331628_eeg.fif...
Error reading the file: 'NoneType' object has no attribute 'kind'


  raw = mne.io.read_raw_fif("Dataset/eremus_dataset/pruned/train/1036331628_eeg.fif", preload=True)


In [56]:
import os
files = os.listdir("Dataset/eremus_dataset/pruned/train")
for file in files:
    size = os.path.getsize(f"Dataset/eremus_dataset/pruned/train/{file}")
    print(f"{file}: {size} bytes")

8269497269_eeg.fif: 0 bytes
1743279177_eeg.fif: 0 bytes
8066471705_eeg.fif: 0 bytes
9187213890_eeg.fif: 0 bytes
5905614682_eeg.fif: 0 bytes
9195527806_eeg.fif: 0 bytes
6331565359_eeg.fif: 0 bytes
4824313470_eeg.fif: 0 bytes
1671587129_eeg.fif: 0 bytes
8026221597_eeg.fif: 0 bytes
4060619220_eeg.fif: 0 bytes
7933831782_eeg.fif: 0 bytes
9903556545_eeg.fif: 0 bytes
2395445698_eeg.fif: 0 bytes
3558627660_eeg.fif: 0 bytes
3784258358_eeg.fif: 0 bytes
7418038546_eeg.fif: 0 bytes
3455241290_eeg.fif: 0 bytes
5455527567_eeg.fif: 0 bytes
4274886008_eeg.fif: 0 bytes
6023281672_eeg.fif: 0 bytes
1415214270_eeg.fif: 0 bytes
5898249712_eeg.fif: 0 bytes
2644432196_eeg.fif: 0 bytes
3359788067_eeg.fif: 0 bytes
7962692464_eeg.fif: 0 bytes
7079957449_eeg.fif: 0 bytes
7833179198_eeg.fif: 0 bytes
5568667703_eeg.fif: 0 bytes
1831156008_eeg.fif: 0 bytes
8446090510_eeg.fif: 0 bytes
2059040442_eeg.fif: 0 bytes
4421422816_eeg.fif: 0 bytes
2642234932_eeg.fif: 0 bytes
1281311748_eeg.fif: 0 bytes
6361287970_eeg.fif: 