In [2]:
import numpy as np
import pandas as pd
from scipy.signal import welch
import math

In [3]:
def bandpower(data, sf, band, window_sec=None):
    band = np.array(band)
    freqs, psd = welch(data, sf, nperseg=int(window_sec * sf) if window_sec else None)
    idx_band = np.logical_and(freqs >= band[0], freqs <= band[1])
    return np.trapz(psd[idx_band], freqs[idx_band])  # Integral over the band


In [4]:
bands = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30),
    'gamma': (30, math.inf)  # Upper bound may vary by sampling freq
}


In [5]:
def extract_spectral_features(eeg_df, sf=250):
    features = []

    for i, row in eeg_df.iterrows():
        signal = row.values  # 1D EEG signal
        person_features = {}
        total_power = bandpower(signal, sf, (0.5, 100))  # Total power for normalization

        for band_name, band_range in bands.items():
            power = bandpower(signal, sf, band_range)
            person_features[f"{band_name}_power"] = power
            person_features[f"{band_name}_rel_power"] = power / total_power if total_power != 0 else 0
        
        features.append(person_features)

    return pd.DataFrame(features)


In [6]:
data = pd.read_csv('../data/preprocessed.csv')
data.describe()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,169,170,171,172,173,174,175,176,177,178
count,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,...,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0,4999.0
mean,2499.0,2.164633,-0.554607,-0.564193,-0.540193,-0.649433,-0.753712,-1.075914,-1.18135,-1.451674,...,1.1907,1.485347,1.662399,1.745655,1.89624,2.16639,2.260436,2.39172,2.331582,2.544677
std,1443.231328,1.153444,17.789305,17.597088,17.681466,17.723401,17.403211,17.384649,17.279831,17.366649,...,17.849879,17.741248,17.593795,17.58637,17.837506,17.770479,17.780897,17.60351,17.718653,17.52613
min,0.0,0.0,-146.09476,-82.371185,-81.84957,-90.5431,-90.80391,-103.40952,-83.58828,-83.58828,...,-82.371185,-103.8442,-72.46056,-75.32943,-125.925766,-81.50183,-82.19731,-82.19731,-116.362885,-89.934555
25%,1249.5,2.0,-9.345543,-9.258608,-9.519414,-9.693284,-9.432479,-9.693284,-10.127961,-10.562637,...,-7.780708,-7.259097,-7.432967,-7.432967,-6.998291,-6.911355,-6.814286,-6.82442,-6.780952,-6.389744
50%,2499.0,2.0,-0.391209,-0.652015,-0.652015,-0.73895,-0.73895,-0.999756,-1.260562,-1.347497,...,0.867644,1.086691,1.347497,1.434432,1.521367,1.869109,2.042979,2.129914,2.042979,2.477656
75%,3748.5,3.0,8.062149,8.041514,8.215385,8.215385,7.883883,7.780708,7.443468,7.172161,...,9.78022,10.301831,10.214896,10.301831,10.997314,10.823443,11.08425,11.301588,11.08425,11.25812
max,4998.0,4.0,105.409035,95.58535,91.93407,109.66886,87.06569,87.41343,77.85055,112.972404,...,97.75873,102.71404,93.4989,90.10842,87.32649,97.06325,91.58633,90.282295,84.54456,84.805374


In [7]:
eeg_df = data.drop(columns=['Unnamed: 0', '0'])
labels = data['0']

In [8]:

# Step 2: Extract features
spectral_features_df = extract_spectral_features(eeg_df, sf=250)

# Step 3: Merge with labels and proceed to model training
# labels = pd.read_csv("labels.csv")
full_df = spectral_features_df.join(labels)


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  return np.trapz(psd[idx_band], freqs[idx_band])  # Integral over the band


In [9]:
X = eeg_df
X_processed = spectral_features_df

y = labels

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)

# Evaluate
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.38      0.03      0.05       102
           1       0.50      0.07      0.12       140
           2       0.51      0.83      0.63       400
           3       0.70      0.79      0.74       208
           4       0.41      0.24      0.30       150

    accuracy                           0.54      1000
   macro avg       0.50      0.39      0.37      1000
weighted avg       0.52      0.54      0.48      1000



In [12]:
processed_X_train, processed_X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42, stratify=y)


In [13]:


clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(processed_X_train, y_train)

# Predict
y_pred = clf.predict(processed_X_test)

# Evaluate
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.59      0.50      0.54       102
           1       0.48      0.29      0.36       140
           2       0.56      0.65      0.60       400
           3       0.69      0.77      0.73       208
           4       0.48      0.43      0.45       150

    accuracy                           0.58      1000
   macro avg       0.56      0.53      0.54      1000
weighted avg       0.57      0.58      0.57      1000



we notice improvement in the accuracy, especially for the non-dominant class
