In [2]:
import numpy as np
import pandas as pd
from scipy.signal import welch
import math

In [3]:
def bandpower(data, sf, band, window_sec=None):
    band = np.array(band)
    freqs, psd = welch(data, sf, nperseg=int(window_sec * sf) if window_sec else None)
    idx_band = np.logical_and(freqs >= band[0], freqs <= band[1])
    return np.trapz(psd[idx_band], freqs[idx_band])  # Integral over the band


In [4]:
bands = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha': (8, 12),
    'beta': (12, 30),
    'gamma': (30, math.inf)  # Upper bound may vary by sampling freq
}


In [5]:
def extract_spectral_features(eeg_df, sf=250):
    features = []

    for i, row in eeg_df.iterrows():
        signal = row.values  # 1D EEG signal
        person_features = {}
        total_power = bandpower(signal, sf, (0.5, 100))  # Total power for normalization

        for band_name, band_range in bands.items():
            power = bandpower(signal, sf, band_range)
            person_features[f"{band_name}_power"] = power
            person_features[f"{band_name}_rel_power"] = power / total_power if total_power != 0 else 0
        
        features.append(person_features)

    return pd.DataFrame(features)


In [6]:
data = pd.read_csv('../data/preprocessed.csv')
data.describe()

Unnamed: 0,0,-1.6083028,-13.170696,-8.997802,1.347497,5.259585,-1.5213675,2.6515262,4.6510377,3.7816849,...,-8.041514,-6.5636144.1,-4.998779.1,-5.34652.1,-6.389744,-11.171185,-2.3907204,1.0866911,6.9113555,6.1289377
count,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,...,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0,4998.0
mean,2.165066,-0.554396,-0.561671,-0.538501,-0.649832,-0.754916,-1.075825,-1.182117,-1.452895,-1.383448,...,1.192548,1.486958,1.663732,1.747074,1.897898,2.169059,2.261367,2.391981,2.330666,2.54396
std,1.153153,17.791078,17.597945,17.68283,17.725151,17.404744,17.386387,17.281475,17.368172,17.507716,...,17.851187,17.742657,17.595303,17.587844,17.838905,17.771256,17.782554,17.605261,17.720308,17.52781
min,0.0,-146.09476,-82.371185,-81.84957,-90.5431,-90.80391,-103.40952,-83.58828,-83.58828,-136.70573,...,-82.371185,-103.8442,-72.46056,-75.32943,-125.925766,-81.50183,-82.19731,-82.19731,-116.362885,-89.934555
25%,2.0,-9.345543,-9.258608,-9.519414,-9.693284,-9.432479,-9.693284,-10.127961,-10.562637,-10.301831,...,-7.780708,-7.259097,-7.432967,-7.432967,-6.998291,-6.911355,-6.819353,-6.82442,-6.802686,-6.389744
50%,2.0,-0.391209,-0.622833,-0.652015,-0.73895,-0.73895,-0.999756,-1.304029,-1.347497,-1.608303,...,0.890232,1.086691,1.347497,1.434432,1.521367,1.869109,2.042979,2.138095,2.042979,2.477656
75%,3.0,8.072467,8.041514,8.215385,8.215385,7.892002,7.780708,7.443468,7.172161,7.432967,...,9.78022,10.301831,10.214896,10.301831,10.997314,10.823443,11.08425,11.323321,11.08425,11.25812
max,4.0,105.409035,95.58535,91.93407,109.66886,87.06569,87.41343,77.85055,112.972404,86.28327,...,97.75873,102.71404,93.4989,90.10842,87.32649,97.06325,91.58633,90.282295,84.54456,84.805374


In [7]:
eeg_df = data.drop(columns=['0'])
labels = data['0']

In [8]:

# Step 2: Extract features
spectral_features_df = extract_spectral_features(eeg_df, sf=250)

# Step 3: Merge with labels and proceed to model training
# labels = pd.read_csv("labels.csv")
full_df = spectral_features_df.join(labels)


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
  return np.trapz(psd[idx_band], freqs[idx_band])  # Integral over the band


In [9]:
X = eeg_df
X_processed = spectral_features_df

y = labels

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)

# Evaluate
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.40      0.04      0.07       102
           1       0.36      0.07      0.12       140
           2       0.50      0.80      0.62       400
           3       0.69      0.80      0.74       208
           4       0.40      0.23      0.29       150

    accuracy                           0.54      1000
   macro avg       0.47      0.39      0.37      1000
weighted avg       0.50      0.54      0.47      1000



In [12]:
processed_X_train, processed_X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42, stratify=y)


In [13]:


clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(processed_X_train, y_train)

# Predict
y_pred = clf.predict(processed_X_test)

# Evaluate
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.59      0.50      0.54       102
           1       0.48      0.29      0.36       140
           2       0.56      0.65      0.60       400
           3       0.69      0.77      0.73       208
           4       0.48      0.43      0.45       150

    accuracy                           0.58      1000
   macro avg       0.56      0.53      0.54      1000
weighted avg       0.57      0.58      0.57      1000



: 

we notice improvement in the accuracy, especially for the non-dominant class
