## 00. Imports

In [15]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

## 01. Load Raw Data

In [None]:
# Both the morning and the evening EEG recordings are required; eventually the
# user should be asked to supply them. For now they are read from a single
# subject's folder, named once here so that switching subject is a one-line edit.
SUBJECT_DIR = '../raw_data/MEFAR/subject_1'
raw_EEG_morning = pd.read_csv(f'{SUBJECT_DIR}/1.morning/EEG.csv')
raw_EEG_evening = pd.read_csv(f'{SUBJECT_DIR}/2.evening/EEG.csv')

In [19]:
# Preview the first five rows of the morning recording.
raw_EEG_morning.head(n=5)

Unnamed: 0,obs,time,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,Attention,Meditation,Derived,totPwr,class
0,1,2.1,1367388,477114,1008660,19742,118593,1106131,189004,130886,97,72,,4417518,X
1,2,3.1,52540,488771,7493,72752,374141,16447,27181,236978,29,51,,1276303,X
2,3,4.5,43867,6856,259509,1318926,400,47195,61020,309015,56,58,,2046788,X
3,4,5.5,105,597442,241684,211938,3208,283135,976308,370105,7,1,,2683925,X
4,5,6.5,56214,404868,100342,457757,118571,262792,523990,21782,36,82,,1946316,X


In [18]:
# Preview the first five rows of the evening recording.
raw_EEG_evening.head(n=5)

Unnamed: 0,obs,time,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,Attention,Meditation,Derived,totPwr,class
0,1,2.0,1149360,456497,56001,832346,132280,873570,154621,16194,74,35,,3670869,X
1,2,3.0,35085,9453,206867,1888,142481,862558,411910,837471,18,19,,2507713,X
2,3,4.3,504241,237236,3401,267362,230055,274679,142840,402444,39,61,,2062258,X
3,4,5.3,276870,324591,642811,50609,5651,216119,642,172295,89,63,,1689588,X
4,5,6.3,2656,291863,155428,425454,434218,6273,373675,843327,63,50,,2532894,X


## 02. Preprocess Raw Input

In [None]:
# First preprocessing pass: put the morning recording on a regular 32 Hz grid.
# The steps run as one chain, top to bottom:
#   1. strip stray whitespace from the column labels
#   2. turn the `time` column (seconds) into timedeltas and use it as the index
#   3. discard the columns that are not needed ('ignore' tolerates absent ones)
#   4. order the rows by time
#   5. resample to one sample every 31.25 ms (= 32 Hz), averaging rows that
#      share a bin and linearly interpolating the empty bins
raw_EEG_morning = (
    raw_EEG_morning
    .rename(columns=str.strip)
    .assign(time=lambda frame: pd.to_timedelta(frame['time'], unit='s'))
    .set_index('time')
    .drop(columns=['obs', 'Derived', 'totPwr', 'class'], errors='ignore')
    .sort_index()
    .resample('31.25ms')
    .mean()
    .interpolate()
)

# Inspect the first rows of the resampled frame
raw_EEG_morning.head()

Unnamed: 0_level_0,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,Attention,Meditation
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0 days 00:00:02.100000,1367388.0,477114.0,1008660.0,19742.0,118593.0,1106131.0,189004.0,130886.0,97.0,72.0
0 days 00:00:02.131250,1326299.0,477478.28125,977373.5,21398.5625,126578.875,1072078.375,183947.03125,134201.375,94.875,71.34375
0 days 00:00:02.162500,1285210.0,477842.5625,946087.1,23055.125,134564.75,1038025.75,178890.0625,137516.75,92.75,70.6875
0 days 00:00:02.193750,1244121.0,478206.84375,914800.6,24711.6875,142550.625,1003973.125,173833.09375,140832.125,90.625,70.03125
0 days 00:00:02.225000,1203032.0,478571.125,883514.1,26368.25,150536.5,969920.5,168776.125,144147.5,88.5,69.375


In [21]:
# Min-max normalisation: learn each column's minimum and maximum from the
# resampled morning data, then rescale every column with those statistics.
# The result is wrapped back into a DataFrame so the column labels and the
# time index are kept.
scaler = MinMaxScaler().fit(raw_EEG_morning)
raw_EEG_morning_scaled = pd.DataFrame(
    data=scaler.transform(raw_EEG_morning),
    index=raw_EEG_morning.index,
    columns=raw_EEG_morning.columns,
)

In [22]:
# Preview the first five rows of the scaled morning data.
raw_EEG_morning_scaled.head(n=5)

Unnamed: 0_level_0,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,Attention,Meditation
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0 days 00:00:02.100000,0.40621,0.093582,0.271994,0.006402,0.04202,0.28823,0.052512,0.036806,0.969697,0.717172
0 days 00:00:02.131250,0.394004,0.093654,0.263557,0.006939,0.044849,0.279357,0.051107,0.037738,0.948232,0.710543
0 days 00:00:02.162500,0.381797,0.093725,0.25512,0.007476,0.047679,0.270484,0.049702,0.038671,0.926768,0.703914
0 days 00:00:02.193750,0.369591,0.093797,0.246684,0.008014,0.050508,0.26161,0.048297,0.039603,0.905303,0.697285
0 days 00:00:02.225000,0.357385,0.093868,0.238247,0.008551,0.053338,0.252737,0.046892,0.040535,0.883838,0.690657


## 03. Create the `preprocess_eeg_data` Function

In [23]:

def preprocess_eeg_data(csv_path, resample_rule='31.25ms',
                        drop_columns=('obs', 'Derived', 'totPwr', 'class')):
    """Load one EEG CSV, resample it onto a regular grid and min-max scale it.

    Steps, in order:
      1. read the CSV and strip whitespace from the column labels;
      2. interpret the ``time`` column as seconds and use it as a
         ``TimedeltaIndex``;
      3. drop the columns listed in ``drop_columns`` (absent ones are ignored);
      4. sort by time, resample with ``resample_rule`` (mean of the rows that
         fall into the same bin) and linearly interpolate the empty bins;
      5. scale every remaining column with a ``MinMaxScaler`` fitted on this
         file only, so values are relative to this recording's own min/max.

    Parameters
    ----------
    csv_path
        Anything ``pandas.read_csv`` accepts (path string or file-like object).
        The file must contain a ``time`` column.
    resample_rule : str, default '31.25ms'
        Fixed-duration resampling period. The default of 31.25 ms corresponds
        to 32 Hz.
    drop_columns : iterable of str, default ('obs', 'Derived', 'totPwr', 'class')
        Column names removed before resampling.

    Returns
    -------
    pandas.DataFrame
        Scaled values, indexed by the resampled time grid, with the same
        columns that survived the drop step.

    Raises
    ------
    KeyError
        If the CSV has no ``time`` column (after stripping the labels).
    """
    # Load the CSV and normalise the header labels
    raw_eeg = pd.read_csv(csv_path)
    raw_eeg.columns = raw_eeg.columns.str.strip()

    # `time` holds seconds; a timedelta index is what `resample` needs
    raw_eeg['time'] = pd.to_timedelta(raw_eeg['time'], unit='s')

    # Each step returns a new frame, so nothing is mutated in place
    resampled_eeg = (
        raw_eeg
        .set_index('time')
        .drop(columns=list(drop_columns), errors='ignore')
        .sort_index()
        .resample(resample_rule)
        .mean()
        .interpolate()
    )

    # Normalize with Min-Max Scaling; fit_transform returns a bare array, so
    # the labels and the time index are re-attached afterwards
    scaler = MinMaxScaler()
    scaled_eeg = pd.DataFrame(
        scaler.fit_transform(resampled_eeg),
        columns=resampled_eeg.columns,
        index=resampled_eeg.index
    )

    return scaled_eeg

In [24]:
# Run the full preprocessing pipeline on subject 5's morning recording.
scaled_df = preprocess_eeg_data(
    csv_path='../raw_data/MEFAR/subject_5/1.morning/EEG.csv'
)

In [25]:
# Preview the first five rows of the preprocessed frame.
scaled_df.head(n=5)

Unnamed: 0_level_0,Delta,Theta,Alpha1,Alpha2,Beta1,Beta2,Gamma1,Gamma2,Attention,Meditation
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0 days 00:00:02,0.001444,0.1797,0.034575,0.079059,0.013674,0.003646,0.103029,0.011784,0.808081,0.313131
0 days 00:00:02.031250,0.001617,0.176437,0.036696,0.07787,0.014163,0.006598,0.10027,0.012524,0.801136,0.319444
0 days 00:00:02.062500,0.001789,0.173173,0.038817,0.076681,0.014653,0.00955,0.097512,0.013264,0.794192,0.325758
0 days 00:00:02.093750,0.001961,0.16991,0.040938,0.075493,0.015142,0.012503,0.094753,0.014004,0.787247,0.332071
0 days 00:00:02.125000,0.002134,0.166647,0.043059,0.074304,0.015632,0.015455,0.091995,0.014744,0.780303,0.338384
