# Baselines

In [14]:
import notebook_setup

import pandas as pd

from src.utils.utils import get_data_dir
from src.utils.constants import Constants

In [15]:
# Resolve both dataset locations up front so every path used by the notebook
# is a named constant rather than being built inline at the call site.
DATA_PATH = get_data_dir()
PROCESSED_TRAIN_DATA_PATH = DATA_PATH / "processed" / "train_processed.csv"
TEST_DATA_PATH = DATA_PATH / "test.csv"

# Load the processed training labels and the test metadata.
train_df = pd.read_csv(PROCESSED_TRAIN_DATA_PATH)
test_df = pd.read_csv(TEST_DATA_PATH)

In [16]:
# Preview the first rows of the training frame to check its columns.
train_df.head()

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,568657,0,0.0,789577333,0,0.0,1825637311,20654,Other,0.0,0.0,0.25,0.0,0.166667,0.583333
1,582999,0,0.0,1552638400,0,0.0,1722186807,20230,LPD,0.0,0.857143,0.0,0.071429,0.0,0.071429
2,642382,0,0.0,14960202,12,1008.0,3254468733,5955,Other,0.0,0.0,0.0,0.0,0.0,1.0
3,751790,0,0.0,618728447,4,908.0,2898467035,38549,GPD,0.0,0.0,1.0,0.0,0.0,0.0
4,778705,0,0.0,52296320,0,0.0,3255875127,40955,Other,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
# Preview the first rows of the test frame to check its columns.
test_df.head()

Unnamed: 0,spectrogram_id,eeg_id,patient_id
0,853520,3911565283,6885
1,853521,3911565283,6885


## Uniform Baseline

This baseline assigns equal probabilities (1/6 each) to all six possible classes.

In [18]:
# One probability per target class, each equal to 1 / (number of classes).
num_classes = len(Constants.TARGETS)
uniform_predictions = [1 / num_classes for _ in range(num_classes)]
uniform_predictions

[0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666,
 0.16666666666666666]

In [19]:
# Build the prediction frame directly from the test ids instead of inserting
# them into a separately constructed frame: `DataFrame.insert` aligns a Series
# on index labels, which would silently produce NaN ids if `test_df` ever had
# a non-default index. Resetting the index keeps the 0..n-1 row labels.
uniform_test_predictions = (
    test_df[[Constants.EEG_ID_COL]]
    .reset_index(drop=True)
    # Pair each target column with its probability by name; the scalar values
    # are broadcast to every row, so no per-row Python list is materialized.
    .assign(**dict(zip(Constants.TARGETS, uniform_predictions)))
)

uniform_test_predictions

Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667
1,3911565283,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667


## Mean Baseline

This baseline predicts, for every test instance, the per-class mean of the training vote fractions.

In [20]:
# Column-wise average of the target columns over all training rows.
mean_predictions = train_df.loc[:, Constants.TARGETS].mean(axis=0)
mean_predictions

seizure_vote    0.144946
lpd_vote        0.142988
gpd_vote        0.105177
lrda_vote       0.066077
grda_vote       0.115532
other_vote      0.425280
dtype: float64

In [21]:
# Build the prediction frame directly from the test ids instead of stacking
# one Series per test row and inserting the ids afterwards: `DataFrame.insert`
# aligns a Series on index labels, which would silently produce NaN ids if
# `test_df` ever had a non-default index, and constructing a frame from a
# list of n Series is needlessly slow. Resetting the index keeps the 0..n-1
# row labels.
mean_test_predictions = (
    test_df[[Constants.EEG_ID_COL]]
    .reset_index(drop=True)
    # Look each target up by name so column labels can never drift from the
    # values they hold; the scalar means are broadcast to every row.
    .assign(**{target: mean_predictions[target] for target in Constants.TARGETS})
)
mean_test_predictions

Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.144946,0.142988,0.105177,0.066077,0.115532,0.42528
1,3911565283,0.144946,0.142988,0.105177,0.066077,0.115532,0.42528
