# Introduction

In this notebook, we show and describe the results for a baseline classifier, to be used as a point of comparison for all of the results in the paper.

As a baseline classifier, we employ majority-class prediction, i.e. a classifier which always predicts the most frequent class (which in this case is "stressed").

In [4]:
from pandas import MultiIndex, DataFrame, concat, set_option, read_csv
from IPython.display import display
from numpy import sqrt, round

def clean_names(x: str) -> str:
    """Strip separator residue from one token of a split column label.

    Exactly one rule is applied, the first that matches:

    * token starts with ``"_"``  -> the first character is dropped;
    * token starts with ``" "``  -> the first two characters and the last
      character are dropped (presumably the space plus the surrounding
      quotes of a stringified tuple element -- confirm against the CSV header);
    * token ends with ``"_"``    -> the last character is dropped;
    * otherwise the token is returned unchanged.

    An empty token is returned unchanged instead of raising ``IndexError``.

    Args:
        x: A single token obtained by splitting a column label.

    Returns:
        The cleaned token.
    """
    # startswith/endswith are safe on "", unlike x[0] / x[-1].
    if x.startswith("_"):
        return x[1:]
    if x.startswith(" "):
        return x[2:-1]
    if x.endswith("_"):
        return x[:-1]
    return x

# Maps a human-readable session-group label to the list of session directory
# names whose results are loaded for that group.
unimodal_sessions: dict[str, list[str]] = {
    # TODO: missing tests over non-unravelled! ->
    "DUMMY - unimodal - other joins": ["rootsession2"],
    "DUMMY - unimodal - unravelled": [
        "rootsession3",
    ],
}

# Sample size used in the standard-error formula se = std / sqrt(n).
# NOTE(review): presumably the number of cross-validation folds stored in
# each cross_val.csv -- confirm it matches the number of rows actually loaded.
SE_SAMPLE_SIZE = 10

# One summary frame per session. They are concatenated once after the loop
# instead of growing `final_df` with `concat` on every iteration, which
# re-copies all previous rows each time and concatenates with an empty frame.
session_summaries = []
for session_name, sessions in unimodal_sessions.items():
    print(f"{session_name}")
    for session in sessions:
        print(f"Loading session {session}")
        path: str = f"../results/train/dummy/{session}/cross_val.csv"
        data = read_csv(path, index_col=0, engine="c")
        # Discard every column whose label mentions "deep_features".
        data = data.drop(
            columns=[col for col in data.columns if "deep_features" in col]
        )
        # Column-wise mean over the rows of the CSV.
        avg_df = data.mean(axis=0)

        # Break each column label on "(", ")" and ",", drop the
        # "'hand_crafted_features'" token and clean what is left. The
        # remaining tokens become the (Join type, Feature, ML Model) levels.
        new_idx = [
            tuple(
                clean_names(token)
                for outer in label.split("(")
                for inner in outer.split(")")
                for token in inner.split(",")
                if token != "'hand_crafted_features'"
            )
            for label in avg_df.index
        ]
        # Prepend the session-group label as the first index level.
        new_idx = [(session_name, *levels) for levels in new_idx]
        avg_df.index = MultiIndex.from_tuples(
            new_idx,
            names=["Session Name", "Join type", "Feature", "ML Model"],
        )

        new_data = DataFrame()
        # Both columns are scaled by 100 and rounded to two decimals.
        new_data['mean'] = round(avg_df * 100, decimals=2).astype(float)
        # `.values` drops the original column labels so the standard errors
        # are assigned positionally against the new MultiIndex.
        new_data['se'] = round(
            data.std(axis=0).values / sqrt(SE_SAMPLE_SIZE) * 100, decimals=2
        ).astype(float)

        session_summaries.append(new_data)

# `concat` raises on an empty list, so fall back to an empty frame.
final_df = concat(session_summaries) if session_summaries else DataFrame()

set_option('display.max_rows', 500)
display(final_df.sort_values(by=['mean'], ascending=False))

DUMMY - unimodal - other joins
Loading session rootsession2
DUMMY - unimodal - unravelled
Loading session rootsession3


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,se
Session Name,Join type,Feature,ML Model,Unnamed: 4_level_1,Unnamed: 5_level_1
DUMMY - unimodal - other joins,feature_average,ECG_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,feature_average,GSR_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,window_average,ST_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,concat_feature_level,ST_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,window_average,GSR_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,concat_feature_level,GSR_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,feature_average,ST_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,concat_feature_level,ECG_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,window_average,ECG_features,MostFrequent,52.75,0.06
DUMMY - unimodal - other joins,feature_average,ECG_features,Uniform,51.06,1.04
