# Imports

In [42]:
from pathlib import Path
import re

from more_itertools import powerset

import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.formula.api import ols

from constants import DataSplit, Model, Metric, DATASET_SYMBOLS, CONSENSUS_BASELINES

# Script Parameters

In [43]:
# Directory containing experiment data to analyze
data_dir = './out'

# If True, join all files within data_dir (and subdirectories, used with replications).
# If False, only use <data_dir>/results.csv
concat_results = True

# Desired alpha for statistical testing (threshold for ANOVA model reduction)
alpha = 0.05

# Print settings

# Only display/print reduced models
only_reduced = True

# Print LaTeX instead of DataFrames
latex_output = False

# Combine full and reduced ANOVA models into one LaTeX table
# (REQUIRES latex_output = True, OVERRIDES only_reduced)
combine_anova_latex = True

# Function Definitions

In [44]:
# Pivot table aggregation technique.
#
# Both output modes report the mean and the *sample* standard deviation (ddof=1):
#   * DataFrame mode uses pandas' named aggregations. Passing np.mean / np.std
#     here is deprecated in pandas 2.x, which substitutes its own `mean` / `std`
#     (ddof=1) for them, so the named forms keep the same column labels
#     ('mean', 'std') and the same numbers without the FutureWarning.
#   * LaTeX mode formats each cell as "$mean \pm std$". np.std defaults to
#     ddof=0, so ddof=1 is passed explicitly to match the DataFrame mode.
pivot_agg_func = (
    (lambda x: f'${np.mean(x):.3f} \\pm {np.std(x, ddof=1):.3f}$')
    if latex_output
    else ['mean', 'std']
)

# These functions are used to clean up the ANOVA and LaTeX printouts
def clean_effect_name(name):
    """Abbreviate an effect label to the initials of its double-quoted parts.

    Every substring enclosed in double quotes is reduced to its first
    character and the initials are joined with ':' (for example
    'Q("Forex"):Q("Bond")' becomes 'F:B'). Labels that contain no double
    quote are returned unchanged.
    """
    if '"' not in name:
        return name

    quoted_parts = re.findall(r'"([^"]*)"', name)
    return ':'.join(part[0] for part in quoted_parts)

def fix_ordering(df, level=1):
    """Return `df` with its columns re-ordered to follow the order of `Metric`.

    `level` selects the column level that holds the metric names
    (the values of the `Metric` members).
    """
    metric_order = [metric.value for metric in Metric]
    # Index.reindex returns (new_index, indexer); only the new index is needed
    ordered_columns, _ = df.columns.reindex(metric_order, level=level)
    return df.reindex(columns=ordered_columns)

def get_style(df):
    """Return a Styler for `df` whose LaTeX rules are drawn with hline commands.

    The top and bottom rules become a single horizontal line and the mid rule
    becomes a double horizontal line.
    """
    return df.style.set_table_styles([
        {'selector': 'toprule', 'props': ':hline;'},
        # raw string: a bare backslash-h is an invalid escape sequence in a normal literal
        {'selector': 'midrule', 'props': r':hline\hline;'},
        {'selector': 'bottomrule', 'props': ':hline;'}
    ])

def get_anova_latex(df, name):
    """Render an ANOVA summary as a LaTeX table and return it as a string.

    Parameters
    ----------
    df : DataFrame
        Two-level columns with the metric on level 0 and the statistic on
        level 1; only the 'Coefficient' and 'PR(>F)' statistics are kept.
    name : str
        Used as the table caption.

    The layout depends on the notebook-level `combine_anova_latex` flag: when
    it is set, an extra leading index column is expected and its labels are
    rotated by 90 degrees.
    """
    df = df.loc[:, (slice(None), ['Coefficient','PR(>F)'])]
    df = df.rename(index={'Residual/Intercept': 'Intercept'}, columns={'PR(>F)': 'P-value'})
    df = fix_ordering(df, level=0)

    s = get_style(df)
    s.format({(metric, col): f'${{:.{precision}f}}$'
              for metric in Metric
              for col, precision in {'Coefficient':5, 'P-value':3}.items()})
    if combine_anova_latex:
        s.format_index('\\rotatebox[origin=c]{{90}}{{{}}}', level=0)

    # one 'rc|' pair (coefficient, p-value) per metric
    col_format = '|c|'*combine_anova_latex + '|l|' + 'rc|'*(len(df.columns)//2)

    latex = s.to_latex(column_format=col_format, caption=name, multicol_align='|c|', position='htbp', position_float='centering')
    latex = latex.replace('$nan$','-') # replace NaNs with dashes
    latex = latex.replace('\\\\\nIntercept', '\\\\ \\hdashline\nIntercept') # put hdashline before intercept row
    latex = latex.replace('\\hline \\hdashline', '\\hline') # remove hdashline if placed after hline (occurs when intercept is right under the toprule)
    if combine_anova_latex:
        latex = latex.replace('\\\\\n &  &', '\\\\\nModel & Effect &') # index names (letting pandas handle this gets the header wrong)
        latex = latex.replace('\\\\\n\\multirow', '\\\\ \\hline\\hline\n\\multirow') # put double hline above reduced model
    else:
        latex = latex.replace('\n & Coefficient', '\nEffect & Coefficient') # index name (letting pandas handle this gets the header wrong)

    return latex

# Data Loading

In [45]:
path = Path(data_dir)

# With concat_results every file whose name ends in 'results.csv' is picked up,
# in data_dir and in any of its subdirectories; otherwise only <data_dir>/results.csv
pattern = rf'{"**/*" if concat_results else ""}results.csv'

# Sorted so that the row order of `data` does not depend on the order in which
# the filesystem happens to enumerate the files
result_files = sorted(path.glob(pattern))
if not result_files:
    # pd.concat would otherwise fail with an unhelpful "No objects to concatenate"
    raise FileNotFoundError(f'No files matching "{pattern}" found in {path.resolve()}')

data = pd.concat([pd.read_csv(filename, index_col=0) for filename in result_files], ignore_index=True)
display(data)

Unnamed: 0,DecisionTree,RandomForest,LogisticRegression,Linear SVM,KNN,RandomBaseline,ConstantBaseline,PreviousBaseline,ConsensusBaseline,Forex,Bond,Index Futures,Commodities Futures,SPY,Random,Test,Accuracy,Macro F1,Weighted F1,ROC AUC
0,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,0.709172,0.708243,0.709352,0.780650
1,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,0.499002,0.496684,0.500502,0.507571
2,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,0.872483,0.872108,0.872575,0.957834
3,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,0.497006,0.493096,0.498072,0.496413
4,True,False,False,False,False,False,False,False,False,False,True,True,False,True,False,False,0.789197,0.785543,0.788714,0.873329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15295,False,False,False,False,False,False,False,False,True,True,False,True,True,True,False,True,0.495475,0.475140,0.501318,
15296,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,0.506523,0.493364,0.510346,
15297,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,0.503145,0.474699,0.511602,
15298,False,False,False,False,False,False,False,False,True,True,True,True,True,True,False,False,0.506523,0.493364,0.510346,


# Asset Presence Analysis (model-averaged, baseline models and training data excluded)

In [46]:
# Out-of-sample rows of the non-baseline models only
df = data.loc[data[DataSplit.TEST] & ~data[Model.RANDOM_BASELINE] & ~data[Model.CONSTANT_BASELINE] & ~data[Model.PREVIOUS_BASELINE] & ~data[Model.CONSENSUS_BASELINE]]

metrics = list(Metric)
asset_types = list(DATASET_SYMBOLS.keys())

# One table per asset type: metrics aggregated by presence (True/False) of that
# asset type, random-data rows excluded
tables = [
    df.loc[~df['Random']]
      .pivot_table(values=metrics,
                   index=asset_type,
                   aggfunc=pivot_agg_func)
    for asset_type in asset_types
]

tables += [
    # rows that use none of the asset types and are not random data
    df[~df[asset_types + ['Random']].any(axis=1)]
      .pivot_table(values=metrics,
                   index='SPY',
                   aggfunc=pivot_agg_func),
    # random-data rows
    df.loc[df['Random']]
      .pivot_table(values=metrics,
                   index='Random',
                   aggfunc=pivot_agg_func)
]

# Stack the tables under their grouping column name, "present" rows first
pivot = pd.concat(tables, keys=[tab.index.name for tab in tables], names=['Asset Type','Presence']) \
          .rename(index={'SPY': 'SPY-Only', 'Random': 'Random Data'}) \
          .reindex([True, False], level=1)

if latex_output:
    # two index columns followed by one centred column per value column, so the
    # tabular spec always matches the table width
    column_format = '|lc|' + 'c' * len(pivot.columns) + '|'
    print(get_style(pivot).to_latex(column_format=column_format, position='htbp', position_float='centering'))
else:
    display(fix_ordering(pivot))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,std,std,std,std
Unnamed: 0_level_1,Unnamed: 1_level_1,Accuracy,Macro F1,Weighted F1,ROC AUC,Accuracy,Macro F1,Weighted F1,ROC AUC
Asset Type,Presence,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Forex,True,0.530935,0.476261,0.495917,0.511788,0.029356,0.045655,0.035661,0.027427
Forex,False,0.538855,0.484173,0.503574,0.52309,0.026918,0.05031,0.039917,0.028217
Bond,True,0.536813,0.487089,0.506093,0.520721,0.030167,0.043927,0.0353,0.03132
Bond,False,0.532977,0.473345,0.493399,0.514157,0.026463,0.051212,0.039594,0.024695
Index Futures,True,0.538151,0.485306,0.504566,0.523909,0.029168,0.047475,0.038286,0.032037
Index Futures,False,0.53164,0.475128,0.494926,0.510969,0.027309,0.048384,0.037177,0.02241
Commodities Futures,True,0.539238,0.484114,0.50584,0.519746,0.031196,0.048229,0.037086,0.03267
Commodities Futures,False,0.530553,0.47632,0.493651,0.515133,0.024633,0.047857,0.038009,0.023116
SPY-Only,True,0.528814,0.465952,0.483449,0.510901,0.024477,0.044986,0.033837,0.014424
Random Data,True,0.523741,0.462406,0.47967,0.502412,0.024065,0.054962,0.043363,0.019051


# Asset Combinations (model-averaged, baseline models and training data excluded)

In [47]:
# Out-of-sample rows of the non-baseline models only (copied, since columns are overwritten below)
df = data.loc[data[DataSplit.TEST] & ~data[Model.RANDOM_BASELINE] & ~data[Model.CONSTANT_BASELINE] & ~data[Model.PREVIOUS_BASELINE] & ~data[Model.CONSENSUS_BASELINE]].copy()

asset_types = list(DATASET_SYMBOLS.keys())

# Replace each presence flag by the asset type's initial (or nothing when absent) ...
for asset_type in asset_types:
    df[asset_type] = df[asset_type].map({True: asset_type[0], False: ''})

df['Random'] = df['Random'].map({True: 'Random Data', False: ''})

# ... so that concatenating the columns row-wise spells out the combination, e.g. 'FB'
df['Asset Combination'] = df[asset_types + ['Random']].apply(lambda x: ''.join(x.values.astype(str)), axis=1)

pivot = df.pivot_table(values=list(Metric),
                       index='Asset Combination',
                       aggfunc=pivot_agg_func)

# Rows with no asset type and no random data have an empty label
pivot = pivot.rename(index={'': 'SPY-Only'})

# Order rows by combination size; powerset yields the empty combination first,
# which has been renamed above and is therefore skipped here
initials = ''.join(asset_type[0] for asset_type in asset_types)
combinations = [''.join(c) for c in powerset(initials)][1:]
pivot = pivot.reindex(combinations + ['SPY-Only', 'Random Data'])

if latex_output:
    # one index column followed by one centred column per value column, so the
    # tabular spec always matches the table width
    column_format = '|l|' + 'c' * len(pivot.columns) + '|'
    print(get_style(pivot).to_latex(column_format=column_format, position='htbp', position_float='centering'))
else:
    display(fix_ordering(pivot))

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
Unnamed: 0_level_1,Accuracy,Macro F1,Weighted F1,ROC AUC,Accuracy,Macro F1,Weighted F1,ROC AUC
Asset Combination,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
F,0.51984,0.464578,0.48077,0.502201,0.021387,0.054932,0.042922,0.013238
B,0.53651,0.487091,0.504226,0.523287,0.021979,0.045686,0.034775,0.016645
I,0.534004,0.479261,0.496312,0.520436,0.021749,0.045216,0.036194,0.019405
C,0.532031,0.46329,0.489193,0.501274,0.027954,0.052168,0.038417,0.019099
FB,0.528649,0.480183,0.497638,0.514243,0.024817,0.041825,0.03272,0.023977
FI,0.54007,0.486601,0.5033,0.523089,0.024984,0.051367,0.042973,0.028213
FC,0.534858,0.460599,0.487684,0.50755,0.024783,0.051716,0.037329,0.020352
BI,0.532933,0.474393,0.493322,0.522191,0.022601,0.055305,0.044183,0.02951
BC,0.548795,0.504627,0.522452,0.528001,0.028946,0.039061,0.029889,0.02469
IC,0.542434,0.489468,0.509061,0.532033,0.025419,0.054482,0.041251,0.026708


# Model Performance (dataset-averaged, random data excluded)
## Out-sample

In [48]:
# Random-data rows are excluded
df = data.loc[~data['Random']]

# One table per model: out-of-sample metrics aggregated by that model's flag column
tables = [
    df.loc[df[DataSplit.TEST]]
      .pivot_table(values=list(Metric),
                   index=model,
                   aggfunc=pivot_agg_func)
    for model in Model
]

pivot = pd.concat(tables, keys=[tab.index.name for tab in tables], names=['Model'])

# The second index level holds the flag value; keep only the rows where the
# model was actually used (flag True) and drop the now-constant level
model_used = pivot.index.get_level_values(1)
pivot = pivot.loc[model_used].droplevel(1)

if latex_output:
    # one index column followed by one centred column per value column, so the
    # tabular spec always matches the table width
    column_format = '|l|' + 'c' * len(pivot.columns) + '|'
    print(get_style(pivot).to_latex(column_format=column_format, position='htbp', position_float='centering'))
else:
    display(fix_ordering(pivot))

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
Unnamed: 0_level_1,Accuracy,Macro F1,Weighted F1,ROC AUC,Accuracy,Macro F1,Weighted F1,ROC AUC
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
DecisionTree,0.510665,0.499409,0.508287,0.502165,0.023628,0.024706,0.023284,0.026341
RandomForest,0.535501,0.488149,0.508631,0.505507,0.020405,0.02335,0.021359,0.023863
Linear SVM,0.569656,0.400815,0.442721,0.53837,0.011752,0.029612,0.028253,0.018295
KNN,0.522114,0.50128,0.513969,0.512971,0.022962,0.023775,0.022354,0.024241
LogisticRegression,0.536539,0.511431,0.52512,0.528182,0.021049,0.02939,0.026266,0.029966
RandomBaseline,0.516363,0.512286,0.518188,0.5,0.025863,0.025654,0.025946,0.0
ConstantBaseline,0.566104,0.361456,0.409296,0.5,0.008061,0.003284,0.009553,0.0
PreviousBaseline,0.535565,0.527326,0.535495,,0.020922,0.020591,0.020893,
ConsensusBaseline,0.495972,0.474116,0.50353,,0.010374,0.007761,0.011036,


## In-sample

In [49]:
# Random-data rows are excluded
df = data.loc[~data['Random']]

# One table per model: in-sample (non-test) metrics aggregated by that model's flag column
tables = [
    df.loc[~df[DataSplit.TEST]]
      .pivot_table(values=list(Metric),
                   index=model,
                   aggfunc=pivot_agg_func)
    for model in Model
]

pivot = pd.concat(tables, keys=[tab.index.name for tab in tables], names=['Model','used'])

# The 'used' level holds the flag value; keep only the rows where the model
# was actually used (flag True) and drop the now-constant level
model_used = pivot.index.get_level_values(1)
pivot = pivot.loc[model_used].droplevel(1)

if latex_output:
    # one index column followed by one centred column per value column, so the
    # tabular spec always matches the table width
    column_format = '|l|' + 'c' * len(pivot.columns) + '|'
    print(get_style(pivot).to_latex(column_format=column_format, position='htbp', position_float='centering'))
else:
    display(fix_ordering(pivot))

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,std
Unnamed: 0_level_1,Accuracy,Macro F1,Weighted F1,ROC AUC,Accuracy,Macro F1,Weighted F1,ROC AUC
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
DecisionTree,0.830631,0.827126,0.829314,0.898762,0.099054,0.103327,0.101483,0.085312
RandomForest,0.998363,0.998331,0.998349,0.99983,0.00984,0.010131,0.010015,0.001848
Linear SVM,0.571199,0.40889,0.440566,0.67503,0.023494,0.046327,0.045119,0.144062
KNN,0.711307,0.704566,0.709061,0.75557,0.112681,0.114817,0.113229,0.098197
LogisticRegression,0.638896,0.6227,0.629983,0.685808,0.057788,0.067987,0.066298,0.076808
RandomBaseline,0.501323,0.49965,0.502654,0.5,0.00818,0.008217,0.008108,0.0
ConstantBaseline,0.551458,0.35543,0.392056,0.5,0.007491,0.003136,0.008709,0.0
PreviousBaseline,0.47766,0.471936,0.477672,,0.008782,0.009732,0.008775,
ConsensusBaseline,0.507286,0.496815,0.510188,,0.007238,0.006879,0.007447,


# Statistical Analysis of Factor Effects (random data baseline excluded)
## ANOVA Model Generation

In [50]:
def _with_coefficients(anova_table, fit):
    """Label the residual row 'Residual/Intercept' and append the fitted coefficients as a 'Coefficient' column."""
    coefficients = fit.params.rename(index={'Intercept': 'Residual/Intercept'})
    coefficients.name = 'Coefficient'
    return anova_table.rename(index={'Residual': 'Residual/Intercept'}).join(coefficients)


# Out-of-sample rows only, random-data rows excluded (copied, since columns are recoded below)
all_df = data.loc[data[DataSplit.TEST] & ~data['Random']].copy()

# Recode the boolean flags as +1 / -1 (required to get coefficients, does not
# change the ANOVA results). Only boolean columns are touched, so the metric
# columns can never be altered, and the result does not rely on the dtype
# downcasting of DataFrame.replace, which pandas has deprecated.
for column in all_df.select_dtypes(include='bool').columns:
    all_df[column] = np.where(all_df[column], 1, -1)

anovas = {}

for model in Model:
    # rows produced by this model (its flag was True before recoding)
    df = all_df.loc[all_df[model] == 1]

    anovas[model] = {
        'full': {},
        'reduced': {}
    }

    if not latex_output:
        print(f'\nAnalyzing {model}...')

    for metric in Metric:
        if model in CONSENSUS_BASELINES and metric is Metric.ROC_AUC:
            continue

        # full factorial model over all asset types
        relation = f'Q("{metric}") ~ ' + ' * '.join(['Q("'+asset_type+'")' for asset_type in DATASET_SYMBOLS.keys()])
        glm = ols(relation, data=df).fit()
        aov = sm.stats.anova_lm(glm, typ=1)

        anovas[model]['full'][metric.value] = _with_coefficients(aov, glm)

        if not latex_output:
            print(f'\nReducing over {metric}...')

        no_reductions = True

        # while non-significant effects, remove the least significant effect and associated interactions and refit ANOVA model
        while (aov['PR(>F)'] > alpha).any():
            no_reductions = False
            # Select by p-value, the same statistic the loop condition and the
            # message use (for equal-df effects this is the effect with the smallest F)
            rem_effect = aov['PR(>F)'].idxmax()
            if not latex_output:
                print(f'removing effect {clean_effect_name(rem_effect)} (p={aov.loc[rem_effect, "PR(>F)"]:.3f})')

            # drop the effect itself and every interaction that contains all of its factors
            removed_factors = rem_effect.split(':')
            for effect in aov.index:
                effect_factors = effect.split(':')
                if all(factor in effect_factors for factor in removed_factors):
                    relation += f' - {effect}'

            glm = ols(relation, data=df).fit()
            aov = sm.stats.anova_lm(glm, typ=1)

        if no_reductions:
            if not latex_output:
                print('no effects removed')
        else:
            anovas[model]['reduced'][metric.value] = _with_coefficients(aov, glm)


Analyzing DecisionTree...

Reducing over Accuracy...
removing effect B:C (p=0.986)
removing effect F:B:I (p=0.797)
removing effect F (p=0.785)
removing effect C (p=0.087)
removing effect B:I (p=0.076)
removing effect B (p=0.068)

Reducing over Macro F1...
removing effect F (p=0.837)
removing effect B:I (p=0.699)
removing effect B:C (p=0.504)
removing effect I (p=0.258)
removing effect B (p=0.086)

Reducing over Weighted F1...
removing effect B:I (p=0.947)
removing effect F (p=0.764)
removing effect B:C (p=0.720)
removing effect C (p=0.346)
removing effect I (p=0.052)

Reducing over ROC AUC...
removing effect F:B:I (p=0.994)
removing effect B:I:C (p=0.872)
removing effect F:C (p=0.821)
removing effect B:C (p=0.541)
removing effect B (p=0.420)
removing effect I:C (p=0.207)
removing effect F (p=0.074)
removing effect I (p=0.073)

Analyzing RandomForest...

Reducing over Accuracy...
removing effect B:C (p=0.699)
removing effect F:I (p=0.554)
removing effect F:C (p=0.476)
removing effect I

## Printouts

In [51]:
# Row order for every table: effects by interaction order (the empty
# combination yielded first by powerset is skipped), then the residual/intercept row
index_ordering = [':'.join(c) for c in powerset(''.join(asset_type[0] for asset_type in DATASET_SYMBOLS.keys()))][1:] + ['Residual/Intercept']

for model, model_anovas in anovas.items():
    full_join = model_anovas['full']
    reduced_join = model_anovas['reduced']

    full_model = pd.concat(full_join, axis=1)
    full_model = full_model.set_index(full_model.index.map(clean_effect_name)).reindex(index_ordering)

    if len(reduced_join) == 0: # no reduction occurred
        if latex_output: # print LaTeX
            if combine_anova_latex: # combined table format
                print(get_anova_latex(pd.concat({'Full \\& Reduced': full_model}), f'{model}'))
            else: # individual table format
                print(get_anova_latex(full_model, f'{model} (Full \\& Reduced)'))
        else: # display DataFrame
            print(f'\n{model} (Full & Reduced):')
            display(full_model)
    else:  # reduction occurred
        # metrics for which nothing was removed fall back to their full model
        merged_join = {metric: reduced_join.get(metric, full_join[metric])
                       for metric in full_join}
        reduced_model = pd.concat(merged_join, axis=1)
        reduced_model = reduced_model.set_index(reduced_model.index.map(clean_effect_name)).reindex(index_ordering)
        # effects removed for every metric leave an all-NaN row
        reduced_model = reduced_model.dropna(how='all')
        if latex_output:  # print LaTeX
            if combine_anova_latex: # combine full and reduced ANOVA models into one table
                print(get_anova_latex(pd.concat({'Full':full_model, 'Reduced':reduced_model}), f'{model}'))
            else:  # individual table format
                if not only_reduced:  # print full model if desired
                    print(get_anova_latex(full_model, f'{model} (Full)'))
                print(get_anova_latex(reduced_model, f'{model} (Reduced)'))
        else:  # display DataFrames
            if not only_reduced:
                print(f'\n{model} (Full):')
                display(full_model)
            print(f'\n{model} (Reduced):')
            display(reduced_model)


DecisionTree (Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
B,,,,,,,,,,,...,0.002875,5.331766,0.021195,0.001896,,,,,,
I,1.0,0.00559,0.00559,10.128264,0.001517,0.002643,,,,,...,,,,,,,,,,
C,,,,,,,1.0,0.003404,0.003404,5.609455,...,,,,,1.0,0.004482,0.004482,6.503896,0.01095,-0.002367
Residual/Intercept,798.0,0.440466,0.000552,,,0.510665,798.0,0.484278,0.000607,,...,0.000539,,,0.508287,798.0,0.549888,0.000689,,,0.502165



RandomForest (Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,,,,,,,1.0,0.00789,0.00789,15.18813,...,0.005429,12.505089,0.000429,-0.002605,1.0,0.003334,0.003334,6.543518,0.01071173,-0.002041
B,1.0,0.016696,0.016696,42.65598,1.162122e-10,0.004568,,,,,...,0.004148,9.554344,0.002064,0.002277,1.0,0.00418,0.00418,8.205684,0.004286409,0.002286
I,,,,,,,1.0,0.006522,0.006522,12.554708,...,0.002975,6.853676,0.009014,-0.001928,1.0,0.004002,0.004002,7.85457,0.005193065,-0.002237
C,1.0,0.004036,0.004036,10.312908,0.001374026,0.002246,1.0,0.003549,0.003549,6.831312,...,,,,,1.0,0.004113,0.004113,8.072671,0.004609167,-0.002267
B:I,,,,,,,,,,,...,0.006855,15.791801,7.7e-05,-0.002927,1.0,0.02438,0.02438,47.85427,9.470471e-12,-0.00552
B:C,,,,,,,,,,,...,,,,,1.0,0.00318,0.00318,6.241273,0.01268235,0.001994
I:C,,,,,,,1.0,0.0047,0.0047,9.048528,...,,,,,1.0,0.008307,0.008307,16.30581,5.913801e-05,0.003222
Residual/Intercept,797.0,0.311947,0.000391,,,0.535501,795.0,0.412972,0.000519,,...,0.000434,,,0.508631,792.0,0.403494,0.000509,,,0.505507



Linear SVM (Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,0.000646,0.000646,37.366411,1.538894e-09,-0.000898,1.0,0.000611,0.000611,16.10629,...,0.000497,17.240415,3.654794e-05,-0.000788,1.0,0.003941,0.003941,518.268485,1.833045e-88,-0.00222
B,1.0,0.008991,0.008991,520.395093,7.746651e-89,0.003352,1.0,0.28873,0.28873,7616.138095,...,0.239512,8306.945551,0.0,0.017303,1.0,0.037569,0.037569,4940.419636,0.0,0.006853
I,1.0,0.004111,0.004111,237.932939,4.164122e-47,0.002267,1.0,0.058953,0.058953,1555.052502,...,0.046501,1612.781156,1.9859709999999998e-192,0.007624,1.0,0.140552,0.140552,18482.877079,0.0,0.013255
C,1.0,0.062729,0.062729,3630.924374,2.969416e-297,0.008855,1.0,0.072575,0.072575,1914.378886,...,0.151281,5246.844028,0.0,0.013751,1.0,0.015994,0.015994,2103.290001,3.8272829999999997e-224,0.004471
F:B,,,,,,,1.0,0.001581,0.001581,41.69395,...,0.001312,45.506397,2.954799e-11,0.001281,1.0,0.008086,0.008086,1063.362375,4.546196e-148,-0.003179
F:I,1.0,0.011115,0.011115,643.34805,3.120462e-104,0.003727,1.0,0.034349,0.034349,906.063503,...,0.029397,1019.579145,5.558698e-144,0.006062,1.0,5.6e-05,5.6e-05,7.367269,0.00678811,-0.000265
F:C,1.0,0.000159,0.000159,9.177949,0.002529503,0.000445,1.0,0.007357,0.007357,194.060142,...,0.0048,166.465283,1.1253009999999999e-34,-0.002449,1.0,0.002799,0.002799,368.136412,1.457435e-67,-0.001871
B:I,1.0,0.001486,0.001486,86.012331,1.680854e-19,0.001363,1.0,0.021918,0.021918,578.148092,...,0.009413,326.483961,2.81988e-61,-0.00343,1.0,0.002495,0.002495,328.162373,1.55717e-61,-0.001766
B:C,1.0,0.001679,0.001679,97.194485,1.064073e-21,0.001449,1.0,0.112525,0.112525,2968.195944,...,0.068136,2363.150741,8.03327e-239,0.009229,1.0,0.007438,0.007438,978.074569,5.157099e-140,0.003049
I:C,1.0,0.001036,0.001036,59.970626,2.958732e-14,0.001138,1.0,0.017697,0.017697,466.799885,...,0.015301,530.667849,4.4344089999999996e-90,0.004373,1.0,0.016246,0.016246,2136.344954,4.40586e-226,0.004506



KNN (Full & Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,0.098478,0.098478,7.446215e+29,0.0,-0.011095,1.0,0.161297,0.161297,1.216758e+30,...,0.1382702,6.446358e+29,0.0,-0.013147,1.0,0.132663,0.132663,4.72538e+29,0.0,-0.012877
B,1.0,0.006198166,0.006198166,4.686618e+28,0.0,0.002783,1.0,0.01836061,0.01836061,1.3850489999999998e+29,...,0.01856908,8.657173e+28,0.0,0.004818,1.0,0.01766262,0.01766262,6.291327e+28,0.0,0.004699
I,1.0,0.0001570356,0.0001570356,1.187393e+27,0.0,0.000443,1.0,0.0003787777,0.0003787777,2.857342e+27,...,0.0006486404,3.024055e+27,0.0,0.0009,1.0,0.001299789,0.001299789,4.629777e+27,0.0,-0.001275
C,1.0,0.06016221,0.06016221,4.549044e+29,0.0,0.008672,1.0,0.0187742,0.0187742,1.4162479999999998e+29,...,0.03997216,1.86356e+29,0.0,0.007069,1.0,0.04958472,0.04958472,1.76618e+29,0.0,0.007873
F:B,1.0,0.03340729,0.03340729,2.526024e+29,0.0,-0.006462,1.0,0.01036772,0.01036772,7.820983e+28,...,0.01402022,6.536428e+28,0.0,-0.004186,1.0,0.01299395,0.01299395,4.628372e+28,0.0,-0.00403
F:I,1.0,0.004280521,0.004280521,3.236629e+28,0.0,0.002313,1.0,0.02769214,0.02769214,2.08898e+29,...,0.01846585,8.609045e+28,0.0,0.004804,1.0,1.052232e-05,1.052232e-05,3.747992e+25,0.0,-0.000115
F:C,1.0,0.06003819,0.06003819,4.539667e+29,0.0,-0.008663,1.0,0.09398185,0.09398185,7.089601999999999e+29,...,0.08391189,3.9120939999999995e+29,0.0,-0.010242,1.0,0.07979244,0.07979244,2.842162e+29,0.0,-0.009987
B:I,1.0,0.008100654,0.008100654,6.125146e+28,0.0,0.003182,1.0,0.02545247,0.02545247,1.9200289999999998e+29,...,0.004442562,2.071187e+28,0.0,-0.002357,1.0,0.00112559,0.00112559,4.00929e+27,0.0,-0.001186
B:C,1.0,0.0004839456,0.0004839456,3.659257e+27,0.0,0.000778,1.0,0.0299481,0.0299481,2.2591609999999997e+29,...,0.01600326,7.460949e+28,0.0,0.004473,1.0,0.009887813,0.009887813,3.521984e+28,0.0,-0.003516
I:C,1.0,0.002759586,0.002759586,2.086605e+28,0.0,-0.001857,1.0,0.03496387,0.03496387,2.63753e+29,...,0.01320014,6.154096e+28,0.0,0.004062,1.0,0.007479808,0.007479808,2.664266e+28,0.0,0.003058



LogisticRegression (Full & Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,0.034912,0.03491227,127877.978966,0.0,-0.006606,1.0,0.00154,0.001540464,4904.84224,...,0.004448218,15984.633153,0.0,-0.002358,1.0,0.071672,0.07167167,4489348.0,0.0,-0.009465
B,1.0,0.005545,0.005544701,20309.340394,0.0,-0.002633,1.0,0.052672,0.0526723,167708.756776,...,0.02369595,85151.208099,0.0,0.005442,1.0,0.002669,0.002668597,167154.7,0.0,0.001826
I,1.0,0.086817,0.08681743,317998.141756,0.0,0.010417,1.0,0.26053,0.2605299,829527.943975,...,0.2025623,727905.723864,0.0,0.015912,1.0,0.350918,0.3509181,21980700.0,0.0,0.020944
C,1.0,0.009,0.009000307,32966.660906,0.0,0.003354,1.0,0.068958,0.06895774,219561.675407,...,0.06864211,246664.831985,0.0,0.009263,1.0,0.011688,0.01168772,732091.9,0.0,0.003822
F:B,1.0,0.034797,0.03479719,127456.462096,0.0,-0.006595,1.0,0.104499,0.1044993,332726.075221,...,0.08690295,312285.026944,0.0,-0.010423,1.0,0.064527,0.06452675,4041806.0,0.0,-0.008981
F:I,1.0,0.000244,0.0002441696,894.353591,1.0195929999999999e-131,-0.000552,1.0,0.02585,0.02584983,82305.942667,...,0.01682749,60469.432132,0.0,-0.004586,1.0,0.000424,0.0004243762,26581.94,0.0,-0.000728
F:C,1.0,5.7e-05,5.695254e-05,208.607915,4.229992e-42,0.000267,1.0,0.013441,0.01344144,42797.578728,...,0.008715122,31317.715275,0.0,-0.003301,1.0,0.012614,0.01261392,790106.6,0.0,0.003971
B:I,1.0,0.010237,0.01023712,37496.899693,0.0,-0.003577,1.0,0.068455,0.06845488,217960.555968,...,0.04945755,177725.29193,0.0,-0.007863,1.0,0.001006,0.001006066,63017.65,0.0,-0.001121
B:C,1.0,0.047833,0.04783306,175204.744867,0.0,0.007732,1.0,0.015383,0.01538309,48979.798834,...,0.01401075,50347.503168,0.0,0.004185,1.0,0.071246,0.07124574,4462668.0,0.0,0.009437
I:C,1.0,0.031561,0.03156084,115602.238769,0.0,0.006281,1.0,0.015516,0.01551569,49402.006353,...,0.0142057,51048.058554,0.0,0.004214,1.0,0.085856,0.08585635,5377843.0,0.0,0.01036



RandomBaseline (Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,1.445012e-08,1.445012e-08,2.634286e+22,0.0,4e-06,1.0,9.191901e-09,9.191901e-09,3.552147e+22,...,1.996521e-08,1.775295e+22,0.0,5e-06,,,,,,
B,1.0,0.06042129,0.06042129,1.101492e+29,0.0,0.008691,1.0,0.05667176,0.05667176,2.190041e+29,...,0.06090433,5.415579e+28,0.0,0.008725,,,,,,
I,1.0,0.0002929287,0.0002929287,5.34015e+26,0.0,-0.000605,1.0,0.0006447139,0.0006447139,2.491452e+27,...,0.0003426755,3.047052e+26,0.0,-0.000654,,,,,,
C,1.0,0.06430419,0.06430419,1.1722789999999998e+29,0.0,0.008966,1.0,0.05302926,0.05302926,2.049279e+29,...,0.07006189,6.229864e+28,0.0,0.009358,,,,,,
F:B,1.0,1.445012e-08,1.445012e-08,2.634286e+22,0.0,-4e-06,1.0,9.191901e-09,9.191901e-09,3.552147e+22,...,1.996521e-08,1.775295e+22,0.0,-5e-06,,,,,,
F:I,1.0,1.445012e-08,1.445012e-08,2.634286e+22,0.0,-4e-06,1.0,9.191901e-09,9.191901e-09,3.552147e+22,...,1.996521e-08,1.775295e+22,0.0,-5e-06,,,,,,
F:C,1.0,1.445012e-08,1.445012e-08,2.634286e+22,0.0,-4e-06,1.0,9.191901e-09,9.191901e-09,3.552147e+22,...,1.996521e-08,1.775295e+22,0.0,-5e-06,,,,,,
B:I,1.0,0.03362253,0.03362253,6.129456e+28,0.0,-0.006483,1.0,0.03096591,0.03096591,1.1966559999999999e+29,...,0.03109948,2.765348e+28,0.0,-0.006235,,,,,,
B:C,1.0,0.00561726,0.00561726,1.024038e+28,0.0,0.00265,1.0,0.006515396,0.006515396,2.51783e+28,...,0.005384766,4.788104e+27,0.0,0.002594,,,,,,
I:C,1.0,0.3701943,0.3701943,6.748717e+29,0.0,0.021511,1.0,0.3779681,0.3779681,1.460632e+30,...,0.3700718,3.2906579999999996e+29,0.0,0.021508,,,,,,



ConstantBaseline (Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,...,Weighted F1,Weighted F1,Weighted F1,Weighted F1,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC,ROC AUC
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,...,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,1.568013e-07,1.568013e-07,1.093112e+23,0.0,1.4e-05,1.0,2.674921e-08,2.674921e-08,6.213118e+22,...,2.16094e-07,9.66276e+23,0.0,1.6e-05,1.0,1.329972e-28,1.329972e-28,4.949662,0.026375,4.163336e-16
B,1.0,0.002584296,0.002584296,1.801596e+27,0.0,0.001797,1.0,0.000432496,0.000432496,1.004571e+27,...,0.003609471,1.613995e+28,0.0,0.002124,,,,,,
I,1.0,0.0001363344,0.0001363344,9.504312e+25,0.0,0.000413,1.0,2.170232e-05,2.170232e-05,5.040862e+25,...,0.0001969902,8.808525e+26,0.0,0.000496,,,,,,
C,1.0,0.0433558,0.0433558,3.022472e+28,0.0,0.007362,1.0,0.007204023,0.007204023,1.6733e+28,...,0.06085501,2.7211649999999996e+29,0.0,0.008722,,,,,,
F:B,1.0,1.568013e-07,1.568013e-07,1.093112e+23,0.0,-1.4e-05,1.0,2.674921e-08,2.674921e-08,6.213118e+22,...,2.16094e-07,9.66276e+23,0.0,-1.6e-05,,,,,,
F:I,1.0,1.568013e-07,1.568013e-07,1.093112e+23,0.0,-1.4e-05,1.0,2.674921e-08,2.674921e-08,6.213118e+22,...,2.16094e-07,9.66276e+23,0.0,-1.6e-05,,,,,,
F:C,1.0,1.568013e-07,1.568013e-07,1.093112e+23,0.0,-1.4e-05,1.0,2.674921e-08,2.674921e-08,6.213118e+22,...,2.16094e-07,9.66276e+23,0.0,-1.6e-05,,,,,,
B:I,1.0,0.001711348,0.001711348,1.193036e+27,0.0,0.001463,1.0,0.0002776303,0.0002776303,6.448603e+26,...,0.002441586,1.091769e+28,0.0,0.001747,,,,,,
B:C,1.0,0.0006813323,0.0006813323,4.749786e+26,0.0,-0.000923,1.0,0.00011809,0.00011809,2.742911e+26,...,0.0009284723,4.151715e+27,0.0,-0.001077,,,,,,
I:C,1.0,0.0005512252,0.0005512252,3.842768e+26,0.0,0.00083,1.0,9.021523e-05,9.021523e-05,2.095456e+26,...,0.0007817634,3.495698e+27,0.0,0.000989,,,,,,



PreviousBaseline (Full & Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,Macro F1,Macro F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,2.837609e-06,2.837609e-06,6.37485e+23,0.0,6e-05,1.0,3.314457e-06,3.314457e-06,8.089893e+23,0.0,6.4e-05,1.0,3.193616e-06,3.193616e-06,9.2009e+23,0.0,6.3e-05
B,1.0,0.1845878,0.1845878,4.146871e+28,0.0,0.01519,1.0,0.1801031,0.1801031,4.395938e+28,0.0,0.015004,1.0,0.1837626,0.1837626,5.294254e+28,0.0,0.015156
I,1.0,0.06900714,0.06900714,1.550285e+28,0.0,0.009288,1.0,0.06929678,0.06929678,1.691389e+28,0.0,0.009307,1.0,0.06878781,0.06878781,1.981797e+28,0.0,0.009273
C,1.0,0.01369912,0.01369912,3.077586e+27,0.0,0.004138,1.0,0.004212335,0.004212335,1.028142e+27,0.0,0.002295,1.0,0.01347498,0.01347498,3.882181e+27,0.0,0.004104
F:B,1.0,2.837609e-06,2.837609e-06,6.37485e+23,0.0,-6e-05,1.0,3.314457e-06,3.314457e-06,8.089893e+23,0.0,-6.4e-05,1.0,3.193616e-06,3.193616e-06,9.2009e+23,0.0,-6.3e-05
F:I,1.0,2.837609e-06,2.837609e-06,6.37485e+23,0.0,-6e-05,1.0,3.314457e-06,3.314457e-06,8.089893e+23,0.0,-6.4e-05,1.0,3.193616e-06,3.193616e-06,9.2009e+23,0.0,-6.3e-05
F:C,1.0,2.837609e-06,2.837609e-06,6.37485e+23,0.0,-6e-05,1.0,3.314457e-06,3.314457e-06,8.089893e+23,0.0,-6.4e-05,1.0,3.193616e-06,3.193616e-06,9.2009e+23,0.0,-6.3e-05
B:I,1.0,0.03938534,0.03938534,8.848141e+27,0.0,-0.007017,1.0,0.04536263,0.04536263,1.107206e+28,0.0,-0.00753,1.0,0.03930574,0.03930574,1.13241e+28,0.0,-0.007009
B:C,1.0,0.01708752,0.01708752,3.838809e+27,0.0,-0.004622,1.0,0.0170052,0.0170052,4.150612e+27,0.0,-0.00461,1.0,0.01761178,0.01761178,5.074005e+27,0.0,-0.004692
I:C,1.0,0.003143824,0.003143824,7.062779e+26,0.0,-0.001982,1.0,0.003813663,0.003813663,9.30835e+26,0.0,-0.002183,1.0,0.003121364,0.003121364,8.992741e+26,0.0,-0.001975



ConsensusBaseline (Full & Reduced):


Unnamed: 0_level_0,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Accuracy,Macro F1,Macro F1,Macro F1,Macro F1,Macro F1,Macro F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1,Weighted F1
Unnamed: 0_level_1,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient,df,sum_sq,mean_sq,F,PR(>F),Coefficient
F,1.0,2.787305e-06,2.787305e-06,6.57999e+23,0.0,5.9e-05,1.0,3.324723e-06,3.324723e-06,9.122251e+23,0.0,6.4e-05,1.0,2.260818e-06,2.260818e-06,6.6787e+23,0.0,5.3e-05
B,1.0,0.001975174,0.001975174,4.662793e+26,0.0,-0.001571,1.0,0.008202611,0.008202611,2.250602e+27,0.0,-0.003202,1.0,0.00101031,0.00101031,2.984564e+26,0.0,-0.001124
I,1.0,0.001672096,0.001672096,3.947316e+26,0.0,-0.001446,1.0,0.0009691294,0.0009691294,2.659061e+26,0.0,-0.001101,1.0,0.002747927,0.002747927,8.117674e+26,0.0,-0.001853
C,1.0,0.05842379,0.05842379,1.37921e+28,0.0,0.008546,1.0,0.02306695,0.02306695,6.329024e+27,0.0,0.00537,1.0,0.06329243,0.06329243,1.869727e+28,0.0,0.008895
F:B,1.0,2.787305e-06,2.787305e-06,6.57999e+23,0.0,-5.9e-05,1.0,3.324723e-06,3.324723e-06,9.122251e+23,0.0,-6.4e-05,1.0,2.260818e-06,2.260818e-06,6.6787e+23,0.0,-5.3e-05
F:I,1.0,2.787305e-06,2.787305e-06,6.57999e+23,0.0,-5.9e-05,1.0,3.324723e-06,3.324723e-06,9.122251e+23,0.0,-6.4e-05,1.0,2.260818e-06,2.260818e-06,6.6787e+23,0.0,-5.3e-05
F:C,1.0,2.787305e-06,2.787305e-06,6.57999e+23,0.0,-5.9e-05,1.0,3.324723e-06,3.324723e-06,9.122251e+23,0.0,-6.4e-05,1.0,2.260818e-06,2.260818e-06,6.6787e+23,0.0,-5.3e-05
B:I,1.0,0.002295189,0.002295189,5.418253e+26,0.0,0.001694,1.0,4.928827e-05,4.928827e-05,1.352353e+25,0.0,0.000248,1.0,0.004508266,0.004508266,1.33179e+27,0.0,0.002374
B:C,1.0,1.382131e-05,1.382131e-05,3.262798e+24,0.0,0.000131,1.0,2.181993e-06,2.181993e-06,5.986871e+23,0.0,-5.2e-05,1.0,2.152654e-07,2.152654e-07,6.359172e+22,0.0,-1.6e-05
I:C,1.0,0.01132571,0.01132571,2.67366e+27,0.0,-0.003763,1.0,0.009605932,0.009605932,2.63564e+27,0.0,-0.003465,1.0,0.01352137,0.01352137,3.99436e+27,0.0,-0.004111
