In [None]:
import pandas as pd
import os
from utils import get_raw_results, get_mean_results, models_statistical_test
from IPython.display import display

In [None]:
# Maps each raw metric column name to the header used in the results tables.
metrics = {
    'roc_auc_score': 'AUCROC',
    'f1_weighted': 'F1-score',
    'precision_weighted': 'PPV',
    'NPV': 'NPV',
    'recall_weighted': 'Sensitivity',
    'specificity': 'Specificity',
}

In [None]:
# Name of the reference model (used further down to select its raw results).
main_model = 'LogisticRegression'
# Metric column that get_table passes to models_statistical_test.
test_metric = 'roc_auc_score'

In [None]:
def get_table(raw_results, main_model, variability=True):
    """Build a per-model summary table of the metrics listed in ``metrics``.

    Parameters
    ----------
    raw_results : dict
        Maps a model (or solution) name to data accepted by ``pd.DataFrame``
        whose columns include every key of the notebook-level ``metrics``
        dict. Presumably one row per fold/run -- confirm against
        ``utils.get_raw_results``.
    main_model : str
        Key of ``raw_results`` used as the reference for the statistical
        test. Only read when ``variability`` is true.
    variability : bool, default True
        If true, cells are formatted ``mean±std`` and a ``p-value`` column is
        added: ``'Reference'`` for ``main_model``, otherwise the value
        returned by ``models_statistical_test`` on the notebook-level
        ``test_metric`` column. If false, cells hold the mean only and no
        ``p-value`` column is produced.

    Returns
    -------
    pandas.DataFrame
        One row per model: a ``Model`` column followed by the display names
        from ``metrics`` (and ``p-value`` when requested), ordered by
        descending mean ``AUCROC``.

    Raises
    ------
    KeyError
        If ``variability`` is true and ``main_model`` is not a key of
        ``raw_results``, or if a required metric column is missing.
    """
    # The reference scores are only needed for the p-value column.
    reference_scores = None
    if variability:
        reference_scores = pd.DataFrame(raw_results[main_model])[test_metric]

    results = {}
    for model, model_results in raw_results.items():
        df = pd.DataFrame(model_results)
        row = {}
        for metric, metric_name in metrics.items():
            cell = f'{df[metric].mean():.3f}'
            if variability:
                cell += f'±{df[metric].std():.3f}'
            row[metric_name] = cell
        if variability:
            if model == main_model:
                row['p-value'] = 'Reference'
            else:
                row['p-value'] = models_statistical_test(reference_scores, df[test_metric])
        results[model] = row

    results_df = pd.DataFrame(results).T
    results_df.index.name = 'Model'
    results_df = results_df.reset_index()
    # Rank on the numeric mean rather than on the formatted text, so a 'nan'
    # cell sorts last instead of lexicographically first. mergesort is stable,
    # which keeps tied models in their input order.
    return results_df.sort_values(
        by='AUCROC',
        ascending=False,
        key=lambda column: column.str.split('±').str[0].astype(float),
        kind='mergesort',
    )

## Statistical tests across model architectures

In [None]:
# Results directory for the model comparison; passed to get_raw_results.
path = '../tests/test1/cv-PT'

In [None]:
# Load the raw results found under `path`. get_table treats the result as a
# mapping of model name -> metric values convertible to a DataFrame.
raw_results = get_raw_results(path)

In [None]:
# Compare every model against the configured reference model instead of
# repeating its name as a literal.
results_df = get_table(raw_results, main_model)
results_df

In [None]:
# Save the table; index=False leaves the DataFrame index out of the CSV.
results_df.to_csv('../data/results/models_results_CV_PT.csv', index=False)

## Statistical analysis - comparing solutions (all features vs. 13 best features)

In [None]:
# Display the results directory currently bound to `path`.
path

In [None]:
# Results directory of each solution -> label used for it in the comparison table.
solutions = {
    '../tests/test1/cv-PT': 'LR CV All Features',
    '../tests/test1/cv-PT-reduced-13': 'LR CV 13 Best Features',
}

In [None]:
# Metric column that get_table passes to models_statistical_test
# (same value as assigned near the top of the notebook).
test_metric = 'roc_auc_score'

In [None]:
# Model name for the solution comparison (same value as `main_model`).
model_test = 'LogisticRegression'

In [None]:
# Collect the selected model's raw results for each solution, keyed by the
# solution's display name.
raw_results = {}
# A dedicated loop variable keeps the notebook-level `path` from being rebound.
for solution_path, name in solutions.items():
    print(name)
    # Index with `model_test`, the variable defined for this comparison,
    # rather than the unrelated `main_model` setting.
    raw_results[name] = get_raw_results(solution_path)[model_test]

In [None]:
# Raw results of the all-features solution as a DataFrame.
# NOTE(review): no later visible cell reads this variable (get_table builds
# its own reference data from raw_results) -- confirm it is still needed.
main_model_df = pd.DataFrame(raw_results['LR CV All Features'])

In [None]:
# Summarize the solutions, with the all-features run as the reference for the
# p-value column.
results_df = get_table(raw_results, 'LR CV All Features')

In [None]:
# Display the comparison table.
results_df

In [None]:
# Save the table; index=False leaves the DataFrame index out of the CSV.
results_df.to_csv('../data/results/models_results_compare_CV_PT_reduced.csv', index=False)

### Mean analysis - trained on PT, tested on the US dataset (means only, no p-value)

In [None]:
# Results directory of each solution -> label used for it in the comparison table.
solutions = {
    '../tests/test1/train-PT-test-US': 'LR TT All Features',
    '../tests/test1/train-PT-test-US-reduced-13': 'LR TT 13 Best Features',
}

In [None]:
# Metric column that get_table passes to models_statistical_test
# (same value as assigned near the top of the notebook).
test_metric = 'roc_auc_score'

In [None]:
# Model whose raw results are loaded for each solution in the next cell.
model_test = 'LogisticRegression'

In [None]:
# Collect the selected model's raw results for each solution, keyed by the
# solution's display name.
raw_results = {}
# A dedicated loop variable keeps the notebook-level `path` from being rebound.
for solution_path, name in solutions.items():
    print(name)
    raw_results[name] = get_raw_results(solution_path)[model_test]

In [None]:
# Raw results of the all-features solution as a DataFrame.
# NOTE(review): no later visible cell reads this variable (get_table builds
# its own reference data from raw_results) -- confirm it is still needed.
main_model_df = pd.DataFrame(raw_results['LR TT All Features'])

In [None]:
# variability=False: cells hold the mean only and no p-value column is added.
results_df = get_table(raw_results, 'LR TT All Features', variability=False)

In [None]:
# Display the comparison table.
results_df

In [None]:
# Save the table; index=False leaves the DataFrame index out of the CSV.
results_df.to_csv('../data/results/models_results_compare_TT_PT_reduced.csv', index=False)