In [None]:
import os
import pickle
from prediction.outcome_prediction.baseline_models.baseline_scores import hiat_score, span100_score, thriveC_score, \
    thrive_score
from prediction.outcome_prediction.baseline_models.evaluation_helper_functions import evaluate_method
from prediction.outcome_prediction.data_loading.data_loader import load_external_data
import pandas as pd

from preprocessing.preprocessing_tools.normalisation.reverse_normalisation import reverse_normalisation

In [None]:
# --- Configuration -----------------------------------------------------------
# All input files live in one preprocessing run directory whose name (and the
# names of the files inside it) embed the run timestamp. Keeping the root and
# the run id in one place means switching to another run is a one-line change.
# The root and the output directory can be overridden through environment
# variables so the notebook is not tied to one user's machine; the defaults
# reproduce the original hard-coded locations.
prepro_root = os.environ.get('OPSUM_MIMIC_PREPRO_ROOT', '/Users/jk1/temp/opsum_mimic/preprocessing')
prepro_run_id = '25012023_232713'
prepro_run_dir = os.path.join(prepro_root, f'mimic_prepro_{prepro_run_id}')

external_features_df_path = os.path.join(prepro_run_dir, f'preprocessed_features_{prepro_run_id}.csv')
external_outcomes_df_path = os.path.join(prepro_run_dir, f'preprocessed_outcomes_{prepro_run_id}.csv')
# Name of the outcome column to evaluate; also used in the output file name.
outcome = 'Death in hospital'

normalisation_parameters_path = os.path.join(prepro_run_dir, f'logs_{prepro_run_id}',
                                             'reference_population_normalisation_parameters.csv')

# Directory the results CSV is written to.
output_dir = os.environ.get('OPSUM_OUTPUT_DIR', '/Users/jk1/Downloads')

## Prepare data

In [None]:
# Load the external features/outcomes for the selected outcome. The call yields
# a feature array, an outcome array and a lookup table that (see its use below)
# carries a 'sample_label' column naming the features.
test_X_np, test_y_np, test_features_lookup_table = load_external_data(
    external_features_df_path,
    external_outcomes_df_path,
    outcome,
)

In [None]:
# Keep only index 0 along the second axis of the 3-D feature array.
# NOTE(review): presumably axis 1 is time and index 0 is the admission
# timestep — confirm against load_external_data.
admission_test_X_np = test_X_np[:, 0, :]

In [None]:
# Sanity check: shapes of the full feature array, the index-0 slice and the outcomes.
tuple(array.shape for array in (test_X_np, admission_test_X_np, test_y_np))

In [None]:
# Wrap the sliced array in a DataFrame (one column per feature label), expose the
# row position as 'pidx', then melt to long format with one
# (pidx, sample_label, value) row per feature value — the layout that is
# passed to reverse_normalisation below.
admission_test_X_df = (
    pd.DataFrame(admission_test_X_np, columns=test_features_lookup_table['sample_label'])
    .reset_index()
    .rename(columns={'index': 'pidx'})
    .melt(id_vars='pidx', var_name='sample_label', value_name='value')
)

In [None]:
# Outcome labels as a single-column frame named after the selected outcome.
outcomes_df = pd.DataFrame(data=test_y_np, columns=[outcome])

In [None]:
# Read the reference-population normalisation parameters saved by the preprocessing run.
normalisation_parameters_df = pd.read_csv(filepath_or_buffer=normalisation_parameters_path)

# Apply reverse_normalisation to the long-format features.
# NOTE(review): presumably this maps values back to their original units,
# which the clinical scores below would need — confirm in reverse_normalisation.
non_norm_admission_test_X_df = reverse_normalisation(
    admission_test_X_df,
    normalisation_parameters_df,
)

In [None]:
# Undo the melt: back to wide format with one row per pidx and one column per sample_label.
non_norm_admission_test_X_df = (
    non_norm_admission_test_X_df
    .pivot(index='pidx', columns='sample_label', values='value')
    .reset_index()
    # NOTE(review): reset_index() already restores the column as 'pidx', so this
    # rename of 'index' looks like a no-op — confirm and consider dropping it.
    .rename(columns={'index': 'pidx'})
)

In [None]:
# Put features and outcome side by side (aligned on the row index) and expose
# the patient index under the name 'case_admission_id'.
non_norm_admission_test_df = pd.concat(
    [non_norm_admission_test_X_df, outcomes_df],
    axis=1,
).rename(columns={'pidx': 'case_admission_id'})

In [None]:
# Display the combined frame (reverse-normalised features plus the outcome column) for a visual check.
non_norm_admission_test_df

## Create baseline clinical scores

In [None]:
# HIAT score per row, computed from the 'age', 'median_NIHSS' and 'glucose' columns.
non_norm_admission_test_df['HIAT_prob'] = non_norm_admission_test_df.apply(
    lambda row: hiat_score(row['age'], row['median_NIHSS'], row['glucose']),
    axis=1,
)

# Good outcome is defined as mRS < 4 at discharge; predicted when the score exceeds 0.5.
non_norm_admission_test_df['HIAT good outcome pred'] = non_norm_admission_test_df['HIAT_prob'].gt(0.5)

In [None]:
# SPAN-100 score per row, computed from the 'age' and 'median_NIHSS' columns.
non_norm_admission_test_df['span100_prob'] = non_norm_admission_test_df.apply(
    lambda row: span100_score(row['age'], row['median_NIHSS']),
    axis=1,
)
# Binary prediction: good outcome when the score exceeds 0.5.
non_norm_admission_test_df['span100 good outcome pred'] = non_norm_admission_test_df['span100_prob'].gt(0.5)

In [None]:
# THRIVE score per row, computed from age, median NIHSS and the three
# medical-history columns (hypertension, diabetes, atrial fibrillation).
non_norm_admission_test_df['THRIVE_prob'] = non_norm_admission_test_df.apply(
    lambda row: thrive_score(
        row['age'],
        row['median_NIHSS'],
        row['medhist_hypertension_yes'],
        row['medhist_diabetes_yes'],
        row['medhist_atrial_fibr._yes'],
    ),
    axis=1,
)

# Binary prediction: good outcome when the score exceeds 0.5.
non_norm_admission_test_df['THRIVE good outcome pred'] = non_norm_admission_test_df['THRIVE_prob'].gt(0.5)

In [None]:
# THRIVE-C score per row; takes the same five inputs as the THRIVE score
# (age, median NIHSS, hypertension, diabetes, atrial fibrillation).
non_norm_admission_test_df['THRIVEC_prob'] = non_norm_admission_test_df.apply(
    lambda row: thriveC_score(
        row['age'],
        row['median_NIHSS'],
        row['medhist_hypertension_yes'],
        row['medhist_diabetes_yes'],
        row['medhist_atrial_fibr._yes'],
    ),
    axis=1,
)

# Binary prediction: good outcome when the score exceeds 0.5.
non_norm_admission_test_df['THRIVEC good outcome pred'] = non_norm_admission_test_df['THRIVEC_prob'].gt(0.5)

In [None]:
# Show the THRIVE / THRIVE-C inputs next to the resulting scores and predictions.
non_norm_admission_test_df.loc[:, [
    'age',
    'median_NIHSS',
    'medhist_hypertension_yes',
    'medhist_diabetes_yes',
    'medhist_atrial_fibr._yes',
    'THRIVE_prob',
    'THRIVE good outcome pred',
    'THRIVEC_prob',
    'THRIVEC good outcome pred',
]]

Compute the inverse of each score probability (1 − probability) so that the scores predict a bad outcome.

In [None]:
# For every score, add the complementary probability (1 - prob) and its 0.5-thresholded
# prediction. Each tuple is (source column, inverse probability column, inverse prediction column).
# NOTE(review): the 'inv_* good outcome pred' columns are thresholds on the inverted
# probability, despite the "good outcome" wording in their names — presumably the
# naming pattern evaluate_method expects; confirm.
for prob_col, inv_prob_col, inv_pred_col in (
    ('THRIVEC_prob', 'inv_THRIVEC_prob', 'inv_THRIVEC good outcome pred'),
    ('THRIVE_prob', 'inv_THRIVE_prob', 'inv_THRIVE good outcome pred'),
    ('HIAT_prob', 'inv_HIAT_prob', 'inv_HIAT good outcome pred'),
    ('span100_prob', 'inv_span100_prob', 'inv_span100 good outcome pred'),
):
    non_norm_admission_test_df[inv_prob_col] = 1 - non_norm_admission_test_df[prob_col]
    non_norm_admission_test_df[inv_pred_col] = non_norm_admission_test_df[inv_prob_col] > 0.5

In [None]:
# Evaluate each inverted score against the selected outcome, using only the rows
# where the corresponding score is not NaN. Every call returns a results frame,
# a ROC figure, bootstrapping data and testing data.
# NOTE(review): with IPython's default display settings only the last bare
# expression of a cell is rendered, so the intermediate `roc_auc_figure`
# lines may not show anything — confirm the figures appear as intended.

# THRIVE-C
outcome_thriveC_df, roc_auc_figure, outcome_THRIVE_C_bootstrapping_data, outcome_THRIVE_C_testing_data = evaluate_method(
    'inv_THRIVEC',
    non_norm_admission_test_df.loc[non_norm_admission_test_df['THRIVEC_prob'].notna()],
    ground_truth=outcome,
)
outcome_thriveC_df['method_name'] = 'THRIVEC'
roc_auc_figure

# THRIVE
outcome_thrive_df, roc_auc_figure, outcome_THRIVE_bootstrapping_data, outcome_THRIVE_testing_data = evaluate_method(
    'inv_THRIVE',
    non_norm_admission_test_df.loc[non_norm_admission_test_df['THRIVE_prob'].notna()],
    ground_truth=outcome,
)
outcome_thrive_df['method_name'] = 'THRIVE'
roc_auc_figure

# HIAT
outcome_hiat_df, roc_auc_figure, outcome_HIAT_bootstrapping_data, outcome_HIAT_testing_data = evaluate_method(
    'inv_HIAT',
    non_norm_admission_test_df.loc[non_norm_admission_test_df['HIAT_prob'].notna()],
    ground_truth=outcome,
)
outcome_hiat_df['method_name'] = 'HIAT'
roc_auc_figure

# SPAN-100
outcome_span100_df, roc_auc_figure, outcome_span100_bootstrapping_data, outcome_span100_testing_data = evaluate_method(
    'inv_span100',
    non_norm_admission_test_df.loc[non_norm_admission_test_df['span100_prob'].notna()],
    ground_truth=outcome,
)
outcome_span100_df['method_name'] = 'span100'
roc_auc_figure

In [None]:
# Stack the per-score result frames (each tagged with its 'method_name') into one table and display it.
mimic_outcome_df = pd.concat(
    objs=[
        outcome_thriveC_df,
        outcome_thrive_df,
        outcome_hiat_df,
        outcome_span100_df,
    ],
)
mimic_outcome_df

In [None]:
# Write the combined results to the output directory; spaces in the outcome
# name are replaced by underscores to build the file name.
mimic_outcome_df.to_csv(
    os.path.join(output_dir, f'mimic_{outcome.replace(" ", "_")}_baseline_score_results.csv')
)

In [None]:
# pickle.dump(outcome_THRIVE_C_testing_data, open(os.path.join(output_dir, f'mimic_{"_".join(outcome.split(" "))}_test_gt_and_pred.pkl'), 'wb'))