In [None]:
import pandas as pd
import os

In [None]:
# CSV files holding the baseline clinical-score results; all three live in the same directory.
baseline_models_dir = '/Users/jk1/temp/opsum_prediction_output/baseline_models'
mrs02_baseline_performance_path = baseline_models_dir + '/mrs02_clinical_scores_results.csv'
death_baseline_performance_path = baseline_models_dir + '/3m_death_clinical_scores_results.csv'
mimic_death_baseline_performance_path = baseline_models_dir + '/mimic_death_baseline_score_results.csv'

In [None]:
# Destination directory for the exported comparison table; only referenced by the
# commented-out `to_csv` export in this notebook.
output_dir = '/Users/jk1/Downloads'

In [None]:
def _read_results_table(csv_path):
    """Load one baseline results CSV into a DataFrame using pandas' default parsing."""
    return pd.read_csv(csv_path)


# Read the three result tables in the same order as their paths are declared.
mrs02_baseline_performance_df = _read_results_table(mrs02_baseline_performance_path)
death_baseline_performance_df = _read_results_table(death_baseline_performance_path)
mimic_death_baseline_performance_df = _read_results_table(mimic_death_baseline_performance_path)

In [None]:
def _format_metric_with_ci(df, metric):
    """Return '<value> (<lower>-<upper>)' for `metric`, read from the first row of `df`."""
    # Round first, then format: keeps the exact rounding the table has always used.
    point, lower, upper = (
        round(df[column].iloc[0], 3)
        for column in (f'{metric}_test', f'{metric}_test_lower_ci', f'{metric}_test_upper_ci')
    )
    return f'{point:.3f} ({lower:.3f}-{upper:.3f})'


def preprocess_df(df, model_name, outcome, dataset_name):
    """Build a one-row, display-ready summary of a model's test metrics.

    Each metric is rendered as ``'value (lower-upper)'`` with three decimals,
    using the ``<metric>_test``, ``<metric>_test_lower_ci`` and
    ``<metric>_test_upper_ci`` columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results for one model. Only the first row is read. Must contain the
        three columns above for ``auc``, ``matthews``, ``accuracy``,
        ``precision`` and ``recall``; the ``specificity`` columns are optional.
    model_name, outcome, dataset_name
        Values copied verbatim into the 'Model', 'Outcome' and 'Dataset' columns.

    Returns
    -------
    pandas.DataFrame
        A single row with columns 'ROC AUC', "Matthew's Coefficient",
        'Accuracy', 'Precision (positive predictive value)',
        'Recall (Sensitivity)', 'Specificity' ('NA' when ``df`` has no
        ``specificity_test`` column), 'Model', 'Outcome', 'Dataset'.

    Raises
    ------
    IndexError
        If ``df`` has no rows (e.g. the selection passed in matched nothing).
    KeyError
        If a required metric column is missing.
    """
    if len(df) == 0:
        # Same exception type an empty frame always produced, but with a message
        # that says which table entry could not be built.
        raise IndexError(
            f'preprocess_df received no rows for model {model_name!r} '
            f'(outcome {outcome!r}, dataset {dataset_name!r}); '
            'check that the selection passed in matched a row'
        )

    required_metrics = (
        ('ROC AUC', 'auc'),
        ("Matthew's Coefficient", 'matthews'),
        ('Accuracy', 'accuracy'),
        ('Precision (positive predictive value)', 'precision'),
        ('Recall (Sensitivity)', 'recall'),
    )
    # report result rounded to 3 decimal places with 95% CI
    summary = {label: [_format_metric_with_ci(df, metric)] for label, metric in required_metrics}

    # Specificity is not available in every results file; mark it explicitly when absent.
    if 'specificity_test' in df.columns:
        summary['Specificity'] = [_format_metric_with_ci(df, 'specificity')]
    else:
        summary['Specificity'] = ['NA']

    summary['Model'] = [model_name]
    summary['Outcome'] = [outcome]
    summary['Dataset'] = [dataset_name]

    # Dict insertion order fixes the column order of the resulting table.
    return pd.DataFrame(summary)

In [None]:
# (value of `method_name` in the results tables, label shown in the comparison table)
_BASELINE_SCORES = (
    ('THRIVE', 'THRIVE'),
    ('THRIVEC', 'THRIVE-C'),
    ('HIAT', 'HIAT'),
    ('span100', 'SPAN-100'),
)


def _summarise_scores(results_df, outcome, dataset_name):
    """Run `preprocess_df` once per baseline score on the matching rows of `results_df`.

    Returns a list of one-row frames in `_BASELINE_SCORES` order
    (THRIVE, THRIVE-C, HIAT, SPAN-100).
    """
    return [
        preprocess_df(results_df[results_df.method_name == method_name], label, outcome, dataset_name)
        for method_name, label in _BASELINE_SCORES
    ]


# mrs02
(
    mrs02_processed_thrive_df,
    mrs02_processed_thrivec_df,
    mrs02_processed_HIAT_df,
    mrs02_processed_span100_df,
) = _summarise_scores(mrs02_baseline_performance_df, '3M mrs02', 'GSU')

# death
(
    death_processed_thrive_df,
    death_processed_thrivec_df,
    death_processed_HIAT_df,
    death_processed_span100_df,
) = _summarise_scores(death_baseline_performance_df, '3M death', 'GSU')

# mimic death
(
    mimic_death_processed_thrive_df,
    mimic_death_processed_thrivec_df,
    mimic_death_processed_HIAT_df,
    mimic_death_processed_span100_df,
) = _summarise_scores(mimic_death_baseline_performance_df, '3M death', 'MIMIC')


In [None]:
def _stack_rows(frames):
    """Concatenate `frames` top-to-bottom and renumber the rows from 0."""
    return pd.concat(frames, ignore_index=True)


# One table per results file, rows in the order the frames are listed.
overall_mrs02_results_df = _stack_rows([
    mrs02_processed_thrive_df,
    mrs02_processed_thrivec_df,
    mrs02_processed_HIAT_df,
    mrs02_processed_span100_df,
])
overall_death_results_df = _stack_rows([
    death_processed_thrive_df,
    death_processed_thrivec_df,
    death_processed_HIAT_df,
    death_processed_span100_df,
])
overall_mimic_death_results_df = _stack_rows([
    mimic_death_processed_thrive_df,
    mimic_death_processed_thrivec_df,
    mimic_death_processed_HIAT_df,
    mimic_death_processed_span100_df,
])

# Full comparison table: the three per-file tables stacked in the order listed.
overall_results = _stack_rows([
    overall_mrs02_results_df,
    overall_death_results_df,
    overall_mimic_death_results_df,
])

In [None]:
# Show the combined comparison table as this cell's output.
overall_results

In [None]:
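# Save the comparison table as CSV in output_dir (export is currently disabled; uncomment the line below to write the file)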
# overall_results.to_csv(os.path.join(output_dir, 'baseline_clinical_model_comparison_table.csv'), index=False)
