In [None]:
import pandas as pd
import os

## Subgroups for outcome MRS 02

In [None]:
# Directory holding the per-subgroup test-set evaluation outputs for the MRS 02 outcome.
# The output root defaults to the original local path and can be overridden through the
# OPSUM_OUTPUT_DIR environment variable so the notebook is not tied to one machine.
subgroups_dir = os.path.join(
    os.environ.get('OPSUM_OUTPUT_DIR') or '/Users/jk1/temp/opsum_prediction_output',
    'transformer', '3M_mrs02', 'test_set_subgroup_evaluation',
)

In [None]:
# Walk the evaluation directory tree and gather every file whose name ends with
# 'subgroup_results.csv'. The aggregated files written further down
# ('all_subgroups_results*.csv') do not match this suffix, so re-running the
# notebook does not re-ingest its own outputs.
subgroup_frames = []
for root, dirs, files in os.walk(subgroups_dir):
    # Sorting in place makes os.walk descend in a deterministic order, so the row
    # order of the combined frame no longer depends on the filesystem.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('subgroup_results.csv'):
            subgroup_df = pd.read_csv(os.path.join(root, file))
            subgroup_frames.append(subgroup_df)

# Concatenate once instead of growing the frame inside the loop (which is
# quadratic and relies on concatenating onto an empty DataFrame).
all_subgroups_df = pd.concat(subgroup_frames) if subgroup_frames else pd.DataFrame()

In [None]:
# Keep only the rows whose fold equals the most frequent value of
# 'selected_fold_on_cv' across all loaded subgroup results.
all_subgroups_df = all_subgroups_df.loc[
    lambda frame: frame['fold'] == frame['selected_fold_on_cv'].mode()[0]
]

In [None]:
# Columns kept for reporting: for every metric the test-set value followed by its
# lower and upper confidence bounds, then the subgroup descriptors and sample counts.
selected_columns = [
    f'{metric}_test{suffix}'
    for metric in ('auc', 'matthews', 'accuracy', 'precision', 'recall',
                   'specificity', 'neg_pred_value')
    for suffix in ('', '_lower_ci', '_upper_ci')
] + ['outcome', 'subgroup_split', 'subgroup', 'n_pos_samples', 'n_total_samples']

In [None]:
# Restrict the frame to the reporting columns (raises KeyError if any is missing).
all_subgroups_df = all_subgroups_df.loc[:, selected_columns]

In [None]:
# Persist the combined raw subgroup metrics next to the per-subgroup outputs.
all_subgroups_df.to_csv(
    path_or_buf=os.path.join(subgroups_dir, 'all_subgroups_results.csv'),
    index=False,
)

In [None]:
def _format_metric_with_ci(df, metric):
    """Format one test-set metric as ``'value (lower-upper)'`` with three decimals.

    Reads the first row of ``{metric}_test``, ``{metric}_test_lower_ci`` and
    ``{metric}_test_upper_ci`` from ``df``; a missing column raises ``KeyError``.
    """
    point, lower, upper = (
        round(df[f'{metric}_test{suffix}'].values[0], 3)
        for suffix in ('', '_lower_ci', '_upper_ci')
    )
    return f'{point:.3f} ({lower:.3f}-{upper:.3f})'


def preprocess_df(df, subgroup_split, subgroup, n_pos_samples=None, total_samples=None):
    """Turn the raw metrics of one subgroup into a single human-readable table row.

    Only the first row of ``df`` is used for every metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the ``*_test``, ``*_test_lower_ci`` and ``*_test_upper_ci``
        columns for auc, matthews, accuracy, precision and recall. The
        specificity columns are optional; when ``specificity_test`` is absent the
        "Specificity" cell is ``'NA'``.
    subgroup_split : object
        Value written to the "Subgroup split" column.
    subgroup : object
        Value written to the "Subgroup" column.
    n_pos_samples, total_samples : number, optional
        When both are given, a "Number of events" column is added, formatted as
        ``'<count> (<percentage>%)'``. If ``total_samples`` is 0 the percentage
        cannot be computed and the cell reads ``'<count> (NA)'``.

    Returns
    -------
    pandas.DataFrame
        One-row frame of formatted strings, each metric reported as
        ``'value (lower-upper)'`` rounded to 3 decimal places.

    Raises
    ------
    KeyError
        If a required metric column is missing from ``df``.
    IndexError
        If ``df`` has no rows.
    """
    # NOTE(review): neg_pred_value_* is among the columns selected for the raw
    # export but is not reported here - confirm this is intentional.
    reported_metrics = (
        ('ROC AUC', 'auc'),
        ("Matthew's Coefficient", 'matthews'),
        ('Accuracy', 'accuracy'),
        ('Precision (positive predictive value)', 'precision'),
        ('Recall (Sensitivity)', 'recall'),
    )

    record = {'Subgroup': subgroup, 'Subgroup split': subgroup_split}
    for label, metric in reported_metrics:
        record[label] = _format_metric_with_ci(df, metric)

    if 'specificity_test' in df.columns:
        record['Specificity'] = _format_metric_with_ci(df, 'specificity')
    else:
        record['Specificity'] = 'NA'

    if (n_pos_samples is not None) and (total_samples is not None):
        if total_samples == 0:
            # Avoid ZeroDivisionError / 'nan%' for an empty subgroup.
            record['Number of events'] = f'{int(n_pos_samples)} (NA)'
        else:
            record['Number of events'] = f'{int(n_pos_samples)} ({round(n_pos_samples/total_samples*100, 1)}%)'

    # Wrap every value in a one-element list so the result is a single-row frame
    # with the columns in insertion order.
    return pd.DataFrame({column: [value] for column, value in record.items()})

In [None]:
# Build one formatted summary row per subgroup split.
preprocessed_rows = []
for split in all_subgroups_df.subgroup_split.unique():
    split_df = all_subgroups_df[all_subgroups_df.subgroup_split == split]
    # NOTE(review): only the first row of each split is reported (preprocess_df and
    # the arguments below all read `.values[0]`) - confirm each split has one row.
    preprocessed_rows.append(preprocess_df(split_df, split, split_df.subgroup.values[0], split_df.n_pos_samples.values[0], split_df.n_total_samples.values[0]))

# Concatenate once instead of growing the frame inside the loop; ignore_index gives
# the table a unique 0..n-1 index instead of repeating 0 for every row.
preprocessed_df = pd.concat(preprocessed_rows, ignore_index=True) if preprocessed_rows else pd.DataFrame()

In [None]:
# Show the formatted subgroup table as the cell output for visual inspection.
preprocessed_df

In [None]:
# Save the formatted (publication-style) subgroup table alongside the raw export.
preprocessed_df.to_csv(
    path_or_buf=os.path.join(subgroups_dir, 'all_subgroups_results_preprocessed.csv'),
    index=False,
)

## Subgroups for outcome Death

In [None]:
# Directory holding the per-subgroup test-set evaluation outputs for the Death outcome.
# The output root defaults to the original local path and can be overridden through the
# OPSUM_OUTPUT_DIR environment variable so the notebook is not tied to one machine.
death_subgroups_dir = os.path.join(
    os.environ.get('OPSUM_OUTPUT_DIR') or '/Users/jk1/temp/opsum_prediction_output',
    'transformer', '3M_Death', 'test_set_subgroup_evaluation',
)

In [None]:
# Walk the Death evaluation directory tree and gather every file whose name ends
# with 'subgroup_results.csv'. The aggregated files written further down
# ('all_subgroups_results*.csv') do not match this suffix, so re-running the
# notebook does not re-ingest its own outputs.
death_subgroup_frames = []
for root, dirs, files in os.walk(death_subgroups_dir):
    # Sorting in place makes os.walk descend in a deterministic order, so the row
    # order of the combined frame no longer depends on the filesystem.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('subgroup_results.csv'):
            subgroup_df = pd.read_csv(os.path.join(root, file))
            death_subgroup_frames.append(subgroup_df)

# Concatenate once instead of growing the frame inside the loop (which is
# quadratic and relies on concatenating onto an empty DataFrame).
all_death_subgroups_df = pd.concat(death_subgroup_frames) if death_subgroup_frames else pd.DataFrame()

In [None]:
# Keep only the rows whose fold equals the most frequent value of
# 'selected_fold_on_cv' across all loaded Death subgroup results.
all_death_subgroups_df = all_death_subgroups_df.loc[
    lambda frame: frame['fold'] == frame['selected_fold_on_cv'].mode()[0]
]

In [None]:
# Restrict the frame to the reporting columns (raises KeyError if any is missing).
all_death_subgroups_df = all_death_subgroups_df.loc[:, selected_columns]

In [None]:
# Persist the combined raw Death subgroup metrics next to the per-subgroup outputs.
all_death_subgroups_df.to_csv(
    path_or_buf=os.path.join(death_subgroups_dir, 'all_subgroups_results.csv'),
    index=False,
)

In [None]:
# Build one formatted summary row per subgroup split for the Death outcome.
death_preprocessed_rows = []
for split in all_death_subgroups_df.subgroup_split.unique():
    split_df = all_death_subgroups_df[all_death_subgroups_df.subgroup_split == split]
    # NOTE(review): only the first row of each split is reported (preprocess_df and
    # the arguments below all read `.values[0]`) - confirm each split has one row.
    death_preprocessed_rows.append(preprocess_df(split_df, split, split_df.subgroup.values[0], split_df.n_pos_samples.values[0], split_df.n_total_samples.values[0]))

# Concatenate once instead of growing the frame inside the loop; ignore_index gives
# the table a unique 0..n-1 index instead of repeating 0 for every row.
death_preprocessed_df = pd.concat(death_preprocessed_rows, ignore_index=True) if death_preprocessed_rows else pd.DataFrame()

In [None]:
# Show the formatted Death subgroup table as the cell output for visual inspection.
death_preprocessed_df

In [None]:
# Save the formatted (publication-style) Death subgroup table alongside the raw export.
death_preprocessed_df.to_csv(
    path_or_buf=os.path.join(death_subgroups_dir, 'all_subgroups_results_preprocessed.csv'),
    index=False,
)

## Subgroups for outcome Death in hospital

In [None]:
# Directory holding the per-subgroup test-set evaluation outputs for the
# Death-in-hospital outcome. The output root defaults to the original local path and
# can be overridden through the OPSUM_OUTPUT_DIR environment variable so the notebook
# is not tied to one machine.
death_in_hospital_subgroups_dir = os.path.join(
    os.environ.get('OPSUM_OUTPUT_DIR') or '/Users/jk1/temp/opsum_prediction_output',
    'transformer', 'Death_in_hospital', 'test_set_subgroup_evaluation',
)

In [None]:
# Walk the Death-in-hospital evaluation directory tree and gather every file whose
# name ends with 'subgroup_results.csv'. The aggregated files written further down
# ('all_subgroups_results*.csv') do not match this suffix, so re-running the
# notebook does not re-ingest its own outputs.
death_in_hospital_subgroup_frames = []
for root, dirs, files in os.walk(death_in_hospital_subgroups_dir):
    # Sorting in place makes os.walk descend in a deterministic order, so the row
    # order of the combined frame no longer depends on the filesystem.
    dirs.sort()
    for file in sorted(files):
        if file.endswith('subgroup_results.csv'):
            subgroup_df = pd.read_csv(os.path.join(root, file))
            death_in_hospital_subgroup_frames.append(subgroup_df)

# Concatenate once instead of growing the frame inside the loop (which is
# quadratic and relies on concatenating onto an empty DataFrame).
all_death_in_hospital_subgroups_df = (
    pd.concat(death_in_hospital_subgroup_frames)
    if death_in_hospital_subgroup_frames
    else pd.DataFrame()
)

In [None]:
# Keep only the rows whose fold equals the most frequent value of
# 'selected_fold_on_cv' across all loaded Death-in-hospital subgroup results.
all_death_in_hospital_subgroups_df = all_death_in_hospital_subgroups_df.loc[
    lambda frame: frame['fold'] == frame['selected_fold_on_cv'].mode()[0]
]

In [None]:
# Restrict the frame to the reporting columns (raises KeyError if any is missing).
all_death_in_hospital_subgroups_df = all_death_in_hospital_subgroups_df.loc[:, selected_columns]

In [None]:
# Persist the combined raw Death-in-hospital subgroup metrics next to the
# per-subgroup outputs.
all_death_in_hospital_subgroups_df.to_csv(
    path_or_buf=os.path.join(death_in_hospital_subgroups_dir, 'all_subgroups_results.csv'),
    index=False,
)

In [None]:
# Build one formatted summary row per subgroup split for the Death-in-hospital outcome.
death_in_hospital_preprocessed_rows = []
for split in all_death_in_hospital_subgroups_df.subgroup_split.unique():
    split_df = all_death_in_hospital_subgroups_df[all_death_in_hospital_subgroups_df.subgroup_split == split]
    # NOTE(review): only the first row of each split is reported (preprocess_df and
    # the arguments below all read `.values[0]`) - confirm each split has one row.
    death_in_hospital_preprocessed_rows.append(preprocess_df(split_df, split, split_df.subgroup.values[0], split_df.n_pos_samples.values[0], split_df.n_total_samples.values[0]))

# Concatenate once instead of growing the frame inside the loop; ignore_index gives
# the table a unique 0..n-1 index instead of repeating 0 for every row.
death_in_hospital_preprocessed_df = (
    pd.concat(death_in_hospital_preprocessed_rows, ignore_index=True)
    if death_in_hospital_preprocessed_rows
    else pd.DataFrame()
)
    

In [None]:
# Show the formatted Death-in-hospital subgroup table as the cell output for visual inspection.
death_in_hospital_preprocessed_df

In [None]:
# Save the formatted (publication-style) Death-in-hospital subgroup table alongside
# the raw export.
death_in_hospital_preprocessed_df.to_csv(
    path_or_buf=os.path.join(death_in_hospital_subgroups_dir, 'all_subgroups_results_preprocessed.csv'),
    index=False,
)