In [1]:
import os
import pandas as pd

In [2]:
# Directory that holds the result files of the time-series-only baseline runs.
result_path = "../results/07-baseline"

# Names of the entries found in that directory.
exps = os.listdir(result_path)



### Baseline results: time-series data only

In [49]:
def get_results_baseline(result_path: str):
    """Load the time-series-only baseline runs and summarise them per task and layer.

    Every entry of ``result_path`` is read with ``pd.read_pickle`` and must
    yield a dict-like record holding one run's ``auc``, ``auprc`` and ``F1``
    scores. The entry name must consist of at least four ``-``-separated
    fields, ``<hidden_units>-<layer>-<task>-<iteration>-...``; these are
    stored (as strings) in the run record.

    Args:
        result_path: Directory containing one pickled result per run.

    Returns:
        A ``(result, summary_results)`` tuple:

        * ``result`` - one row per run, sorted by ``task`` and ``auc`` in
          descending order.
        * ``summary_results`` - one row per ``(task, layer)`` with the columns
          ``task, model_category, layer, auc, auprc, F1``. Each metric is the
          string ``"<mean> +/- <std>"`` of the run scores multiplied by 100.

    Raises:
        ValueError: If an entry name has fewer than four ``-``-separated fields.
    """
    reported_metrics = ["auc", "auprc", "F1"]
    task_mapping = {
        "los_3": "LOS > 3 Days",
        "los_7": "LOS > 7 Days",
        "mort_hosp": "In-Hospital Mortality",
        "mort_icu": "In-ICU Mortality",
    }

    runs = []
    # Sorted so that the order of tied rows does not depend on the arbitrary
    # order in which the OS lists the directory.
    for exp in sorted(os.listdir(result_path)):
        # NOTE(security): unpickling can execute arbitrary code - only point
        # this function at result files you produced yourself.
        result_dict = pd.read_pickle(os.path.join(result_path, exp))
        (
            result_dict["hidden_units"],
            result_dict["layer"],
            result_dict["task"],
            result_dict["iteration"],
        ) = exp.split("-")[:4]
        # NOTE(review): every run is labelled "GRU", including those whose
        # layer is LSTM - confirm this is the intended category name.
        result_dict["model_category"] = "GRU"
        runs.append(result_dict)

    result = pd.DataFrame(runs).sort_values(by=["task", "auc"], ascending=False)

    summary_results = result.groupby(by=["task", "layer"])[reported_metrics].agg(["mean", "std"])
    # Flatten the (metric, statistic) column MultiIndex to "auc_mean", "auc_std", ...
    summary_results.columns = ["_".join(col) for col in summary_results.columns]
    summary_results = summary_results.reset_index()
    summary_results["model_category"] = "GRU"

    # Replace the short task keys by their report labels; unknown keys are kept.
    summary_results["task"] = summary_results["task"].map(lambda task: task_mapping.get(task, task))

    for metric in reported_metrics:
        means = summary_results[f"{metric}_mean"] * 100
        stds = summary_results[f"{metric}_std"] * 100
        # Iterate over the values directly instead of indexing a row Series
        # with x[0] / x[1]: positional integer lookup on a string-labelled
        # Series is deprecated in pandas.
        summary_results[metric] = [
            "{:.2f} +/- {:.3f}".format(mean, std) for mean, std in zip(means, stds)
        ]

    summary_results = summary_results[["task", "model_category", "layer", "auc", "auprc", "F1"]]
    return result, summary_results
    
# Load the time-series-only baseline: `resutls_07` receives the per-run frame,
# `summary_results_07` the per-task/per-layer summary returned by the loader.
resutls_07, summary_results_07 = get_results_baseline( "../results/07-baseline")
# Bare expression so the notebook renders the summary table.
summary_results_07

Unnamed: 0,task,model_category,layer,auc,auprc,F1
0,LOS > 3 Days,GRU,GRU,69.56 +/- 0.336,64.01 +/- 0.320,55.25 +/- 1.528
1,LOS > 3 Days,GRU,LSTM,69.11 +/- 0.546,63.29 +/- 0.627,54.43 +/- 1.080
2,LOS > 7 Days,GRU,GRU,73.30 +/- 0.717,21.08 +/- 0.743,4.63 +/- 1.884
3,LOS > 7 Days,GRU,LSTM,72.45 +/- 0.589,19.68 +/- 0.864,2.99 +/- 1.777
4,In-Hospital Mortality,GRU,GRU,87.58 +/- 0.503,56.18 +/- 1.023,44.94 +/- 1.956
5,In-Hospital Mortality,GRU,LSTM,87.49 +/- 0.411,55.21 +/- 0.930,41.75 +/- 2.426
6,In-ICU Mortality,GRU,GRU,88.57 +/- 0.404,51.68 +/- 1.101,43.59 +/- 2.232
7,In-ICU Mortality,GRU,LSTM,88.34 +/- 0.415,50.38 +/- 1.174,40.89 +/- 2.595


### Baseline time-series and NLP data results

In [48]:
def get_results_multimodal(result_path: str):
    """Load the multimodal baseline runs and summarise them per task and embedding.

    Every entry of ``result_path`` is read with ``pd.read_pickle`` and must
    yield a dict-like record holding one run's ``auc``, ``auprc`` and ``F1``
    scores. The entry name must consist of at least five ``-``-separated
    fields, ``<layer>-<hidden_units>-<embedding>-<task>-<iteration>-...``;
    these are stored (as strings) in the run record.

    Args:
        result_path: Directory containing one pickled result per run.

    Returns:
        A ``(result, summary_results)`` tuple:

        * ``result`` - one row per run, sorted by ``task`` and ``auc`` in
          descending order.
        * ``summary_results`` - one row per ``(task, embedding)`` with the
          columns ``task, model_category, embedding, auc, auprc, F1``. Each
          metric is the string ``"<mean> +/- <std>"`` of the run scores
          multiplied by 100.

    Raises:
        ValueError: If an entry name has fewer than five ``-``-separated fields.
    """
    # NOTE(review): the label is spelled "Moltimodal"; it is kept unchanged
    # here because it ends up in the generated tables - confirm before fixing.
    model_category = "Average Moltimodal"
    reported_metrics = ["auc", "auprc", "F1"]
    task_mapping = {
        "los_3": "LOS > 3 Days",
        "los_7": "LOS > 7 Days",
        "mort_hosp": "In-Hospital Mortality",
        "mort_icu": "In-ICU Mortality",
    }

    runs = []
    # Sorted so that the order of tied rows does not depend on the arbitrary
    # order in which the OS lists the directory.
    for exp in sorted(os.listdir(result_path)):
        # NOTE(security): unpickling can execute arbitrary code - only point
        # this function at result files you produced yourself.
        result_dict = pd.read_pickle(os.path.join(result_path, exp))
        (
            result_dict["layer"],
            result_dict["hidden_units"],
            result_dict["embedding"],
            result_dict["task"],
            result_dict["iteration"],
        ) = exp.split("-")[:5]
        result_dict["model_category"] = model_category
        runs.append(result_dict)

    result = pd.DataFrame(runs).sort_values(by=["task", "auc"], ascending=False)

    summary_results = result.groupby(by=["task", "embedding"])[reported_metrics].agg(["mean", "std"])
    # Flatten the (metric, statistic) column MultiIndex to "auc_mean", "auc_std", ...
    summary_results.columns = ["_".join(col) for col in summary_results.columns]
    summary_results = summary_results.reset_index()
    summary_results["model_category"] = model_category

    # Replace the short task keys by their report labels; unknown keys are kept.
    summary_results["task"] = summary_results["task"].map(lambda task: task_mapping.get(task, task))

    for metric in reported_metrics:
        means = summary_results[f"{metric}_mean"] * 100
        stds = summary_results[f"{metric}_std"] * 100
        # Iterate over the values directly instead of indexing a row Series
        # with x[0] / x[1]: positional integer lookup on a string-labelled
        # Series is deprecated in pandas.
        summary_results[metric] = [
            "{:.2f} +/- {:.3f}".format(mean, std) for mean, std in zip(means, stds)
        ]

    summary_results = summary_results[["task", "model_category", "embedding", "auc", "auprc", "F1"]]
    return result, summary_results

# Load the multimodal baseline: `resutls_08` receives the per-run frame,
# `summary_results_08` the per-task/per-embedding summary returned by the loader.
resutls_08, summary_results_08 = get_results_multimodal( "../results/08-multimodal")
# Bare expression so the notebook renders the summary table.
summary_results_08

Unnamed: 0,task,model_category,embedding,auc,auprc,F1
0,LOS > 3 Days,Average Moltimodal,concat,70.14 +/- 0.430,64.25 +/- 0.709,55.26 +/- 1.272
1,LOS > 3 Days,Average Moltimodal,fasttext,70.01 +/- 0.110,63.58 +/- 0.155,56.58 +/- 0.028
2,LOS > 3 Days,Average Moltimodal,word2vec,70.27 +/- 0.233,63.97 +/- 0.536,55.19 +/- 1.603
3,LOS > 7 Days,Average Moltimodal,concat,71.87 +/- 0.098,21.54 +/- 0.391,3.82 +/- 0.743
4,LOS > 7 Days,Average Moltimodal,fasttext,72.84 +/- 0.922,21.69 +/- 0.309,3.84 +/- 1.565
5,LOS > 7 Days,Average Moltimodal,word2vec,72.41 +/- 0.870,21.52 +/- 0.652,1.94 +/- 1.146
6,In-Hospital Mortality,Average Moltimodal,concat,88.01 +/- 0.128,59.11 +/- 0.362,48.13 +/- 0.435
7,In-Hospital Mortality,Average Moltimodal,fasttext,87.83 +/- 0.152,58.58 +/- 0.822,47.05 +/- 2.454
8,In-Hospital Mortality,Average Moltimodal,word2vec,87.94 +/- 0.113,58.79 +/- 0.747,47.97 +/- 1.138
9,In-ICU Mortality,Average Moltimodal,concat,88.72 +/- 0.108,51.69 +/- 0.058,43.44 +/- 2.591


## Proposed models

In [46]:
def get_results_proposed(result_path: str):
    """Load the proposed-model runs and summarise them per task and embedding.

    Every entry of ``result_path`` is read with ``pd.read_pickle`` and must
    yield a dict-like record holding one run's ``auc``, ``auprc`` and ``F1``
    scores. The entry name must consist of at least six ``-``-separated
    fields,
    ``<sequence_name>-<hidden_unit_size>-<embedding>-<task>-<iteration>-<type_of_ner>...``;
    these are stored (as strings) in the run record.

    Args:
        result_path: Directory containing one pickled result per run.

    Returns:
        A ``(result, summary_results)`` tuple:

        * ``result`` - one row per run, sorted by ``task``, ``embedding`` and
          ``iteration`` in descending order (``iteration`` is a string, so it
          is ordered lexicographically).
        * ``summary_results`` - one row per ``(task, embedding)`` with the
          columns ``task, model_category, embedding, auc, auprc, F1``. Each
          metric is the string ``"<mean> +/- <std>"`` of the run scores
          multiplied by 100.

    Raises:
        ValueError: If an entry name has fewer than six ``-``-separated fields.
    """
    model_category = "Proposed Model"
    reported_metrics = ["auc", "auprc", "F1"]
    task_mapping = {
        "los_3": "LOS > 3 Days",
        "los_7": "LOS > 7 Days",
        "mort_hosp": "In-Hospital Mortality",
        "mort_icu": "In-ICU Mortality",
    }

    runs = []
    # Sorted so that the order of tied rows does not depend on the arbitrary
    # order in which the OS lists the directory.
    for exp in sorted(os.listdir(result_path)):
        # NOTE(security): unpickling can execute arbitrary code - only point
        # this function at result files you produced yourself.
        result_dict = pd.read_pickle(os.path.join(result_path, exp))
        (
            result_dict["sequence_name"],
            result_dict["hidden_unit_size"],
            result_dict["embedding"],
            result_dict["task"],
            result_dict["iteration"],
            result_dict["type_of_ner"],
        ) = exp.split("-")[:6]
        result_dict["model_category"] = model_category
        runs.append(result_dict)

    result = pd.DataFrame(runs).sort_values(
        by=["task", "embedding", "iteration"], ascending=False
    )

    summary_results = result.groupby(by=["task", "embedding"])[reported_metrics].agg(["mean", "std"])
    # Flatten the (metric, statistic) column MultiIndex to "auc_mean", "auc_std", ...
    summary_results.columns = ["_".join(col) for col in summary_results.columns]
    summary_results = summary_results.reset_index()
    summary_results["model_category"] = model_category

    # Replace the short task keys by their report labels; unknown keys are kept.
    summary_results["task"] = summary_results["task"].map(lambda task: task_mapping.get(task, task))

    for metric in reported_metrics:
        means = summary_results[f"{metric}_mean"] * 100
        stds = summary_results[f"{metric}_std"] * 100
        # Iterate over the values directly instead of indexing a row Series
        # with x[0] / x[1]: positional integer lookup on a string-labelled
        # Series is deprecated in pandas.
        summary_results[metric] = [
            "{:.2f} +/- {:.3f}".format(mean, std) for mean, std in zip(means, stds)
        ]

    summary_results = summary_results[["task", "model_category", "embedding", "auc", "auprc", "F1"]]
    return result, summary_results

# Load the proposed-model runs: `result_09` receives the per-run frame,
# `summary_results_09` the per-task/per-embedding summary returned by the loader.
result_09, summary_results_09 = get_results_proposed("../results/09-cnn")
# Bare expression so the notebook renders the summary table.
summary_results_09

Unnamed: 0,task,model_category,embedding,auc,auprc,F1
0,LOS > 3 Days,Proposed Model,concat,69.91 +/- 0.347,64.26 +/- 0.371,55.44 +/- 1.326
1,LOS > 3 Days,Proposed Model,fasttext,69.83 +/- 0.245,63.94 +/- 0.301,55.02 +/- 1.653
2,LOS > 3 Days,Proposed Model,word2vec,70.05 +/- 0.227,64.45 +/- 0.451,55.42 +/- 1.277
3,LOS > 7 Days,Proposed Model,concat,72.65 +/- 0.357,22.26 +/- 0.744,2.38 +/- 0.851
4,LOS > 7 Days,Proposed Model,fasttext,72.62 +/- 0.748,22.15 +/- 0.628,2.46 +/- 1.791
5,LOS > 7 Days,Proposed Model,word2vec,73.34 +/- 0.265,22.47 +/- 0.642,2.42 +/- 1.633
6,In-Hospital Mortality,Proposed Model,concat,88.03 +/- 0.201,58.07 +/- 0.526,46.01 +/- 1.626
7,In-Hospital Mortality,Proposed Model,fasttext,87.85 +/- 0.210,57.73 +/- 0.529,45.01 +/- 1.618
8,In-Hospital Mortality,Proposed Model,word2vec,88.10 +/- 0.233,58.43 +/- 0.517,46.10 +/- 1.380
9,In-ICU Mortality,Proposed Model,concat,88.45 +/- 0.260,51.92 +/- 0.665,42.00 +/- 3.203


## Best baseline models

In [50]:
# Stack the two baseline summaries row-wise. The first frame has a `layer`
# column and the second an `embedding` column, so each ends up NaN for the
# rows coming from the other frame.
summary_results = pd.concat([summary_results_07, summary_results_08], axis=0)


In [67]:
# table 3
# Order the combined baseline summary, fix the column layout of the report
# table and write it out as CSV.
summary_results = summary_results.sort_values(
    by=["task", "model_category", "embedding"]
).loc[:, ["task", "model_category", "layer", "embedding", "auc", "auprc", "F1"]]
summary_results.to_csv("../for_report/table_3.csv", index=False)

In [68]:
# table 4
def _best_by_mean(formatted_scores):
    """Return the "<mean> +/- <std>" string whose numeric mean is the largest."""
    return max(formatted_scores, key=lambda score: float(score.split(" +/- ")[0]))


# The metric columns hold formatted strings, so the built-in "max" aggregation
# would compare them character by character (e.g. "9.50 ..." would beat
# "10.20 ..."). Compare the parsed means instead; each metric is still picked
# independently of the others, per task.
summary_results_2 = (
    summary_results.groupby(by=["task"])
    .agg({"auc": _best_by_mean, "auprc": _best_by_mean, "F1": _best_by_mean})
    .reset_index()
)
summary_results_2["model_category"] = "Best Baseline"
summary_results_2["embedding"] = ""
# Put the best-baseline rows next to the proposed-model rows for comparison.
summary_results_2 = pd.concat([summary_results_2, summary_results_09], axis=0)
summary_results_2 = summary_results_2.sort_values(
    by=["task", "model_category", "embedding"], ascending=False
)
summary_results_2 = summary_results_2[["task", "model_category", "embedding", 'auc', 'auprc', 'F1']]
summary_results_2.to_csv("../for_report/table_4.csv", index=False)

In [18]:
# Stack the per-run records of both baselines and rank them so that, within
# each (task, model_category) pair, the run with the highest AUC comes first.
resutls = pd.concat([resutls_07, resutls_08], axis=0).sort_values(
    by=["task", "model_category", "auc"], ascending=False
)
# drop_duplicates keeps the first occurrence, i.e. the top-ranked run per pair.
top_resutls = resutls.drop_duplicates(subset=["task", "model_category"])
print(top_resutls.to_string(index=False))

     auc    auprc      acc       F1 hidden_units layer      task iteration     model_category embedding
0.892376 0.533598 0.941352 0.453961          128   GRU  mort_icu         7                GRU       NaN
0.890850 0.529524 0.941394 0.450216          128   GRU  mort_icu         1 Average Moltimodal  word2vec
0.884781 0.574463 0.913753 0.457308          256   GRU mort_hosp         2                GRU       NaN
0.880960 0.593695 0.916936 0.478261          256   GRU mort_hosp         1 Average Moltimodal    concat
0.748483 0.211612 0.917433 0.032345          256   GRU     los_7         8                GRU       NaN
0.734899 0.219046 0.917859 0.027322          128   GRU     los_7         1 Average Moltimodal  fasttext
0.703048 0.649674 0.669043 0.571599          128   GRU     los_3         8                GRU       NaN
0.704413 0.647540 0.666359 0.561552          256   GRU     los_3         1 Average Moltimodal    concat
