This script creates CSV files to compare and evaluate the directly predicted morphology codes against the merged histology and behavior codes.


In [1]:
import os
import pandas as pd
from datetime import date
from eval_helper_functions import load_df, get_mor_his_beh_scores, get_task_path_dict, get_pred_label_files_MT, get_pred_label_files

In [2]:
# input dir: the collected model outputs are read from a sibling folder
# one level above the working directory
result_collection = os.path.join("..", "model_output")

# sanity check: prints True when the input directory can be found
input_dir_found = os.path.exists(result_collection)
print(input_dir_found)


True


In [3]:
# output dir: one folder per run date, created next to this notebook
store_results_dir = os.path.join(".", f"{date.today()}_mor_his_beh_eval")
# exist_ok=True makes re-running on the same day a no-op without the
# check-then-create race of `if not exists: mkdir`; it also fails loudly
# (FileExistsError) if the path is occupied by a regular file instead of
# letting the later to_csv calls fail with a less obvious error.
os.makedirs(store_results_dir, exist_ok=True)

In [4]:
# task names used to look up single-task model outputs
tasks = ["mor", "his", "beh"]
# task names used to look up multi-task ("MT...") model outputs
mt_tasks = ["morsit", "behhissit"]

# models to evaluate, listed in the order they should appear in the result tables
# note: KB-BERT was abbreviated to BERT in the filenames
model_types = [
    "LOGR",
    "CNN",
    "MTCNN",
    "HISAN",
    "MTHISAN",
    "BERT",
    "MTBERT",
]

# one letter per model ("a" for the first entry, "b" for the second, ...);
# the letter is put in front of the model name in the summary row labels so
# that sorting those labels alphabetically reproduces the order above
model_sorter = {
    name: chr(ord("a") + position)
    for position, name in enumerate(model_types)
}

In [5]:
# base name (without extension) of the files holding predicted and true labels
pred_true_labels_filename = "pred_true_labels"

# both summary tables use the same column layout
summary_columns = ["acc_mor", "acc_hisbeh",
                   "acc_his_from_mor", "acc_his",
                   "acc_beh_from_mor", "acc_beh",
                   "f1_mor", "f1_hisbeh",
                   "f1_his_from_mor", "f1_his",
                   "f1_beh_from_mor", "f1_beh"]

# one row per model/cw combination, holding the mean over all folds
model_summary = pd.DataFrame(columns=summary_columns)
model_summary_excl_other = pd.DataFrame(columns=summary_columns)


def _fold_table_with_mean_std(scores_per_fold):
    """Build a table with one row per fold plus appended "mean" and "std" rows.

    `scores_per_fold` maps the fold number to whatever `get_mor_his_beh_scores`
    returned for that fold (presumably a mapping of score name to value --
    TODO confirm against eval_helper_functions).
    """
    table = pd.DataFrame.from_dict(scores_per_fold, orient="index")
    # compute both statistics first so that the appended "mean" row
    # does not leak into the standard deviation
    fold_mean, fold_std = table.mean(), table.std()
    table.loc["mean"] = fold_mean
    table.loc["std"] = fold_std
    return table


for model_type in model_types:
    for cw_flag in [True, False]:
        # number of distinct predicted labels per fold (reports of the other class excluded)
        unique_labels_pred_his_beh_excl_other = []
        unique_labels_pred_mor_excl_other = []

        # multi-task models ("MT" in the name) use their own task names and file lookup
        is_multitask = "MT" in model_type
        lookup_tasks = mt_tasks if is_multitask else tasks
        collect_files = get_pred_label_files_MT if is_multitask else get_pred_label_files

        task_dict = get_task_path_dict(result_collection, model_type=model_type, tasks=lookup_tasks)
        pred_true_label_files = collect_files(task_path_dict=task_dict,
                                              pred_true_labels_filename=f"{pred_true_labels_filename}.csv",
                                              cw_flag=cw_flag)

        print(f"Creating evaluation for model {model_type}, (cws {cw_flag})")

        all_folds_scores = {}
        all_folds_scores_excl_other = {}
        for fold in range(1, 6):
            fold_key = str(fold)
            mor_df, his_df, beh_df = (load_df(pred_true_label_files[task][fold_key])
                                      for task in ("mor", "his", "beh"))

            true_his, true_beh = his_df["labels_true_alph"], beh_df["labels_true_alph"]
            pred_his, pred_beh = his_df["labels_pred_alph"], beh_df["labels_pred_alph"]
            pred_mor = mor_df["labels_pred_alph"]

            # merged code = histology code followed by behavior code
            combo_df = pd.DataFrame({"true_mor": mor_df["labels_true_alph"],
                                     "true_his": true_his,
                                     "true_beh": true_beh,
                                     "true_hisbeh": true_his + true_beh,
                                     "pred_mor": pred_mor,
                                     "pred_his": pred_his,
                                     "pred_beh": pred_beh,
                                     "pred_hisbeh": pred_his + pred_beh})

            # the three label files must describe the same reports in the same order
            assert list(combo_df["true_mor"]) == list(combo_df["true_hisbeh"])

            # split the directly predicted morphology code: the last character
            # is the behavior part, everything before it the histology part
            predicted_mor_codes = pred_mor.values
            combo_df["pred_his_from_mor"] = [code[:-1] for code in predicted_mor_codes]
            combo_df["pred_beh_from_mor"] = [code[-1] for code in predicted_mor_codes]

            all_folds_scores[fold] = get_mor_his_beh_scores(combo_df, excl_other=False)

            # include only reports that do not belong to other class
            combo_excl_other = combo_df[combo_df["true_mor"] != "99999"]
            assert len(combo_excl_other) < len(combo_df)

            for column, other_code in (("true_mor", "99999"), ("true_his", "9999"), ("true_beh", "9")):
                assert other_code not in combo_excl_other[column].unique()

            unique_labels_pred_his_beh_excl_other.append(len(combo_excl_other["pred_hisbeh"].unique()))
            unique_labels_pred_mor_excl_other.append(len(combo_excl_other["pred_mor"].unique()))
            all_folds_scores_excl_other[fold] = get_mor_his_beh_scores(combo_excl_other, excl_other=True)

        # create dfs and add mean / std rows
        all_folds_scores_df = _fold_table_with_mean_std(all_folds_scores)
        all_folds_scores_excl_other_df = _fold_table_with_mean_std(all_folds_scores_excl_other)

        # save as csv files
        cw_suffix = "_CW_" if cw_flag else ""
        file_tag = cw_suffix[:-1]    # "_CW" (or "") goes between model name and file suffix
        row_prefix = cw_suffix[1:]   # "CW_" (or "") goes in front of the summary row label
        sort_letter = model_sorter[model_type]

        all_folds_scores_df.to_csv(
            os.path.join(store_results_dir, f"{model_type}{file_tag}_eval_with_other.csv"),
            encoding="utf-8")
        all_folds_scores_excl_other_df.to_csv(
            os.path.join(store_results_dir, f"{model_type}{file_tag}_eval_excl_other.csv"),
            encoding="utf-8")

        model_summary.loc[f"{row_prefix}{sort_letter}_{model_type}"] = all_folds_scores_df.loc["mean"]
        model_summary_excl_other.loc[f"{row_prefix}{sort_letter}_{model_type}_excl_other"] = all_folds_scores_excl_other_df.loc["mean"]
        print("Number of classes (combined histology+behavior)", unique_labels_pred_his_beh_excl_other)

# sort the rows by label and write both summaries
for summary_df, summary_file in ((model_summary, "SUMMARY_mor_his_beh_means_with_other.csv"),
                                 (model_summary_excl_other, "SUMMARY_mor_his_beh_means_excl_other.csv")):
    summary_df.sort_index(inplace=True)
    summary_df.to_csv(os.path.join(store_results_dir, summary_file), encoding="utf-8")



Creating evaluation for model LOGR, (cws True)
Number of classes (combined histology+behavior) [25, 24, 25, 23, 26]
Creating evaluation for model LOGR, (cws False)
Number of classes (combined histology+behavior) [20, 18, 20, 20, 19]
Creating evaluation for model CNN, (cws True)
Number of classes (combined histology+behavior) [32, 29, 30, 33, 31]
Creating evaluation for model CNN, (cws False)
Number of classes (combined histology+behavior) [24, 26, 25, 24, 24]
Creating evaluation for model MTCNN, (cws True)
Number of classes (combined histology+behavior) [27, 26, 28, 28, 27]
Creating evaluation for model MTCNN, (cws False)
Number of classes (combined histology+behavior) [20, 25, 21, 23, 21]
Creating evaluation for model HISAN, (cws True)
Number of classes (combined histology+behavior) [31, 28, 29, 33, 32]
Creating evaluation for model HISAN, (cws False)
Number of classes (combined histology+behavior) [30, 27, 25, 28, 28]
Creating evaluation for model MTHISAN, (cws True)
Number of classe

In [6]:
# Build one wide table per summary: one row per model, with the "+CW" and
# "-CW" results side by side instead of stacked in separate rows.
models_in_order = list(model_sorter)
# number of rows per cw setting; derived from model_sorter so the split below
# stays correct when models are added or removed
n_models = len(models_in_order)

# final column order of the wide table (identical for both summaries)
table_order = ["acc_mor_+CW","acc_hisbeh_+CW",
               "acc_mor_-CW","acc_hisbeh_-CW",
               "f1_mor_+CW", "f1_hisbeh_+CW",
               "f1_mor_-CW", "f1_hisbeh_-CW",

               "acc_his_from_mor_+CW", "acc_his_+CW",
               "acc_his_from_mor_-CW", "acc_his_-CW",
               "f1_his_from_mor_+CW",  "f1_his_+CW",
               "f1_his_from_mor_-CW",  "f1_his_-CW",

               "acc_beh_from_mor_+CW", "acc_beh_+CW",
               "acc_beh_from_mor_-CW", "acc_beh_-CW",
               "f1_beh_from_mor_+CW",  "f1_beh_+CW",
               "f1_beh_from_mor_-CW",  "f1_beh_-CW"]

for result_df, suffix in [(model_summary, "with_other"),
                          (model_summary_excl_other, "excl_other")]:

    # save with rounded numbers (scores scaled by 100, two decimals)
    result_df = (result_df * 100).round(decimals=2)
    result_df.to_csv(os.path.join(store_results_dir, f"SUMMARY_ROUNDED_mor_his_beh_means_{suffix}.csv"), encoding="utf-8")

    # The summaries were sorted by row label; labels starting with the
    # upper-case "CW_" prefix sort before the ones starting with a lower-case
    # sorter letter, so the first n_models rows are the +CW results and the
    # remaining rows the -CW results, each in model_sorter order.
    results_with_CW = result_df.iloc[:n_models, :].copy(deep=True)
    results_without_CW = result_df.iloc[n_models:, :].copy(deep=True)
    assert len(results_with_CW) == len(results_without_CW)

    # verify the row order before replacing the labels by the plain model names
    for i, modelname in enumerate(models_in_order):
        assert modelname in results_with_CW.index[i]
        assert modelname in results_without_CW.index[i]
    # assign new index to concat dataframes neatly later on
    results_with_CW.index = models_in_order
    results_without_CW.index = models_in_order

    # change column names to keep info about cws
    results_with_CW = results_with_CW.add_suffix("_+CW")
    results_without_CW = results_without_CW.add_suffix("_-CW")

    table_df = pd.concat([results_with_CW, results_without_CW], axis=1)
    table_df_final_order = table_df[table_order]

    table_df_final_order.to_csv(os.path.join(store_results_dir, f"SUMMARY_ROUNDED_TABLE_mor_his_beh_means_{suffix}.csv"), encoding="utf-8")
