In [1]:
import pickle
import sys
import numpy as np
def print_res(res_dict, model_type):
    """
    Find the best configuration index (or indexes) for one model type.

    The best configuration is the one with the highest AUC. If several
    configurations tie on AUC, the tie is broken by the highest accuracy.
    Configurations that tie on both AUC and accuracy are all returned.

    :param res_dict: evaluation results; must contain the keys
        ``model_type + "_auc"`` and ``model_type + "_accu"``, each holding
        a sequence with one entry per configuration
    :type res_dict: dict
    :param model_type: key prefix of the model, e.g. 'ratio' or 'norm'
    :type model_type: str
    :return: 0-based indexes into the metric lists of the best
        configuration(s), in ascending order
    :rtype: list
    :raises ValueError: if the AUC list is empty
    """
    aucs = res_dict[model_type+"_auc"]
    best_auc = max(aucs)
    best_index = [index for index, value in enumerate(aucs)
                  if value == best_auc]
    # if there are multiple best indexes, break the tie on accuracy
    if len(best_index) > 1:
        accus = res_dict[model_type+"_accu"]
        best_accu = max(accus[ind] for ind in best_index)
        # keep the ORIGINAL indexes (not positions inside the tie subset),
        # so callers can use them to index every metric list
        best_index = [ind for ind in best_index if accus[ind] == best_accu]
    return best_index


def _empty_results():
    """Return a fresh dict with an empty list for every expected metric key."""
    return {"{}_{}".format(model, metric): []
            for model in ("con", "dem", "ratio", "norm")
            for metric in ("auc", "accu", "cor", "ppl")}


def _load_results(path):
    """Load one pickled result dict and store every value as a list."""
    merged = _empty_results()
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only open result files from a trusted source.
    with open(path, "rb") as f:
        res_dict = pickle.load(f)
    for key, value in res_dict.items():
        # setdefault tolerates keys that are not part of the template
        target = merged.setdefault(key, [])
        if isinstance(value, list):
            target.extend(value)
        else:
            # scalar results are wrapped so every value is a list
            target.append(value)
    return merged


def find_best_pattern(data_type, zero_style, model_type,
                      shares=(25, 50, 75, 100), ppl_dir="../ppl"):
    """
    Print a markdown table with the best configuration for each share level.

    For every share in ``shares`` the pickled result dict stored at
    ``<ppl_dir>/accumu_<data_type>_<zero_style>_<share>.pkl`` is loaded.
    All files are read before anything is written, then the table header
    and one row per share (produced by ``format_result``) are written to
    ``sys.stdout``.

    :param data_type: dataset name used in the file name and the row label
    :type data_type: str
    :param zero_style: second component of the result file name
    :type zero_style: str
    :param model_type: metric key prefix forwarded to ``format_result`` to
        select the best configuration, e.g. 'ratio'
    :type model_type: str
    :param shares: share levels to report, one table row each
    :type shares: iterable of int
    :param ppl_dir: directory that contains the pickled result files
    :type ppl_dir: str
    :return: None
    """
    results = []
    for share in shares:
        path = "{}/accumu_{}_{}_{}.pkl".format(
            ppl_dir, data_type, zero_style, share)
        results.append((share, _load_results(path)))

    sys.stdout.write("| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|\n")
    sys.stdout.write("| - | - | - | - | - | - | - | - | - | - | - |\n")
    for share, share_results in results:
        format_result(share_results, model_type, data_type, share)
    
    
def format_result(res_dict, model_type, data_name, share):
    """
    Write one markdown table row describing the best configuration.

    The best configuration is the first index returned by ``print_res``
    for ``model_type``. Every metric in ``res_dict`` is narrowed down to
    that index, and the mean and standard deviation of the con, dem and
    ratio AUC / accuracy / correlation values are written to
    ``sys.stdout``.

    :param res_dict: evaluation results, metric name -> list of values
        (one per configuration) or a single value
    :type res_dict: dict
    :param model_type: metric key prefix used to pick the best index
    :type model_type: str
    :param data_name: dataset name shown in the first column
    :type data_name: str
    :param share: share level appended to the dataset name
    :type share: int
    :return: None
    """
    best_index = print_res(res_dict, model_type)[0]
    best_dict = {}
    # narrow down to the best result
    for key, values in res_dict.items():
        if not isinstance(values, list):
            best_dict[key] = values
        elif not values:
            # metric was never recorded: report NaN instead of crashing
            best_dict[key] = float("nan")
        elif best_index < len(values):
            best_dict[key] = values[best_index]
        else:
            # list shorter than the best index (e.g. a single shared
            # value): fall back to its first entry
            best_dict[key] = values[0]

    # mean and SD for each printed metric, in table column order
    cells = []
    for model in ("con", "dem", "ratio"):
        for metric in ("auc", "accu", "cor"):
            value = best_dict[model + "_" + metric]
            cells.append(np.mean(value))
            cells.append(np.std(value))
    # best index is shown 1-based
    sys.stdout.write("| {} | {} | {} ({})| {} ({}) | {} ({})| {} ({})| {} ({}) | {} ({})| {} ({})| {} ({}) | {} ({})|\n".format(
        data_name+"-"+str(share), best_index+1, *cells
    ))


In [6]:
# Print the summary table for the "adr" results ("first" zero style);
# each row's best index is selected on the "ratio" metrics.
find_best_pattern("adr", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| adr-25 | 7 | 0.616 (0.0)| 0.613 (0.0) | -0.292 (0.0)| 0.362 (0.0)| 0.387 (0.0) | 0.131 (0.0)| 0.795 (0.0)| 0.716 (0.0) | -0.512 (0.0)|
| adr-50 | 9 | 0.616 (0.0)| 0.613 (0.0) | -0.292 (0.0)| 0.344 (0.0)| 0.413 (0.0) | 0.155 (0.0)| 0.797 (0.0)| 0.69 (0.0) | -0.543 (0.0)|
| adr-75 | 6 | 0.616 (0.0)| 0.613 (0.0) | -0.292 (0.0)| 0.295 (0.0)| 0.368 (0.0) | 0.349 (0.0)| 0.72 (0.0)| 0.632 (0.0) | -0.388 (0.0)|
| adr-100 | 4 | 0.616 (0.0)| 0.613 (0.0) | -0.292 (0.0)| 0.42 (0.0)| 0.465 (0.0) | 0.08 (0.0)| 0.78 (0.0)| 0.723 (0.0) | -0.566 (0.0)|


In [7]:
# Print the summary table for the "db" results ("first" zero style);
# each row's best index is selected on the "ratio" metrics.
find_best_pattern("db", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| db-25 | 7 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.371 (0.0)| 0.394 (0.0) | 0.155 (0.0)| 0.8 (0.0)| 0.758 (0.0) | -0.418 (0.0)|
| db-50 | 5 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.354 (0.0)| 0.383 (0.0) | 0.204 (0.0)| 0.823 (0.0)| 0.732 (0.0) | -0.386 (0.0)|
| db-75 | 2 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.413 (0.0)| 0.45 (0.0) | 0.177 (0.0)| 0.712 (0.0)| 0.643 (0.0) | -0.282 (0.0)|
| db-100 | 5 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.412 (0.0)| 0.405 (0.0) | 0.056 (0.0)| 0.764 (0.0)| 0.706 (0.0) | -0.289 (0.0)|


In [8]:
# Print the summary table for the "ccc" results ("first" zero style);
# each row's best index is selected on the "ratio" metrics.
find_best_pattern("ccc", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| ccc-25 | 5 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.642 (0.0)| 0.625 (0.0) | nan (nan)| 0.719 (0.0)| 0.69 (0.0) | nan (nan)|
| ccc-50 | 12 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.569 (0.0)| 0.55 (0.0) | nan (nan)| 0.78 (0.0)| 0.718 (0.0) | nan (nan)|
| ccc-75 | 10 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.453 (0.0)| 0.457 (0.0) | nan (nan)| 0.792 (0.0)| 0.736 (0.0) | nan (nan)|
| ccc-100 | 9 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.59 (0.0)| 0.574 (0.0) | nan (nan)| 0.749 (0.0)| 0.69 (0.0) | nan (nan)|


In [9]:
# NOTE(review): same call (and same output) as the earlier "db" cell;
# candidate for removal.
find_best_pattern("db", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| db-25 | 7 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.371 (0.0)| 0.394 (0.0) | 0.155 (0.0)| 0.8 (0.0)| 0.758 (0.0) | -0.418 (0.0)|
| db-50 | 5 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.354 (0.0)| 0.383 (0.0) | 0.204 (0.0)| 0.823 (0.0)| 0.732 (0.0) | -0.386 (0.0)|
| db-75 | 2 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.413 (0.0)| 0.45 (0.0) | 0.177 (0.0)| 0.712 (0.0)| 0.643 (0.0) | -0.282 (0.0)|
| db-100 | 5 | 0.658 (0.0)| 0.606 (0.0) | -0.187 (0.0)| 0.412 (0.0)| 0.405 (0.0) | 0.056 (0.0)| 0.764 (0.0)| 0.706 (0.0) | -0.289 (0.0)|


In [4]:
# Print the summary table for the "adr_train" results ("first" zero style);
# each row's best index is selected on the "ratio" metrics.
find_best_pattern("adr_train", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| adr_train-25 | 8 | 0.564 (0.0)| 0.523 (0.0) | -0.268 (0.0)| 0.331 (0.0)| 0.346 (0.0) | 0.137 (0.0)| 0.78 (0.0)| 0.71 (0.0) | -0.522 (0.0)|
| adr_train-50 | 9 | 0.564 (0.0)| 0.523 (0.0) | -0.268 (0.0)| 0.324 (0.0)| 0.364 (0.0) | 0.151 (0.0)| 0.749 (0.0)| 0.636 (0.0) | -0.517 (0.0)|
| adr_train-75 | 6 | 0.564 (0.0)| 0.523 (0.0) | -0.268 (0.0)| 0.296 (0.0)| 0.374 (0.0) | 0.336 (0.0)| 0.689 (0.0)| 0.636 (0.0) | -0.359 (0.0)|
| adr_train-100 | 2 | 0.564 (0.0)| 0.523 (0.0) | -0.268 (0.0)| 0.45 (0.0)| 0.486 (0.0) | -0.053 (0.0)| 0.746 (0.0)| 0.654 (0.0) | -0.57 (0.0)|


In [5]:
# Print the summary table for the "adr_test" results ("first" zero style);
# each row's best index is selected on the "ratio" metrics.
find_best_pattern("adr_test", "first", "ratio")

| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| adr_test-25 | 5 | 0.759 (0.0)| 0.75 (0.0) | -0.382 (0.0)| 0.415 (0.0)| 0.438 (0.0) | 0.046 (0.0)| 0.87 (0.0)| 0.729 (0.0) | -0.466 (0.0)|
| adr_test-50 | 6 | 0.759 (0.0)| 0.75 (0.0) | -0.382 (0.0)| 0.396 (0.0)| 0.396 (0.0) | 0.134 (0.0)| 0.894 (0.0)| 0.833 (0.0) | -0.601 (0.0)|
| adr_test-75 | 8 | 0.759 (0.0)| 0.75 (0.0) | -0.382 (0.0)| 0.328 (0.0)| 0.375 (0.0) | 0.318 (0.0)| 0.792 (0.0)| 0.729 (0.0) | -0.498 (0.0)|
| adr_test-100 | 4 | 0.759 (0.0)| 0.75 (0.0) | -0.382 (0.0)| 0.521 (0.0)| 0.5 (0.0) | 0.008 (0.0)| 0.87 (0.0)| 0.833 (0.0) | -0.577 (0.0)|


In [2]:
# Re-run of the "ccc" summary table, labelled as a new version.
# NOTE(review): the ccc-50 row differs from the earlier "ccc" cell's output,
# presumably because the underlying pickle was regenerated - confirm.
print("new ccc version")
find_best_pattern("ccc", "first", "ratio")

new ccc version
| dataset | best index| con AUC (SD)| con ACC (SD) | con r with MMSE (SD)| dem AUC (SD)| dem ACC (SD) | dem r with MMSE (SD)| ratio AUC (SD)| ratio ACC (SD) | ratio r with MMSE (SD)|
| - | - | - | - | - | - | - | - | - | - | - |
| ccc-25 | 5 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.642 (0.0)| 0.625 (0.0) | nan (nan)| 0.719 (0.0)| 0.69 (0.0) | nan (nan)|
| ccc-50 | 3 | 0.597 (0.0)| 0.567 (0.0) | nan (nan)| 0.455 (0.0)| 0.454 (0.0) | nan (nan)| 0.715 (0.0)| 0.667 (0.0) | nan (nan)|
| ccc-75 | 10 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.453 (0.0)| 0.457 (0.0) | nan (nan)| 0.792 (0.0)| 0.736 (0.0) | nan (nan)|
| ccc-100 | 9 | 0.74 (0.0)| 0.646 (0.0) | nan (nan)| 0.59 (0.0)| 0.574 (0.0) | nan (nan)| 0.749 (0.0)| 0.69 (0.0) | nan (nan)|
