In [None]:
# In[ ]:
# ** import package **
import os
import sys
import json
import pathlib
sys.path.append("..")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import traceback
from tqdm import tqdm
from datetime import timedelta
from _utils.customlogger import customlogger as CL

# Show the full content of every column. None means "no width limit"; the old
# -1 sentinel was deprecated and is rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)
# Display at most 50 rows of a frame.
pd.set_option('display.max_rows', 50)


In [None]:
# In[ ]:
# ** path configuration **
# Results are read from (and figures written to) a directory next to the
# current working directory.
current_dir = pathlib.Path.cwd()
parent_dir = current_dir.parent

# Base name of the absolute current directory, with any trailing extension removed.
_abs_here = os.path.abspath('')
curr_file_name = os.path.splitext(os.path.basename(_abs_here))[0]

# Alternative location (live results instead of the backup snapshot):
#   parent_dir / 'result' / 'eicu'
result_dir = parent_dir / 'result' / '_backup' / '2022_08_23-09_18_26_AM' / 'eicu'
result_dir.mkdir(mode=0o777, parents=True, exist_ok=True)

# Names of the result sub-directories to load.
# Alternative configuration: append 'global' to edges_name and set
# central_name = ['central'].
edges_name = ['edge_{}'.format(edge_idx) for edge_idx in range(6)]
central_name = []

In [None]:
# Read the stored feature-importance and model-performance tables for every
# configured name, keeping each one transposed relative to how it is stored.
feature_importance_dict, model_performance_dict = {}, {}
for edge_name in [*central_name, *edges_name]:
    feature_importance_dict[edge_name] = pd.read_feather(
        result_dir / edge_name / 'feature_importance.feather'
    ).T
    model_performance_dict[edge_name] = pd.read_feather(
        result_dir / edge_name / 'model_performance.feather'
    ).T

In [None]:
# annotate each cell with the numeric value of integer format
# heatmap by seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def convertRank(df):
    """Replace every value by its rank within its own column (1 = largest).

    Ties share the best (lowest) rank of their group. Ranking with
    ``method='min'`` always yields whole numbers, so the cast to ``int32``
    no longer truncates the fractional ranks that the default ``'average'``
    method produces for tied values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are ranked independently. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Same shape and labels as ``df`` with ``int32`` ranks.

    Raises
    ------
    ValueError
        If ``df`` contains missing values: their rank is NaN, which cannot be
        cast to an integer dtype.
    """
    # DataFrame.rank works column-wise by default and returns a new frame.
    ranks = df.rank(ascending=False, method='min')
    return ranks.astype('int32')

In [None]:
# Inspect the loaded (transposed) feature-importance frame for edge_0.
feature_importance_dict['edge_0']

In [None]:
# For every edge draw two annotated heatmaps and save each as a PNG under
# result_dir: the raw importances, then their per-column ranks.
for edge_name in edges_name:
    # (transform applied to the frame, annotation format, title/file suffix)
    for to_plot, number_fmt, suffix in (
        (lambda frame: frame, '.2f', ''),
        (convertRank, 'd', '_rank'),
    ):
        plt.rcParams['figure.figsize'] = [15, 7]
        fig = plt.figure()
        sns.heatmap(to_plot(feature_importance_dict[edge_name]), annot=True, fmt=number_fmt)
        plt.title(f'feature importance per round ({edge_name}){suffix}', fontsize=20)
        plt.savefig(
            f'{result_dir}/feature_importance_{edge_name}{suffix}.png',
            format='png',
            dpi=300,
            facecolor='white',
            transparent=True,
            bbox_inches='tight',
        )
        plt.show()
    


In [None]:
# Keep only the last column of the central model's feature importance.
# 'central' is loaded only when it is listed in central_name, so guard the
# lookup instead of failing with a KeyError when it is absent.
if 'central' in feature_importance_dict:
    central_df = feature_importance_dict['central']
    last_column = central_df.columns[-1]
    # Double brackets keep the selection a one-column DataFrame; a column
    # labelled 1 is relabelled 0, any other label is left untouched.
    feature_importance_dict['central'] = central_df[[last_column]].rename({1: 0}, axis=1)
else:
    print("'central' results are not loaded; skipping.")


In [None]:
# Heatmaps of the central model's feature importance: raw values, then ranks.
# 'central' is loaded only when it is listed in central_name, so skip the
# plots instead of failing with a KeyError when it is absent.
if 'central' in feature_importance_dict:
    # (transform applied to the frame, annotation format, title/file suffix)
    for to_plot, number_fmt, suffix in (
        (lambda frame: frame, '.3f', ''),
        (convertRank, 'd', '_rank'),
    ):
        plt.rcParams['figure.figsize'] = [2, 10]
        fig = plt.figure()
        sns.heatmap(to_plot(feature_importance_dict['central']), annot=True, fmt=number_fmt)
        plt.title('feature importance ({}){}'.format('central', suffix), fontsize=13)
        plt.savefig('{}/feature_importance_{}{}.png'.format(result_dir, 'central', suffix), format='png',
                dpi=300, facecolor='white', transparent=True,  bbox_inches='tight')
        plt.show()
else:
    print("'central' results are not loaded; skipping.")


In [None]:
# Flip edge_0's performance frame back and show only the three summary metrics.
model_performance_dict['edge_0'].T[['accuracy', 'roc_auc', 'f1score']]

In [None]:
# NOTE: edge_name is whatever value the most recently executed loop left behind.
df = model_performance_dict[edge_name].T
df

In [None]:

# Add the true-positive rate TP / (TP + FN) and the false-positive rate
# FP / (FP + TN) to the flipped performance frame, computed row by row.
# NOTE: edge_name is whatever value the most recently executed loop left behind.
df = model_performance_dict[edge_name].T.assign(
    tpr=lambda frame: frame.apply(lambda row: row["TP"] / (row["TP"] + row["FN"]), axis=1),
    fpr=lambda frame: frame.apply(lambda row: row["FP"] / (row["FP"] + row["TN"]), axis=1),
)
df


In [None]:
# Confusion matrix drawn from hard-coded counts.
# Layout (rows = true label, columns = predicted label, both ordered 1 then 0):
#   [[TP, FN],
#    [FP, TN]]
group_names = ['TP', 'FN', 'FP', 'TN']
group_counts = [240, 153, 1081, 4897]

# One "name\ncount" annotation per cell; zip yields pairs, so unpack two values.
labels = ['{}\n{}'.format(name, count) for name, count in zip(group_names, group_counts)]
labels = np.asarray(labels).reshape(2, 2)

# Colour scale for the heatmap, derived from the counts above so the cell is
# self-contained.
# NOTE(review): normalising each true-label row to sum to 1 is an assumption
# about what cf_norm was meant to hold -- confirm.
cf_counts = np.asarray(group_counts, dtype=float).reshape(2, 2)
cf_norm = cf_counts / cf_counts.sum(axis=1, keepdims=True)

cm_figure = sns.heatmap(cf_norm, annot=labels, fmt='', xticklabels=['1','0'], yticklabels=['1','0'], cmap='Blues')
cm_figure.set_title('Confusion matrix')
cm_figure.set_xlabel('Predicted label')
cm_figure.set_ylabel('True label')
plt.setp(cm_figure.get_yticklabels(), rotation=0)
# Saved next to the other figures of this notebook.
plt.savefig('{}/confusion_matrix_CM2.png'.format(result_dir), format='png',
            dpi=300, facecolor='white', transparent=True,  bbox_inches='tight')

In [None]:
# Line plot of the summary metrics for every edge, saved as a PNG under result_dir.
for edge_name in edges_name:
    plt.rcParams['figure.figsize'] = [10, 6]

    df = model_performance_dict[edge_name].transpose()
    # True-positive and false-positive rates from the confusion-matrix counts.
    df['tpr'] = df['TP'] / (df['TP'] + df['FN'])
    df['fpr'] = df['FP'] / (df['FP'] + df['TN'])

    # Draw on an explicit axes: DataFrame.plot() without `ax` opens a figure of
    # its own, which would leave a separately created figure empty.
    fig, ax = plt.subplots()
    df.loc[:, ['accuracy', 'roc_auc', 'f1score', 'tpr', 'fpr']].plot(ax=ax)
    ax.set_title('performance per round ({})'.format(edge_name), fontsize=13)
    # Fix the y-range before saving so the PNG matches what is displayed.
    ax.set_ylim([0, 1])
    fig.savefig('{}/performance_{}.png'.format(result_dir, edge_name), format='png',
            dpi=300, facecolor='white', transparent=True,  bbox_inches='tight')
    plt.show()
    # Release the figure; one is created per edge.
    plt.close(fig)

In [None]:

# Summary metrics of edge_0. The frames stored in model_performance_dict carry
# the metric names as row labels, so a row selection is all that is needed
# (no pivot).
model_performance_dict['edge_0'].loc[['accuracy', 'f1score', 'roc_auc']]


In [None]:
# Draw and save the raw and rank heatmaps of every edge, then reload all
# result tables from disk.
for edge_name in edges_name:
    # Raw feature importances.
    plt.rcParams['figure.figsize'] = [15, 7]
    fig = plt.figure()
    sns.heatmap(feature_importance_dict[edge_name], annot=True, fmt='.2f')
    plt.title('feature importance per round (%s)' % edge_name, fontsize=20)
    raw_png = '%s/feature_importance_%s.png' % (result_dir, edge_name)
    plt.savefig(raw_png, format='png', dpi=300, facecolor='white',
                transparent=True, bbox_inches='tight')
    plt.show()

    # Per-column ranks of the same importances.
    plt.rcParams['figure.figsize'] = [15, 7]
    fig = plt.figure()
    sns.heatmap(convertRank(feature_importance_dict[edge_name]), annot=True, fmt='d')
    plt.title('feature importance per round (%s)_rank' % edge_name, fontsize=20)
    rank_png = '%s/feature_importance_%s_rank.png' % (result_dir, edge_name)
    plt.savefig(rank_png, format='png', dpi=300, facecolor='white',
                transparent=True, bbox_inches='tight')
    plt.show()

# Start again from the tables as stored on disk (each kept transposed).
feature_importance_dict = {}
model_performance_dict = {}
for edge_name in central_name + edges_name:
    for store, file_name in (
        (feature_importance_dict, 'feature_importance.feather'),
        (model_performance_dict, 'model_performance.feather'),
    ):
        store[edge_name] = pd.read_feather(result_dir.joinpath(edge_name, file_name)).transpose()

In [None]:
# Display the whole dict of performance frames, keyed by the names loaded above.
model_performance_dict