In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import json
import os
import warnings
warnings.filterwarnings('ignore')
from scipy import stats

In [2]:
# Analyze the results according to hospital size.
# Per-hospital table: analyze_data() left-joins it on 'hospital_id' and reads
# its 'record_num' column to split hospitals into size groups.
hospital = pd.read_csv('../data/eicu/handled/hospital.csv')

In [3]:
def analyze_data(file_path, hospital_df=None, n_runs=5):
    """Summarise experiment metrics from a JSON results file by hospital size.

    The JSON file maps an experiment key to a dict with the entries
    'model', 'hos_id', 'jaccard', 'f1' and 'prauc'.  Each experiment is
    joined with the hospital table, and the experiments of every hospital are
    numbered 1..k by ranking their keys (JSON keys are strings, so the rank is
    lexicographic).  For each run index 1..n_runs the metrics are averaged per
    hospital and then across hospitals, separately for four groups:

    * ``S``   -- ``record_num <= 1000``
    * ``M``   -- ``1000 < record_num < 2000``
    * ``L``   -- ``record_num >= 2000``
    * ``all`` -- every hospital present in that run

    A group with no hospital in a given run contributes NaN.

    Parameters
    ----------
    file_path : str
        Path of the JSON results file.
    hospital_df : pandas.DataFrame, optional
        Table with at least the columns 'hospital_id' and 'record_num'.
        Defaults to the notebook-level ``hospital`` frame.
    n_runs : int, optional
        Number of run indices to summarise (default 5, the value that used to
        be hard-coded).

    Returns
    -------
    ((numpy.ndarray, numpy.ndarray), pandas.DataFrame)
        ``(mean, std)`` over the runs for the 12 summary columns, followed by
        the per-run frame (one row per run, columns ``Jaccard_S`` ...
        ``PRAUC_all``).
    """
    metric_cols = ['jaccard', 'f1', 'prauc']
    summary_cols = [label + '_' + bucket
                    for bucket in ('S', 'M', 'L', 'all')
                    for label in ('Jaccard', 'F1', 'PRAUC')]

    if hospital_df is None:
        # Fall back to the table loaded at notebook level.
        hospital_df = hospital

    with open(file_path, 'r') as f:
        data = json.load(f)

    records = [{'exp_id': key,
                'model': entry['model'],
                'hospital_id': entry['hos_id'],
                'jaccard': entry['jaccard'],
                'f1': entry['f1'],
                'prauc': entry['prauc']}
               for key, entry in data.items()]
    df = pd.DataFrame(records,
                      columns=['exp_id', 'model', 'hospital_id'] + metric_cols)

    df = pd.merge(df, hospital_df, on='hospital_id', how='left')
    # Replace the raw experiment key by its 1-based rank within the hospital,
    # so that "run i" can be selected across hospitals.
    df['exp_id'] = df.groupby('hospital_id')['exp_id'].transform('rank')

    def _size_group_means(frame):
        # Select the metric columns BEFORE aggregating: averaging the whole
        # frame would also try to average the string 'model' column, which
        # raises TypeError on pandas >= 2.0.
        return frame.groupby('hospital_id')[metric_cols].mean().mean().values

    run_rows = []
    for run in range(1, n_runs + 1):
        df_per = df.loc[df['exp_id'] == run]
        size = df_per['record_num']
        run_rows.append(np.concatenate((
            _size_group_means(df_per[size <= 1000]),
            _size_group_means(df_per[(size > 1000) & (size < 2000)]),
            _size_group_means(df_per[size >= 2000]),
            _size_group_means(df_per),
        )))
    # Build the per-run table in one shot instead of growing it row by row.
    df_exp = pd.DataFrame(run_rows, columns=summary_cols)

    res_mean, res_std = df_exp.mean(), df_exp.std()

    print(file_path+': %d Hospital, %.3f Experiments' % (df.hospital_id.nunique(), df.groupby('hospital_id')['exp_id'].agg('count').mean()))

    return (res_mean.values, res_std.values), df_exp

In [4]:
def display_data(path_list):
    """Collect the run-level mean and std of every results file into two tables.

    Parameters
    ----------
    path_list : list of str
        Paths of JSON results files; each one is passed to ``analyze_data``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The mean table and the std table.  Both have one row per file,
        labelled with the file name without directory and extension, and the
        12 summary columns ``Jaccard_S`` ... ``PRAUC_all``.  If two paths
        share a label, the later one replaces the earlier row.
    """
    summary_cols = ['Jaccard_S', 'F1_S', 'PRAUC_S',
                    'Jaccard_M', 'F1_M', 'PRAUC_M',
                    'Jaccard_L', 'F1_L', 'PRAUC_L',
                    'Jaccard_all', 'F1_all', 'PRAUC_all']

    mean_by_label, std_by_label = {}, {}
    for path in path_list:
        (instance_mean, instance_std), _ = analyze_data(path)
        # Use os.path rather than splitting on '.' and '/': that approach
        # fails for paths without an extension and keeps only the last
        # dot-separated piece of names such as 'model.v2.json'.
        label = os.path.splitext(os.path.basename(path))[0]
        mean_by_label[label] = instance_mean
        std_by_label[label] = instance_std

    display_df_mean = pd.DataFrame.from_dict(mean_by_label, orient='index',
                                             columns=summary_cols)
    display_df_std = pd.DataFrame.from_dict(std_by_label, orient='index',
                                            columns=summary_cols)

    return display_df_mean, display_df_std

In [None]:
# Summarise every results file listed here; each path yields one row in both
# the mean table and the std table.
display_df_mean, display_df_std = display_data([
                           './log/results/prompt.json',
                           ])

In [None]:
# Mean of each summary column over the run indices, one row per results file.
display_df_mean

In [None]:
# Standard deviation of each summary column over the run indices, one row per results file.
display_df_std