In [18]:
import tensorflow as tf, os, pandas as pd, numpy as np, pickle, glob
from sklearn.model_selection import KFold
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

In [19]:
USD_data_test_pkl = glob.glob("./dataset/baseline-testdataset_pickle_save/sample_500_seed_42_feature/*pkl")

In [20]:
import keras
def create_model(input_shape = (1024,)):
  model = tf.keras.Sequential([
    keras.layers.Dense(256, activation='relu', input_shape = input_shape),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid')
  ])
  model.compile(loss='binary_crossentropy', 
              optimizer='adam',
              metrics=['acc'])
  return model

In [38]:
from sklearn.metrics import classification_report, roc_curve, precision_recall_curve, roc_auc_score
from sklearn.metrics import confusion_matrix
import matplotlib.pylab as plt
from tqdm import tqdm
def get_weight_path(fold, train_type):
    return './model_weight/PeriHD/weights_periHD_KFold={}[{}-only-merge-sequnece-resplit-pos_neg].h5'.format(fold,train_type)

def load_pkl(pkl):
    with open(pkl, 'rb') as f:
        pk_dict = pickle.load(f)
    return pk_dict

kfold = KFold(n_splits=5, shuffle=True)
train_type = 'USD'
input_shape  = (512,)
ths = 0.5
pred_dict ={'病人ID':[], '病人透析時間':[], '病人透析前血壓':[], 
            'sbp差(取最大值)':[], 'mbp差(取最大值)':[], 
            'NLP判斷結果(Y/N)':[], 'BP判斷結果(Y/N)':[],
            'IntraHD最高分斷句':[], 'PeriHD最高分斷句':[],
            'IntraHD預測分數':[], 'PeriHD預測分數':[],
            '預期結果(Y/N)':[], 
            'PeriHD完整文句':[], 'IntraHD完整文句':[]}
count_peri = 0
count_intra = 0
for pkl_idx, next_test_pkl in tqdm(enumerate(USD_data_test_pkl), total=len(USD_data_test_pkl)):
        periHD_intraHD_dict = load_pkl(next_test_pkl)
        pred_dict['病人ID'].append(str(periHD_intraHD_dict['ID']))
        pred_dict['病人透析時間'].append(str(periHD_intraHD_dict['DateTime']))
        # PeriHD Data
        PeriHD_sbp, PeriHD_dbp =  periHD_intraHD_dict['PeriHD_data_sbp'], periHD_intraHD_dict['PeriHD_data_dbp']
        pred_dict['病人透析前血壓'].append('{}/{}'.format(PeriHD_sbp, PeriHD_dbp))
        PeriHD_mbp = int((PeriHD_sbp+PeriHD_dbp)/2)
        PeriHD_context = periHD_intraHD_dict['PeriHD_context']
        PeriHD_Feature = periHD_intraHD_dict['PeriHD_Feature']
        count_peri+=1
        # IntraHD Data
        IntraHD_sbp, IntraHD_dbp =  periHD_intraHD_dict['IntraHD_data_sbp'], periHD_intraHD_dict['IntraHD_data_dbp']
        IntraHD_mbp = ((IntraHD_sbp+IntraHD_dbp)/2).astype(int)
        IntraHD_context = periHD_intraHD_dict['IntraHD_context']
        IntraHD_Feature = periHD_intraHD_dict['IntraHD_Feature']
        count_intra +=len(IntraHD_context)
        # 比較血壓是否有透析中低血壓
        
        max_difference_mbp = max(PeriHD_mbp-IntraHD_mbp)
        max_ifference_sbp = max(PeriHD_sbp-IntraHD_sbp)
        pred_dict['sbp差(取最大值)'].append(max_ifference_sbp)
        pred_dict['mbp差(取最大值)'].append(max_difference_mbp)
        bp_result = 'Y' if (max_difference_mbp>10) or (max_ifference_sbp>=20) else 'N'
        pred_dict['BP判斷結果(Y/N)'].append(bp_result)
        peri_predict_result=[0.0]*len(periHD_intraHD_dict['PeriHD_Feature'])
        intra_predict_result=[0.0]*len(periHD_intraHD_dict['IntraHD_Feature'])
        for fold in ['1','2','3','4','5']:
            # for fold in ['1','2','3','4','5']:
            weight_name = get_weight_path(fold, train_type)
            model_cls = create_model(input_shape=input_shape)
            model_cls.load_weights(weight_name)
            # nlp cls predict - periHD
            peri_predict_result = peri_predict_result + model_cls.predict(periHD_intraHD_dict['PeriHD_Feature'], verbose=0).flatten()
            # nlp cls predict - intraHD
            intra_predict_result = intra_predict_result + model_cls.predict(periHD_intraHD_dict['IntraHD_Feature'], verbose=0).flatten()
        peri_predict_result = peri_predict_result/5
        intra_predict_result = intra_predict_result/5
        pred_dict['PeriHD預測分數'].append(round(max(peri_predict_result),4))
        pred_dict['PeriHD最高分斷句'].append(PeriHD_context[np.argmax(peri_predict_result)])

        pred_dict['IntraHD預測分數'].append(round(max(intra_predict_result),4))
        pred_dict['IntraHD最高分斷句'].append(IntraHD_context[np.argmax(intra_predict_result)])
        # 
        pred_dict['NLP判斷結果(Y/N)'].append('Y' if sum(peri_predict_result > ths)+sum(intra_predict_result > ths)!=0 else 'N')
        nlp_result = 'Y' if max(peri_predict_result)>0.5 or max(intra_predict_result)>0.5 else 'N'
        pred_dict['預期結果(Y/N)'].append('Y' if nlp_result==bp_result and bp_result=='Y' else 'N')
        full_text = ''
        for idx_text, text in enumerate(PeriHD_context):
            if idx_text==0:
                full_text=text
            else:
                full_text+=("，"+text.split("，")[1])
        pred_dict['PeriHD完整文句'].append(full_text)
        pred_dict['IntraHD完整文句'].append("".join(IntraHD_context).replace('無 || 無','，').replace('無 || ','，').replace(' || 無','，'))
        # break
    #     test_binary_pred2 = (np.array(split_data_external['pred'])>0.5).astype(np.int8)

    #     print(next_test_pkl)
    #     print(periHD_intraHD_dict['ID'],str(periHD_intraHD_dict['DateTime']))
    #     print(periHD_intraHD_dict['PeriHD_Feature'].shape,periHD_intraHD_dict['IntraHD_Feature'].shape)
    #     print((PeriHD_sbp), (PeriHD_dbp))
    #     print((IntraHD_sbp), (IntraHD_dbp))
    # # if pkl_idx==2:
    #     break

100%|██████████| 497/497 [06:36<00:00,  1.25it/s]


In [25]:
count_peri, count_intra

(497, 2749)

In [39]:
pd.DataFrame(pred_dict)

Unnamed: 0,病人ID,病人透析時間,病人透析前血壓,sbp差(取最大值),mbp差(取最大值),NLP判斷結果(Y/N),BP判斷結果(Y/N),IntraHD最高分斷句,PeriHD最高分斷句,IntraHD預測分數,PeriHD預測分數,預期結果(Y/N),PeriHD完整文句,IntraHD完整文句
0,09359CH38,2020-07-20 17:50:00,128/73,0,0,N,N,HD end || 無,透析結束後無不適，依「血液透析病人透析後防跌評估表」進行防跌篩檢：0分,0.0025,0.0119,N,透析結束後無不適，依「血液透析病人透析後防跌評估表」進行防跌篩檢：0分，AVF止血後，紗布覆...,，，，NESP 20 μg iv at 2020-07-20 19:50，，HD end，
1,53496939,2019-04-16 17:25:00,191/82,56,35,N,Y,主訴無不適情形，持續觀察至透析畢。 || 無,口頭衛教居家照服員預防跌倒注意事項，並由居家照服員協助下坐輪椅返家,0.0837,0.0333,N,HD畢，無不適情形，AVF 已止血，紗布覆蓋，依「血液透析病人透析後防跌評估表」評分內容，評...,主訴上次返家後無不適情形。，，，主訴無不適情形，持續觀察至透析畢。，end HD。，
2,16455363,2017-02-03 12:50:00,127/83,67,56,Y,Y,"recheck BP 83/53mmHg || 血壓變化 /-監測生命徵象,調整脫水速率,N...",HD畢，無不適情形,0.6493,0.0060,Y,HD畢，無不適情形，HF Clear，perm cath on hepain lock留置，...,"依醫囑予Recormon 2000u iv at1350，，彭渝森醫師來視,囑更改人工腎臟為..."
3,10023101,2018-12-21 12:45:00,168/86,10,9,N,N,NESP 20 μg(mcg) iv at 13:30 || 無,HD畢，無不適情形,0.0001,0.0060,N,HD畢，無不適情形，依「血液透析病人透析後防跌評估表」評估項目為0項，AVF已止血，紗布覆蓋...,，NESP 20 μg(mcg) iv at 13:30，，，，
4,0X9512999,2016-09-15 10:30:00,139/59,64,34,Y,Y,"無 || 血壓變化 /血壓低-監測生命徵象,調整脫水速率,N/S Challenge,100cc",thrill（++），已予icu護理師交班,0.9430,0.0004,Y,AVG血管穿刺處已止血，thrill（++），已予icu護理師交班，因病情需要續icu照護,"，，，，，，，血壓變化 /血壓低-監測生命徵象,調整脫水速率,N/S Challenge,1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
492,27846474,2019-10-17 12:22:00,137/64,33,20,N,Y,FIR || 無,AVF紗布加壓止血，家屬陪伴推輪椅離開HDR,0.0092,0.0036,N,透析後無不適，透析後評估項目0項，AVF紗布加壓止血，家屬陪伴推輪椅離開HDR,FIR，預防性自服2顆升壓藥，Recormon 2000U iv at 14:28，，，其他...
493,0442G9242,2016-02-19 12:50:00,83/38,-9,-7,Y,N,"無 || 抽筋-調整脫水速率,其他,Dr楊調整DW為try46.2KG;",HD畢，依醫囑予Vit-B12 1Amp IV at 16:20,0.9954,0.0018,N,HD畢，依醫囑予Vit-B12 1Amp IV at 16:20，R't perm cat...,Midodrine 2# PO，Dr楊診視開止癢藥使用並在下次更換人工腎臟，，抽筋-調整脫水...
494,54554242,2017-04-25 12:40:00,160/77,46,35,Y,Y,無 || 其他-透析後段時間常感不適，予前面脫水速度調快,HD畢，無不適情形,0.9744,0.0060,Y,HD畢，無不適情形，perm cath on heparine lock紗布覆蓋，由家屬陪伴...,FIR || 其他-今日CVS門診診查結果為排定4/28陳哲伸醫師協助PTA。，其他-透析後...
495,060F95505,2018-10-17 08:05:00,172/67,16,8,N,N,Dr曾改EPO NESP 20 μg(mcg) QW || 無,透析後無不適，防跌評估0分,0.1998,0.0006,N,透析後無不適，防跌評估0分，A-V Graft止血後，紗布覆蓋，家屬陪伴坐輪椅離開HDR,，，，，Dr曾改EPO NESP 20 μg(mcg) QW，


In [40]:
pd.DataFrame(pred_dict).to_csv('./baseline_predict_test_sample500_intra+peri.csv', index=False, encoding='utf-8-sig')

In [22]:
PeriHD_context

['HD END，病人表透析後無不適', '病人表透析後無不適，已確定止血', '已確定止血，透析後評估0分', '透析後評估0分，自行離開hdr.']

In [23]:
IntraHD_context

['腿部運動 || 無',
 '無 || 無',
 '無 || 無',
 'Recormon 2000 IU QW(W3)+NESP 20 μg(mcg) QW(W3) iv at 19:47. || 無',
 '無 || 無']