In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import date

Data Loading

In [22]:

def load_data(file):
    mdro = pd.read_excel(file, sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})
    report = pd.read_excel(file, sheet_name='微生物報告', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})
    info = pd.read_excel(file, sheet_name='就醫資訊', 
                        dtype={'病歷號碼':'str', '就醫序號':'int'})
    # anti = pd.read_excel(file, sheet_name='抗生素使用', 
    #                     dtype={'病歷號碼':'str', '就醫序號':'int'})
    examValue = pd.read_excel(file, sheet_name='檢驗數值', 
                        dtype={'病歷號碼':'str', '就醫序號':'int'})
    
    # data processing 
    mdro = mdro[['病歷號碼', 'time', '就醫序號','申請編號', '菌株', '預測值', '部位', 'sendTime']]
    mdro.rename({'time':'openTime'}, axis=1, inplace=True)
    
    report = report[['病歷號碼', 'bed', '就醫序號','申請編號', 'strain','部位', '抗藥性菌株', 'examTime','reportTime']]
    
    cdss = pd.merge(report, mdro, how='left', on=['病歷號碼', '就醫序號','申請編號', '部位'])
    cdss = cdss.drop_duplicates()
    cdss['miniTime_Open'] = cdss.groupby(['病歷號碼', 'bed','就醫序號', '申請編號', '部位', '菌株', '預測值',  'sendTime'])['openTime'].transform('min')
    
    info = info[['病歷號碼', '就醫序號', '性別', '生日', '年齡', '來院日期', '離院日期', '天數', '門急住', '科別代碼', '科別']]
    info.rename({'門急住':'bed'}, axis=1, inplace=True)
    
    cdss_info = pd.merge(cdss, info, how='left', on=['病歷號碼', '就醫序號', 'bed'])
    
    # Filtering mini open CDSS time
    CDSS = cdss_info[cdss_info.openTime==cdss_info.miniTime_Open].drop_duplicates()
    
    # Add column : max time leaving hospital
    CDSS.離院日期 = pd.to_datetime(CDSS.離院日期).dt.date
    CDSS['MaxTime_leave'] = CDSS.groupby(['病歷號碼', 'bed','就醫序號', '申請編號', '部位', '菌株', '預測值', 'sendTime', '來院日期'])['離院日期'].transform('max')
    # CDSS = pd.merge(cdss_info, anti, how='left', on=['病歷號碼','就醫序號'])
    # CDSS = CDSS.drop_duplicates()
    
    # Predict Level
    condition_1 = [(CDSS['預測值'] <= 20), (CDSS['預測值'] > 20)&(CDSS['預測值'] <= 40), 
             (CDSS['預測值'] > 40)&(CDSS['預測值'] <= 60), (CDSS['預測值'] > 60)&(CDSS['預測值'] <= 80),
             (CDSS['預測值'] > 80)&(CDSS['預測值'] <= 100)]

    value_1 = ['0~20%', '21~40%', '41~60%', '61~80%', '81~100%']
    CDSS['PredLevel'] = np.select(condition_1, value_1) 
    
    # Stain Category
    condition_2 =[
                  (CDSS['菌株'].str.contains('Enterococcus')),
                  (CDSS['菌株'].str.contains('Morganella morganii')),
                  (CDSS['菌株'].str.contains('Klebsiella pneumoniae')),
                  (CDSS['菌株'].str.contains('Pseudomonas aeruginosa')),
                  (CDSS['菌株'].str.contains('Escherichia coli')),
                  (CDSS['菌株'].str.contains('Acinetobacter baumannii')),
                  (CDSS['菌株'].str.contains('Staphylococcus aureus')),
                  (CDSS['菌株'].str.contains('Enterobacter cloacae'))
                ]
    
    value_2 = ['Enterococcus faecium', 'Morganella morganii', 'Klebsiella pneumoniae', 'Pseudomonas aeruginosa',
               'Escherichia coli', 'Acinetobacter baumannii', 'Staphylococcus aureus', 'Enterobacter cloacae']
    
    CDSS['Group_Strain'] = np.select(condition_2, value_2) 
    
    # exam Year & Month
    CDSS["examDate"] = pd.to_datetime( 
                                  pd.to_datetime(CDSS.examTime).dt.year.map(str) + '-' + \
                                  pd.to_datetime(CDSS.examTime).dt.month.map(str) + '-' + \
                                  pd.to_datetime(CDSS.examTime).dt.day.map(str)
                                )
    
    CDSS['Resist_bool'] = np.where(CDSS['抗藥性菌株']==1, True, False)
    CDSS.rename({'年齡':'Age', '預測值':'PredValue'}, axis=1, inplace=True)
    
    return CDSS
    

In [26]:
# 智抗菌平台報表_20220331.xlsx

file = r'C:\Users\User\VSCODE\CDSS\source\資訊室\智抗菌平台報表_20220610.xlsx'
# file = r'C:\Users\User\VSCode\CDSS\output\智抗菌平台報表_20220502.xlsx'
CDSS = load_data(file)
# CDSS.head(10)

file_1 = r'C:\Users\User\VSCODE\CDSS\source\資訊室\智抗菌平台報表_20220502.xlsx'

CDSS_1 = load_data(file_1)

In [27]:
old_file = r'C:\Users\User\VSCODE\CDSS\source\Clean\CDSS_0502.xlsx'

CDSS_old = pd.read_excel(old_file, dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

CDSS = pd.concat([CDSS_old, CDSS, CDSS_1])

# Filter Fault Time
CDSS['sendTime'] = pd.to_datetime(CDSS['sendTime'])
# CDSS = CDSS[(CDSS['sendTime'] < pd.to_datetime('2022-03-07 00:00')) | (CDSS['sendTime'] > pd.to_datetime('2022-03-21 23:59'))]

CDSS = CDSS.drop_duplicates().sort_values(['病歷號碼', '申請編號', 'examTime']).reset_index(drop=True)

  uniques, codes = table.factorize(


In [28]:
# Output File
# CDSS.to_excel(r'C:\Users\User\VSCode\CDSS_report_mdro_info.xlsx')
# CDSS.to_excel(r'C:\Users\User\VSCode\CDSS\output\CDSS_0628.xlsx', index=False)

In [14]:
print(CDSS.columns)

print(CDSS_old.columns)

Index(['病歷號碼', 'bed', '就醫序號', '申請編號', 'strain', '部位', '抗藥性菌株', 'examTime',
       'reportTime', 'openTime', '菌株', 'PredValue', 'sendTime',
       'miniTime_Open', '性別', '生日', 'Age', '來院日期', '離院日期', '天數', '科別代碼', '科別',
       'MaxTime_leave', 'PredLevel', 'Group_Strain', 'examDate',
       'Resist_bool'],
      dtype='object')
Index(['病歷號碼', '就醫序號', '性別', '生日', '年齡', '來院日期', '離院日期', '天數', '門急住', '科別代碼',
       '科別', '護理站', '床號'],
      dtype='object')


In [2]:
mdro = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\智抗菌平台報表_20220331.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

report = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\智抗菌平台報表_20220331.xlsx', sheet_name='微生物報告', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

info = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\智抗菌平台報表_20220331.xlsx', sheet_name='就醫資訊', 
                        dtype={'病歷號碼':'str', '就醫序號':'int'})

In [47]:
mdro = mdro[['病歷號碼', 'time', '申請編號', '菌株', '預測值', '部位', 'sendTime']]
mdro.rename({'time':'openTime'}, axis=1, inplace=True)
    
report = report[['病歷號碼', 'bed', '申請編號', 'strain','部位', '抗藥性菌株', 'examTime','reportTime']]

cdss = pd.merge(report, mdro, how='left', on=['病歷號碼','申請編號', '部位'])
cdss = cdss.drop_duplicates()
cdss = cdss[(cdss.bed=='急診') | (cdss.bed=='住院')]

cdss['miniTime_Open'] = cdss.groupby(['病歷號碼', 'bed', '申請編號', '部位', '菌株', '預測值',  'sendTime'])['openTime'].transform('min')

info = info[['病歷號碼', '性別', '生日', '年齡', '來院日期', '離院日期', '天數', '門急住', '科別代碼', '科別']]
info.rename({'門急住':'bed'}, axis=1, inplace=True)

cdss_info = pd.merge(cdss, info, how='left', on=['病歷號碼'])

 # Filtering mini open CDSS time
CDSS = cdss_info[cdss_info.openTime==cdss_info.miniTime_Open].drop_duplicates()

# Add column : max time leaving hospital
CDSS.離院日期 = pd.to_datetime(CDSS.離院日期).dt.date
# CDSS['MaxTime_leave'] = CDSS.groupby(['病歷號碼', 'bed', '申請編號', '部位', '菌株', '預測值', 'sendTime', '來院日期'])['離院日期'].transform('max')
# # CDSS = pd.merge(cdss_info, anti, how='left', on=['病歷號碼','就醫序號'])
# # CDSS = CDSS.drop_duplicates()

# # Predict Level
# condition_1 = [(CDSS['預測值'] <= 20), (CDSS['預測值'] > 20)&(CDSS['預測值'] <= 40), 
#           (CDSS['預測值'] > 40)&(CDSS['預測值'] <= 60), (CDSS['預測值'] > 60)&(CDSS['預測值'] <= 80),
#           (CDSS['預測值'] > 80)&(CDSS['預測值'] <= 100)]

# value_1 = ['0~20%', '21~40%', '41~60%', '61~80%', '81~100%']
# CDSS['PredLevel'] = np.select(condition_1, value_1) 

# # Stain Category
# condition_2 =[
#               (CDSS['菌株'].isin(['Enterococcus faecium(VRE)', 'Enterococcus faecium', 'Enterococcus faecalis'])),
#               (CDSS['菌株'].isin(['Morganella morganii'])),
#               (CDSS['菌株'].isin(['Klebsiella pneumoniae'])),
#               (CDSS['菌株'].isin(['Pseudomonas aeruginosa'])),
#               (CDSS['菌株'].isin(['Escherichia coli'])),
#               (CDSS['菌株'].isin(['Acinetobacter baumannii', 'Acinetobacter baumannii/calcoaceticus complex'])),
#               (CDSS['菌株'].isin(['Staphylococcus aureus', 'Staphylococcus aureus(MRSA)'])),
#               (CDSS['菌株'].isin(['Enterobacter cloacae']))
#             ]

# value_2 = ['Enterococcus faecium', 'Morganella morganii', 'Klebsiella pneumoniae', 'Pseudomonas aeruginosa',
#             'Escherichia coli', 'Acinetobacter baumannii', 'Staphylococcus aureus', 'Enterobacter cloacae']

# CDSS['Group_Strain'] = np.select(condition_2, value_2)

# # exam Year & Month
# CDSS["examDate"] = pd.to_datetime( 
#                               pd.to_datetime(CDSS.examTime).dt.year.map(str) + '-' + \
#                               pd.to_datetime(CDSS.examTime).dt.month.map(str) + '-' + \
#                               pd.to_datetime(CDSS.examTime).dt.day.map(str)
#                             )

# CDSS['Resist_bool'] = np.where(CDSS['抗藥性菌株']==1, True, False)
# CDSS.rename({'年齡':'Age', '預測值':'PredValue'}, axis=1, inplace=True)


CDSS_2022_05_26

In [16]:
mdro_1 = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\資訊室\智抗菌平台報表_20220210.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

mdro_2 = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\資訊室\智抗菌平台報表_20220309.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

mdro_3 = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\資訊室\智抗菌平台報表_20220331.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

mdro_4 = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\資訊室\智抗菌平台報表_20220502.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})

mdro_5 = pd.read_excel(r'C:\Users\User\VSCode\CDSS\source\資訊室\智抗菌平台報表_20220610.xlsx', sheet_name='MDROs', 
                        dtype={'病歷號碼':'str', '申請編號':'int', '就醫序號':'int'})



In [17]:
mdro = pd.concat([mdro_1, mdro_2, mdro_3, mdro_4, mdro_5]).drop_duplicates().sort_values(['病歷號碼', 'time', '申請編號']).reset_index(drop=True)
mdro.rename({'time':'openTime'}, axis=1, inplace=True)

# mdro_SA = mdro[mdro['菌株'].str.contains('Staphylococcus aureus')]
mdro['miniTime_Open'] = mdro.groupby(['病歷號碼', '申請編號', '部位', '菌株', '預測值',  'sendTime'])['openTime'].transform('min')

In [18]:
# Predict Level
condition_1 = [(mdro['預測值'] <= 20), (mdro['預測值'] > 20)&(mdro['預測值'] <= 40), 
          (mdro['預測值'] > 40)&(mdro['預測值'] <= 60), (mdro['預測值'] > 60)&(mdro['預測值'] <= 80),
          (mdro['預測值'] > 80)&(mdro['預測值'] <= 100)]

value_1 = ['0~20%', '21~40%', '41~60%', '61~80%', '81~100%']
mdro['PredLevel'] = np.select(condition_1, value_1) 

In [19]:
# Stain Category
condition_2 =[
              (mdro['菌株'].str.contains('Enterococcus faecium')),
              (mdro['菌株'].str.contains('Enterococcus faecalis')),
              (mdro['菌株'].str.contains('Morganella morganii')),
              (mdro['菌株'].str.contains('Klebsiella pneumoniae')),
              (mdro['菌株'].str.contains('Pseudomonas aeruginosa')),
              (mdro['菌株'].str.contains('Escherichia coli')),
              (mdro['菌株'].str.contains('Acinetobacter baumannii')),
              (mdro['菌株'].str.contains('Staphylococcus aureus')),
              (mdro['菌株'].str.contains('Enterobacter cloacae'))
            ]

value_2 = ['Enterococcus faecium', 'Enterococcus faecalis', 'Morganella morganii', 'Klebsiella pneumoniae', 'Pseudomonas aeruginosa',
            'Escherichia coli', 'Acinetobacter baumannii', 'Staphylococcus aureus', 'Enterobacter cloacae']

mdro['Group_Germ'] = np.select(condition_2, value_2) 


In [20]:
# mdro_SA[mdro_SA.miniTime_Open==mdro_SA.openTime].to_excel(r'C:\Users\User\VSCode\CDSS\output\MDRO_SA_0526.xlsx', index=False)
# mdro.to_excel(r'C:\Users\User\VSCode\CDSS\output\MDRO_0610.xlsx', index=False)

06_10_2022

In [26]:
Yakiniku_Rice_Burger = pd.read_csv(r"C:\Users\User\VSCode\CDSS\source\Clean\元氣牛肉珍珠堡.csv", dtype={'病歷號碼':'str'}).drop_duplicates()
# Yakiniku_Rice_Burger = pd.read_csv(r"\\10.20.3.235\抗生素cdss\RawData\CDSS_Row_1110609.csv", dtype={'病歷號碼':'str'}).drop_duplicates()



  Yakiniku_Rice_Burger = pd.read_csv(r"C:\Users\User\VSCode\CDSS\source\Clean\元氣牛肉珍珠堡.csv", dtype={'病歷號碼':'str'}).drop_duplicates()


In [25]:
Yakiniku_Rice_Burger.columns

Index(['表單號', '住院號', '申請編號', '病歷號碼', '生日(年月日)', '主診斷1', '診斷2', '診斷3', '診斷4',
       '診斷5', '開單醫師', '開單科別', '病房', '住院日', '出院日', '申請日', '採檢日', '簽收日', '報告日',
       '項目代碼', '項目名稱', '來源', '檢體', '菌名', 'strain', 'bed', '檢驗部收到時間', '發報告時間',
       '參考值', 'Lab_檢驗日期', 'Lab_報告日期', 'Lab_檢驗項目', 'Lab_檢驗值', 'Lab_表單號',
       'Lab_申請號', '床號', '性別', '体重', '出院診斷_1', '出院診斷_2', '出院診斷_3', '出院診斷_4',
       '出院診斷_5', '出院診斷_6', '出院診斷_7', '死亡日期', '時間', 'ICU'],
      dtype='object')