从原始的json数据生成patient维度数据  

In [1]:
import datetime
import json
import os
import pickle

import numpy as np
import pandas as pd

# data preparation

In [2]:
# Load the raw per-patient JSON export and snapshot it as CSV.
# NOTE(review): the input path is machine-specific; adjust RAW_JSON_PATH when
# running on another machine.
RAW_JSON_PATH = "/Users/weiyuna/Desktop/工作/合作项目/医大一-胃癌/模型/code_202203/data/fuzhu.json"
RAW_CSV_PATH = "./data/data_fuzhu.csv"

raw_data = pd.read_json(RAW_JSON_PATH)
# Work on a copy: the cells below overwrite columns in `data` while still
# reading the untouched previous-cycle values from `raw_data`.
data = raw_data.copy()

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(RAW_CSV_PATH), exist_ok=True)
data.to_csv(RAW_CSV_PATH, index=False)

## preprocessing

In [3]:
def _binary_flag(positive_label):
    """Build a mapper that returns 1 for values equal to positive_label, else 0."""
    def encode(value):
        if value == positive_label:
            return 1
        return 0
    return encode


# Sex: "男" (male) -> 1; every other value, including missing ones, -> 0.
data["性别"] = data["人口学_性别"].map(_binary_flag("男"))

# Yes/no columns, encoded in place: "是" (yes) -> 1, anything else -> 0.
yes_no_list = [
    'C0*既往化疗',
    '治疗史_既往放疗',

    '合并慢性疾病_充血性心力衰竭',
    '合并慢性疾病_心绞痛',
    '合并慢性疾病_心肌梗死',
    '合并慢性疾病_慢阻肺',
    '合并慢性疾病_消化性溃疡',
    '合并慢性疾病_甲状腺疾病',
    '合并慢性疾病_糖尿病',
    '合并慢性疾病_自身免疫性疾病',
    '合并慢性疾病_获得性免疫缺陷综合征',
]
encode_yes = _binary_flag("是")
for feature_name in yes_no_list:
    encoded = data[feature_name].map(encode_yes)
    data[feature_name] = encoded

In [4]:
# Merge tumour-stage categories by keeping only the first three characters
# of each pathological stage string (e.g. "pT4a" -> "pT4").
status_feature_list = [
    '肿瘤状态_pM分期',
    '肿瘤状态_pN分期',
    '肿瘤状态_pT分期'
]


def _truncate_stage(value):
    """Return the first three characters of a stage string.

    Non-string values are returned unchanged: a missing stage may be None or
    a float NaN, and slicing a float would raise TypeError.
    """
    return value[:3] if isinstance(value, str) else value


for status_feature in status_feature_list:
    data[status_feature] = data[status_feature].map(_truncate_stage)

In [5]:
# Year of the cycle, stored to make the train/test split easier to compute.
def _first_cycle_year(cycle_times):
    """Return the calendar year of the first timestamp in a C1 entry."""
    start = datetime.datetime.strptime(cycle_times[0], "%Y-%m-%d %H:%M:%S")
    return start.year


data["年份"] = data["C1"].map(_first_cycle_year)

In [6]:
# Dynamic lab variables: the value for cycle N is the lowest result recorded
# in cycle N-1 of the raw data (NaN when that cycle has no results).
lab_feature = [
    '血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血',
    '血常规血生化_生化检查:前白蛋白(PA)-静脉血',
    '血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血',
    '血常规血生化_生化检查:总胆红素(TBIL)-静脉血',
    '血常规血生化_生化检查:白蛋白(ALB)-静脉血',
    '血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血',
    '血常规血生化_生化检查:肌酐(Crea)-静脉血',
    '血常规血生化_血常规:中性粒细胞计数-静脉血',
    '血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血',
    '血常规血生化_血常规:白细胞计数(WBC#)-静脉血',
    '血常规血生化_血常规:血小板计数(PLT#)-静脉血',
    '血常规血生化_血常规:血红蛋白(Hb)-静脉血']


def _lowest_or_nan(values):
    """Return the minimum of a non-empty sequence, or NaN for an empty one."""
    if len(values) > 0:
        return min(values)
    return np.nan


for cycle in range(1, 9):
    previous = cycle - 1
    for feature in lab_feature:
        source_column = "C{}*{}".format(previous, feature)
        target_column = "C{}*{}".format(cycle, feature)
        # Read from raw_data so already-shifted columns in `data` are never reused.
        data[target_column] = raw_data[source_column].map(_lowest_or_nan)

In [7]:
# Prophylactic drug use becomes a 0/1 flag taken from the previous cycle:
# 1 when the previous cycle's raw entry is non-empty, 0 otherwise.
drug_suffix = "*预防性抗骨髓抑制药物"

# Cycle 1 is always set to 0.
data["C1" + drug_suffix] = 0
for cycle in range(2, 9):
    previous_drugs = raw_data["C" + str(cycle - 1) + drug_suffix]
    data["C" + str(cycle) + drug_suffix] = previous_drugs.map(
        lambda drugs: int(len(drugs) != 0)
    )

In [8]:
# Glomerular filtration rate and PS score for cycle N are taken from cycle
# N-1 of the raw data.
carried_features = ("肾小球滤过率", "身体状况评价_PS评分")
for cycle in range(1, 9):
    for suffix in carried_features:
        # Read from raw_data, not data, so each cycle gets the original
        # previous-cycle value rather than an already-shifted one.
        data["C{}*{}".format(cycle, suffix)] = raw_data["C{}*{}".format(cycle - 1, suffix)]

## cycle interval

In [9]:
# Compute, per patient, the number of whole days between consecutive cycle
# timestamps and attach them to `data` as C1*interval .. C8*interval.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
MAX_CYCLES = 8
# Interval appended for event-free patients with fewer than MAX_CYCLES
# intervals. NOTE(review): presumably the assumed length of the final cycle;
# confirm the clinical meaning of 45 days.
CENSORED_LAST_INTERVAL_DAYS = 45


def _cycle_intervals(event, cycle_times, event_cycle, event_time):
    """Return a list of MAX_CYCLES day-intervals for one patient, NaN-padded.

    event: 0 for no event; any other value means an event occurred.
    cycle_times: sequence of timestamp strings in TIME_FORMAT.
    event_cycle: cycle number of the first event (only read when event != 0).
    event_time: timestamp string of the first event (only read when event != 0).
    """
    if event == 0:
        # Up to MAX_CYCLES + 1 timestamps give at most MAX_CYCLES intervals.
        timestamps = list(cycle_times[:MAX_CYCLES + 1])
    else:
        # Keep timestamps up to the event cycle; the event time closes the
        # last interval.
        timestamps = list(cycle_times[:int(event_cycle)]) + [event_time]

    # Parse each timestamp once, then diff neighbouring pairs.
    parsed = [datetime.datetime.strptime(stamp, TIME_FORMAT) for stamp in timestamps]
    intervals = [(later - earlier).days for earlier, later in zip(parsed, parsed[1:])]

    if event == 0 and len(intervals) < MAX_CYCLES:
        intervals.append(CENSORED_LAST_INTERVAL_DAYS)

    # Pad so every patient has exactly MAX_CYCLES entries.
    intervals.extend([np.nan] * (MAX_CYCLES - len(intervals)))
    return intervals


# Iterate the columns in lock-step (positional), so the result does not
# depend on the index labels of `data`.
interval_result = [
    _cycle_intervals(event, cycle_times, event_cycle, event_time)
    for event, cycle_times, event_cycle, event_time in zip(
        data["event"],
        data["cycle_info"],
        data["首次不良反应#周期"],
        data["首次不良反应#时间"],
    )
]

interval_columns = ["C" + str(num) + "*interval" for num in range(1, MAX_CYCLES + 1)]
# Reuse data's index so the join aligns row-for-row.
t1 = pd.DataFrame(interval_result, columns=interval_columns, index=data.index)
data = data.join(t1)

In [10]:
# Show every column when a DataFrame is displayed. The fully qualified option
# name is used because the short pattern "max_columns" matches more than one
# option in recent pandas releases (display.* and styler.render.*) and then
# raises OptionError.
pd.set_option("display.max_columns", None)
data

Unnamed: 0,patient_sn,event,首次不良反应#周期,首次不良反应#时间,人口学_住院号,人口学_出生日期,年龄,人口学_性别,人口学_首诊年龄(岁),体重,身高,ABS,BMI,合并慢性疾病_充血性心力衰竭,合并慢性疾病_心绞痛,合并慢性疾病_心肌梗死,合并慢性疾病_慢阻肺,合并慢性疾病_消化性溃疡,合并慢性疾病_甲状腺疾病,合并慢性疾病_糖尿病,合并慢性疾病_自身免疫性疾病,合并慢性疾病_获得性免疫缺陷综合征,治疗史_既往放疗,肿瘤状态_cM分期,肿瘤状态_cN分期,肿瘤状态_cT分期,肿瘤状态_pM分期,肿瘤状态_pN分期,肿瘤状态_pT分期,肿瘤状态_临床分期,肿瘤状态_病理分期,肿瘤状态_胃部肿瘤位置,cycle_info,cycle_num,C0*既往化疗,C0*既往胃癌手术,C0*肾小球滤过率,C0*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C0*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C0*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C0*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C0*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C0*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C0*血常规血生化_生化检查:肌酐(Crea)-静脉血,C0*血常规血生化_血常规:中性粒细胞计数-静脉血,C0*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C0*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C0*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C0*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C0*身体状况评价_PS评分,C0*预后影响指标,C1,C1*不良反应,C1*不良反应#时间,C1*化疗药物,C1*放射治疗次数,C1*治疗性抗骨髓抑制药物,C1*肾小球滤过率,C1*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C1*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C1*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C1*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C1*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C1*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C1*血常规血生化_生化检查:肌酐(Crea)-静脉血,C1*血常规血生化_血常规:中性粒细胞计数-静脉血,C1*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C1*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C1*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C1*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C1*身体状况评价_PS评分,C1*预防性抗骨髓抑制药物,C2,C2*不良反应,C2*不良反应#时间,C2*化疗药物,C2*放射治疗次数,C2*治疗性抗骨髓抑制药物,C2*肾小球滤过率,C2*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C2*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C2*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C2*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C2*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C2*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C2*血常规血生化_生化检查:肌酐(Crea)-静脉血,C2*血常规血生化_血常规:中性粒细胞计数-静脉血,C2*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C2*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C2*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C2*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C2*身体状况评价_PS评分,C2*预防性抗骨髓抑制药物,C3,C3*不良反应,C3*不良反应#时间,C3*化疗药物,C3*放射治疗次数,C3*治疗性抗骨髓抑制药物,C3*肾小球滤过率,C3*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C3*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C3*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C3*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C3*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C3*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C3*血常规血生化_生化检查:肌酐(Crea)-静脉血,C3*血常规血生化_血常规:中性粒细胞计数-静脉血,C3*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C3*血常规血生化_
血常规:白细胞计数(WBC#)-静脉血,C3*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C3*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C3*身体状况评价_PS评分,C3*预防性抗骨髓抑制药物,C4,C4*不良反应,C4*不良反应#时间,C4*化疗药物,C4*放射治疗次数,C4*治疗性抗骨髓抑制药物,C4*肾小球滤过率,C4*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C4*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C4*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C4*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C4*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C4*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C4*血常规血生化_生化检查:肌酐(Crea)-静脉血,C4*血常规血生化_血常规:中性粒细胞计数-静脉血,C4*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C4*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C4*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C4*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C4*身体状况评价_PS评分,C4*预防性抗骨髓抑制药物,C5,C5*不良反应,C5*不良反应#时间,C5*化疗药物,C5*放射治疗次数,C5*治疗性抗骨髓抑制药物,C5*肾小球滤过率,C5*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C5*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C5*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C5*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C5*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C5*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C5*血常规血生化_生化检查:肌酐(Crea)-静脉血,C5*血常规血生化_血常规:中性粒细胞计数-静脉血,C5*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C5*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C5*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C5*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C5*身体状况评价_PS评分,C5*预防性抗骨髓抑制药物,C6,C6*不良反应,C6*不良反应#时间,C6*化疗药物,C6*放射治疗次数,C6*治疗性抗骨髓抑制药物,C6*肾小球滤过率,C6*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C6*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C6*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C6*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C6*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C6*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C6*血常规血生化_生化检查:肌酐(Crea)-静脉血,C6*血常规血生化_血常规:中性粒细胞计数-静脉血,C6*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C6*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C6*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C6*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C6*身体状况评价_PS评分,C6*预防性抗骨髓抑制药物,C7,C7*不良反应,C7*不良反应#时间,C7*化疗药物,C7*放射治疗次数,C7*治疗性抗骨髓抑制药物,C7*肾小球滤过率,C7*血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血,C7*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C7*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C7*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C7*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C7*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C7*血常规血生化_生化检查:肌酐(Crea)-静脉血,C7*血常规血生化_血常规:中性粒细胞计数-静脉血,C7*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C7*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C7*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C7*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C7*身体状况评价_PS评分,C7*预防性抗骨髓抑制药物,C8,C8*不良反应,C8*不良反应#时间,C8*化疗药物,C8*放射治疗次数,C8*治疗性抗骨髓抑制药物,C8*肾小球滤过率,C8*血常规血生化_生化检查:丙氨酸氨基转移酶(AL
T)-静脉血,C8*血常规血生化_生化检查:前白蛋白(PA)-静脉血,C8*血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血,C8*血常规血生化_生化检查:总胆红素(TBIL)-静脉血,C8*血常规血生化_生化检查:白蛋白(ALB)-静脉血,C8*血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血,C8*血常规血生化_生化检查:肌酐(Crea)-静脉血,C8*血常规血生化_血常规:中性粒细胞计数-静脉血,C8*血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血,C8*血常规血生化_血常规:白细胞计数(WBC#)-静脉血,C8*血常规血生化_血常规:血小板计数(PLT#)-静脉血,C8*血常规血生化_血常规:血红蛋白(Hb)-静脉血,C8*身体状况评价_PS评分,C8*预防性抗骨髓抑制药物,性别,年份,C1*interval,C2*interval,C3*interval,C4*interval,C5*interval,C6*interval,C7*interval,C8*interval
0,002f98cd8201f14bd9620e885a9d7377,1,2.0,2017-04-27 00:00:00,"[0016527671, 0014631462, 14632258, 0014788778,...",1971-05-02 00:00:00,45,男,45,70.0,170.0,1.78010,24.221453,0,0,0,0,0,0,0,0,0,0,cM0,cN0,cT4b,pM0,pN0,pT4,,IIB期,"[胃体, 胃小弯, 胃底]","[2017-03-07 13:01:00, 2017-03-29 12:06:00, 201...",8,0,1,114.643724,[16.0],[20.1],"[14.0, 14.0]",[18.2],[40.3],[58.0],[61.0],[2.1],[1.74],[4.2],[177.0],[139.0],1.0,40.30870,"[2017-03-07 13:01:00, 2017-03-29 12:06:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],114.643724,16.0,20.10,14.0,18.2,40.3,58.0,61.0,2.10000,1.74,4.20,177.0,139.0,1.0,0,"[2017-03-29 12:06:00, 2017-04-27 15:05:00]",1,2017-04-27 00:00:00,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],119.627708,66.0,,56.0,18.4,40.4,62.0,55.0,1.63012,1.28,3.32,139.0,133.0,,0,"[2017-04-27 15:05:00, 2017-05-19 11:50:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],117.044751,74.0,,41.0,24.3,40.4,68.0,58.0,1.36004,1.15,2.81,108.0,137.0,1.0,1,"[2017-05-19 11:50:00, 2017-06-23 14:14:00]",1,2017-06-23 00:00:00,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],,63.0,17.8,49.0,32.0,38.0,63.0,,9.90947,1.27,12.07,102.0,134.0,,0,"[2017-06-23 14:14:00, 2017-07-18 09:02:00]",1,2017-07-18 00:00:00,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],114.055948,13.0,8.6,21.0,12.8,30.4,59.0,68.0,1.37004,1.16,2.94,68.0,113.0,1.0,1,"[2017-07-18 09:02:00, 2017-08-17 08:16:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],115.425213,26.0,20.1,34.0,24.6,36.3,76.0,60.0,2.21195,1.48,4.15,94.0,129.0,1.0,1,"[2017-08-17 08:16:00, 2017-09-12 07:38:00]",0,,[替吉奥胶囊],0,[],105.276855,41.0,,47.0,19.1,43.3,91.0,78.0,2.59168,1.17,4.16,98.0,131.0,,1,"[2017-09-12 07:38:00, 2017-10-27 07:38:00]",0,,[替吉奥胶囊],0,[],114.643724,23.0,17.0,29.0,28.2,35.5,68.0,61.0,,,,,,,1,1,2017,21,28.0,,,,,,
1,0142e3b3ee4b56437e9b4904cde760de,0,,,"[0008376801, 08376801]",1962-05-28 00:00:00,48,男,48,71.0,,,,0,0,0,0,0,0,0,0,0,0,,,,pM0,pN1,pT2,,IIA期,[胃窦],"[2011-02-21 13:28:00, 2011-03-17 13:39:00, 201...",8,0,1,98.483763,[24.0],[17.7],"[22.0, 22.0]",[13.7],[40.3],[60.0],[81.0],[2.47445],[2.46],[6.05],[220.0],[130.0],1.0,40.31230,"[2011-02-21 13:28:00, 2011-03-17 13:39:00]",0,,"[卡培他滨片, 注射用奥沙利铂, 氟尿嘧啶注射液]",0,[],98.483763,24.0,17.70,22.0,13.7,40.3,60.0,81.0,2.47445,2.46,6.05,220.0,130.0,1.0,0,"[2011-03-17 13:39:00, 2011-04-07 14:05:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],128.502263,21.0,20.7,20.0,20.8,47.2,55.0,65.0,2.75010,1.85,5.15,190.0,126.0,,0,"[2011-04-07 14:05:00, 2011-04-28 14:07:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],126.152077,45.0,22.6,37.0,16.5,44.2,51.0,66.0,2.17035,1.85,4.55,160.0,125.0,1.0,0,"[2011-04-28 14:07:00, 2011-05-19 09:07:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],136.057182,41.0,26.0,41.0,18.3,45.9,59.0,62.0,4.21281,2.31,7.43,150.0,127.0,1.0,0,"[2011-05-19 09:07:00, 2011-06-21 09:53:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],117.489618,24.0,17.7,27.0,18.3,40.2,62.0,70.0,2.38337,2.07,5.17,101.0,116.0,1.0,0,"[2011-06-21 09:53:00, 2011-07-14 13:45:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],,,,,,,,,,,,,,,0,"[2011-07-14 13:45:00, 2011-08-12 14:25:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],133.450529,23.0,23.4,29.0,22.9,43.5,79.0,62.0,3.02103,1.50,5.01,100.0,124.0,1.0,0,"[2011-08-12 14:25:00, 2011-09-26 14:25:00]",0,,"[卡培他滨片, 注射用奥沙利铂, 卡莫氟片]",0,[],,,,,,,,,,,,,,,0,1,2011,24,21.0,21.0,20.0,33.0,23.0,29.0,45.0
2,0203dc965807312951ecf49ee6fbdbeb,0,,,"[15331707, 0015331707, 0015671855, 15570026, 1...",1963-07-14 00:00:00,54,男,53,71.0,174.0,1.81730,23.450918,0,0,0,0,0,0,0,0,0,0,cM0,cN1,cT2,pM0,pN1,pT2,,IIA期,"[胃窦, 胃小弯]","[2017-09-27 13:26:00, 2017-10-23 13:31:00, 201...",8,0,1,118.766533,[11.0],[19.0],"[12.0, 12.0]",[11.3],[33.8],[59.0],[67.0],[2.3712],[2.2],[5.2],[200.0],[126.0],1.0,33.81100,"[2017-09-27 13:26:00, 2017-10-23 13:31:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],118.766533,11.0,19.00,12.0,11.3,33.8,59.0,67.0,2.37120,2.20,5.20,200.0,126.0,1.0,0,"[2017-10-23 13:31:00, 2017-11-23 08:01:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],123.198740,28.0,,23.0,7.9,41.3,65.0,65.0,3.05760,2.08,5.88,169.0,140.0,,0,"[2017-11-23 08:01:00, 2017-12-14 13:53:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],118.766533,22.0,21.6,17.0,12.7,39.3,70.0,67.0,2.49264,2.54,5.77,162.0,139.0,1.0,0,"[2017-12-14 13:53:00, 2018-01-09 13:41:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],116.658191,25.0,19.6,31.0,12.9,39.6,67.0,68.0,1.72095,1.93,4.47,144.0,138.0,1.0,0,"[2018-01-09 13:41:00, 2018-02-06 10:30:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],125.529826,22.0,15.8,32.0,8.8,38.1,99.0,64.0,1.56791,1.76,3.91,104.0,138.0,1.0,0,"[2018-02-06 10:30:00, 2018-03-09 10:42:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],116.658191,21.0,14.4,28.0,12.7,33.6,77.0,68.0,1.71741,1.90,4.37,96.0,125.0,1.0,1,"[2018-03-09 10:42:00, 2018-04-04 11:07:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],,,,,,,,,,,,,,,1,"[2018-04-04 11:07:00, 2018-05-19 11:07:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],127.942782,19.0,19.3,25.0,10.4,39.3,87.0,63.0,1.91902,1.67,4.19,81.0,131.0,,1,1,2017,26,30.0,21.0,25.0,27.0,31.0,26.0,45.0
3,021657d2866fd95ea6595f938ace2172,0,,,"[16150762, 0016150762, 0015048037, 16117086, 0...",1958-01-20 00:00:00,63,男,63,60.0,178.0,1.70090,18.937003,0,0,0,0,0,0,0,0,0,0,,,,pM0,pN0,pT4,,IIB期,"[胃窦, 胃小弯]","[2021-03-29 08:02:32, 2021-04-21 08:50:44, 202...",6,0,1,95.100000,[12.0],[19.9],[12.0],[4.0],[37.2],[89.0],[71.0],[4.74898],[1.98],[7.34],[338.0],[99.0],,37.20990,"[2021-03-29 08:02:32, 2021-04-21 08:50:44]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],95.100000,12.0,19.90,12.0,4.0,37.2,89.0,71.0,4.74898,1.98,7.34,338.0,99.0,,0,"[2021-04-21 08:50:44, 2021-05-27 12:12:55]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],103.400000,7.0,19.4,12.0,6.4,37.3,77.0,58.0,3.08028,1.70,5.32,273.0,95.0,1.0,0,"[2021-05-27 12:12:55, 2021-06-23 08:02:43]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],97.400000,11.0,22.8,14.0,7.4,41.0,101.0,67.0,3.98142,1.56,6.06,243.0,109.0,1.0,1,"[2021-06-23 08:02:43, 2021-07-30 12:26:25]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],100.600000,11.0,20.1,22.0,8.2,41.1,90.0,62.0,2.55136,1.68,4.76,195.0,107.0,,1,"[2021-07-30 12:26:25, 2021-09-06 13:30:33]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],104.100000,14.0,23.1,14.0,6.4,38.1,69.0,57.0,2.42109,1.46,4.27,206.0,102.0,1.0,1,"[2021-09-06 13:30:33, 2021-10-21 13:30:33]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],99.300000,11.0,20.2,14.0,7.1,35.8,75.0,64.0,2.69225,1.34,4.45,198.0,106.0,1.0,0,[],0,,[],0,[],,,,,,,,,,,,,,,0,[],0,,[],0,[],,,,,,,,,,,,,,,0,1,2021,23,36.0,26.0,37.0,38.0,45.0,,
4,034054cd4706ef79c817079340cdab89,0,,,"[0012838235, 12481314, 12496222, 12838235, 001...",1963-09-18 00:00:00,51,男,50,59.0,,,,0,0,0,0,0,0,0,0,0,0,,,,pM0,pN1,pT4,,IIIB期,"[后壁, 胃窦, 胃小弯]","[2015-04-22 12:39:00, 2015-05-20 13:55:00, 201...",8,0,1,123.521384,[46.0],[18.2],"[20.0, 20.0]",[11.0],[43.6],[72.0],[66.0],[2.45459],[3.3],[6.31],[243.0],[135.0],1.0,43.61650,"[2015-04-22 12:39:00, 2015-05-20 13:55:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],123.521384,46.0,18.20,20.0,11.0,43.6,72.0,66.0,2.45459,3.30,6.31,243.0,135.0,1.0,0,"[2015-05-20 13:55:00, 2015-06-18 14:02:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],110.661404,24.0,,19.0,11.2,42.4,67.0,60.0,3.50838,2.39,6.57,198.0,122.0,1.0,0,"[2015-06-18 14:02:00, 2015-07-17 13:33:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],,,,21.0,,,,,2.54824,2.74,6.01,199.0,130.0,,0,"[2015-07-17 13:33:00, 2015-08-13 14:04:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],125.822561,62.0,,34.0,10.3,44.8,59.0,65.0,,,,,,1.0,0,"[2015-08-13 14:04:00, 2015-09-22 10:47:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],133.219934,40.0,22.2,48.0,10.1,42.8,60.0,62.0,1.59249,2.64,4.87,170.0,129.0,1.0,0,"[2015-09-22 10:47:00, 2015-11-05 11:47:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],133.219934,61.0,22.5,47.0,13.4,42.4,87.0,62.0,1.65435,3.73,6.15,171.0,132.0,1.0,0,"[2015-11-05 11:47:00, 2015-12-15 12:01:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],112.214101,38.0,,26.0,11.3,40.4,79.0,58.0,1.28152,2.22,3.86,173.0,127.0,,0,"[2015-12-15 12:01:00, 2016-01-29 12:01:00]",0,,"[卡培他滨片, 注射用奥沙利铂]",0,[],,,,29.0,,,,,,,,,,,0,1,2015,28,29.0,28.0,27.0,39.0,44.0,40.0,45.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,fcd3e4da2f66e7d62bac9337c54a5439,0,,,[0023050712],1959-10-13 00:00:00,60,男,60,56.9,170.0,1.61242,19.688581,0,0,0,0,0,0,0,0,0,0,,,,pM0,pN2,pT3,,IIIA期,"[胃体, 贲门, 胃小弯]","[2020-08-13 14:06:38, 2020-09-01 14:20:12, 202...",7,0,1,107.991724,[8.0],[18.59],"[13.0, 13.0]",[11.0],[35.2],[63.0],[70.0],"[3.67732, 3.5169]",[1.63],[6.17],[369.0],[115.0],,35.20815,"[2020-08-13 14:06:38, 2020-09-01 14:20:12]",0,,[替吉奥胶囊],0,[],107.991724,8.0,18.59,13.0,11.0,35.2,63.0,70.0,3.51690,1.63,6.17,369.0,115.0,,0,"[2020-09-01 14:20:12, 2020-09-25 07:50:13]",0,,[替吉奥胶囊],0,[],104.800000,5.0,16.2,8.0,6.4,36.6,60.0,59.0,8.78969,1.76,11.83,427.0,117.0,1.0,0,"[2020-09-25 07:50:13, 2020-10-15 14:06:02]",0,,[替吉奥胶囊],0,[],104.800000,5.0,17.5,11.0,7.5,37.9,72.0,59.0,7.34225,2.33,10.75,405.0,124.0,,0,"[2020-10-15 14:06:02, 2020-11-05 08:36:44]",0,,[替吉奥胶囊],0,[],108.000000,7.0,18.6,11.0,4.9,35.2,69.0,54.0,2.93538,3.21,7.23,357.0,108.0,1.0,0,"[2020-11-05 08:36:44, 2020-11-25 08:27:12]",0,,[替吉奥胶囊],0,[],97.600000,8.0,24.0,12.0,11.0,40.8,76.0,69.0,3.75408,1.90,6.32,322.0,124.0,,0,"[2020-11-25 08:27:12, 2020-12-17 14:04:59]",0,,[替吉奥胶囊],0,[],103.400000,7.0,25.6,9.0,8.1,42.8,86.0,60.0,3.64325,3.00,7.67,277.0,125.0,,0,"[2020-12-17 14:04:59, 2021-01-31 14:04:59]",0,,[替吉奥胶囊],0,[],103.400000,6.0,23.7,15.0,11.9,38.7,74.0,60.0,3.57840,1.91,6.39,274.0,120.0,,0,[],0,,[],0,[],,,,,,,,,,,,,,,0,1,2020,19,23.0,20.0,20.0,19.0,22.0,45.0,
470,fd81c606879ef8ec683f5801faac8b49,1,3.0,2017-08-16 00:00:00,"[11014288, 0011014288]",1950-08-23 00:00:00,66,男,65,80.0,175.0,1.93860,26.122449,0,0,0,0,0,0,1,0,0,0,cMx,cN+,cT4,,,,,,"[胃体, 胃小弯]","[2017-05-05 10:40:00, 2017-06-13 08:12:00, 201...",8,0,1,98.412929,[10.0],[23.7],"[13.0, 10.0]",[13.2],[39.3],[54.0],[73.0],[2.4522500000000003],[1.19],[4.25],[147.0],[130.0],1.0,39.30595,"[2017-05-05 10:40:00, 2017-06-13 08:12:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],98.412929,10.0,23.70,10.0,13.2,39.3,54.0,73.0,2.45225,1.19,4.25,147.0,130.0,1.0,0,"[2017-06-13 08:12:00, 2017-07-07 10:57:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],113.239319,17.0,24.7,17.0,28.9,41.7,51.0,65.0,1.52040,0.83,2.80,71.0,119.0,1.0,0,"[2017-07-07 10:57:00, 2017-08-16 09:57:00]",1,2017-08-16 00:00:00,"[注射用奥沙利铂, 盐酸厄洛替尼片, 替吉奥胶囊]",0,[],101.716287,20.0,20.0,13.0,16.7,37.4,51.0,57.0,1.67034,0.86,2.91,152.0,119.0,,1,"[2017-08-16 09:57:00, 2017-09-20 10:46:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],113.239319,36.0,26.1,25.0,27.3,41.3,61.0,65.0,1.83912,0.89,3.16,106.0,139.0,1.0,1,"[2017-09-20 10:46:00, 2017-10-18 11:40:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],101.774307,10.0,23.7,13.0,29.9,40.2,61.0,71.0,1.97835,1.09,3.63,115.0,143.0,1.0,1,"[2017-10-18 11:40:00, 2017-11-09 07:52:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],109.165412,9.0,,14.0,26.7,41.9,70.0,67.0,3.58848,0.99,5.34,96.0,150.0,1.0,1,"[2017-11-09 07:52:00, 2017-12-01 12:41:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],86.786287,10.0,,22.0,12.5,41.7,64.0,81.0,3.18600,1.32,5.31,167.0,148.0,1.0,1,"[2017-12-01 12:41:00, 2018-01-15 12:41:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],,,,,,,,,,,,,,,1,1,2017,38,24.0,39.0,,,,,
471,fd9412166385394112a26e951a94df71,0,,,"[12821007, 13866435, 0012821007, 0013866435, 0...",1962-12-25 00:00:00,53,女,53,70.0,162.0,1.73130,26.672763,0,0,0,0,1,0,1,0,0,0,,,,pM0,pNx,pT1,,,"[后壁, 前壁, 胃底]","[2016-06-28 14:18:00, 2016-07-25 07:54:00, 201...",7,0,1,107.885815,[15.0],[14.9],"[17.0, 17.0]",[6.7],[40.7],[84.0],[48.0],[2.1012],[1.45],[4.08],[283.0],[104.0],1.0,40.70725,"[2016-06-28 14:18:00, 2016-07-25 07:54:00]",0,,[替吉奥胶囊],0,[],107.885815,15.0,14.90,17.0,6.7,40.7,84.0,48.0,2.10120,1.45,4.08,283.0,104.0,1.0,0,"[2016-07-25 07:54:00, 2016-08-25 14:27:00]",0,,[替吉奥胶囊],0,[],108.635688,8.0,18.3,16.0,8.0,41.7,74.0,47.0,2.41902,1.58,4.53,252.0,103.0,1.0,0,"[2016-08-25 14:27:00, 2016-09-23 14:23:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],114.555244,17.0,19.9,29.0,9.0,42.2,65.0,40.0,1.80744,2.01,4.43,183.0,119.0,1.0,0,"[2016-09-23 14:23:00, 2016-10-28 11:27:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊, 卡铂注射液]",0,[],,,,,,,,,,,,,,,0,"[2016-10-28 11:27:00, 2016-11-28 14:29:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],110.201066,15.0,18.3,22.0,8.6,41.9,66.0,45.0,1.83180,1.85,4.30,152.0,120.0,1.0,1,"[2016-11-28 14:29:00, 2017-01-06 13:46:00]",0,,[注射用奥沙利铂],0,[],112.731087,20.0,18.8,23.0,7.2,42.4,79.0,42.0,1.67698,2.03,4.39,131.0,123.0,1.0,0,"[2017-01-06 13:46:00, 2017-02-20 13:46:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],100.805076,9.0,,14.0,9.0,44.5,86.0,59.0,2.81996,1.60,4.93,197.0,119.0,1.0,0,[],0,,[],0,[],109.407070,12.0,17.5,17.0,5.4,41.2,94.0,46.0,3.14730,1.58,5.38,182.0,124.0,,0,0,2016,26,31.0,28.0,34.0,31.0,38.0,45.0,
472,feb4fa9b0e813eb1e2cb836a81a912e3,0,,,"[15332524, 0015332524]",1960-03-16 00:00:00,57,男,57,76.0,170.0,1.85690,26.297578,0,0,0,0,0,0,0,0,0,0,,,,pM0,pN1,pT2,,IIA期,"[后壁, 胃窦]","[2017-09-21 12:38:00, 2017-10-18 11:55:00, 201...",6,0,1,112.227092,[24.0],[21.2],"[15.0, 15.0]",[13.4],[42.1],[92.0],[69.0],[2.6226000000000003],[1.53],[4.65],[251.0],[128.0],1.0,42.10765,"[2017-09-21 12:38:00, 2017-10-18 11:55:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],112.227092,24.0,21.20,15.0,13.4,42.1,92.0,69.0,2.62260,1.53,4.65,251.0,128.0,1.0,0,"[2017-10-18 11:55:00, 2017-11-14 13:22:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],125.274747,100.0,,69.0,15.1,43.2,97.0,63.0,3.15594,1.58,5.34,160.0,130.0,,0,"[2017-11-14 13:22:00, 2017-12-12 10:52:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],107.582820,18.0,,50.0,13.8,44.1,92.0,58.0,2.60977,1.83,4.99,145.0,135.0,1.0,0,"[2017-12-12 10:52:00, 2018-01-05 11:00:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],125.274747,12.0,,27.0,15.3,42.6,89.0,55.0,1.71760,1.61,3.80,133.0,132.0,1.0,0,"[2018-01-05 11:00:00, 2018-02-01 13:48:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],108.354578,11.0,,26.0,14.3,41.0,97.0,57.0,2.88094,1.50,4.90,127.0,132.0,,0,"[2018-02-01 13:48:00, 2018-03-18 13:48:00]",0,,"[注射用奥沙利铂, 替吉奥胶囊]",0,[],112.521226,10.0,14.8,31.0,14.5,40.4,95.0,51.0,1.54860,1.44,3.48,109.0,128.0,1.0,0,[],0,,[],0,[],,,,,,,,,,,,,,1.0,0,[],0,,[],0,[],,,,,,,,,,,,,,,0,1,2017,26,27.0,27.0,24.0,27.0,45.0,,


## keep variables of interest

In [11]:
# Per-cycle (dynamic) variables, written without their "C<n>*" prefix.
# Keep the order: e_dynamic_feature lists the English names position by position.
dynamic_feature = (
    # glomerular filtration rate
    ["肾小球滤过率"]
    # biochemistry panel
    + [
        "血常规血生化_生化检查:天门冬氨酸氨基转移酶(AST)-静脉血",
        "血常规血生化_生化检查:总胆红素(TBIL)-静脉血",
        "血常规血生化_生化检查:前白蛋白(PA)-静脉血",
        "血常规血生化_生化检查:碱性磷酸酶(ALP)-静脉血",
        "血常规血生化_生化检查:白蛋白(ALB)-静脉血",
        "血常规血生化_生化检查:丙氨酸氨基转移酶(ALT)-静脉血",
        "血常规血生化_生化检查:肌酐(Crea)-静脉血",
    ]
    # complete blood count
    + [
        "血常规血生化_血常规:血小板计数(PLT#)-静脉血",
        "血常规血生化_血常规:血红蛋白(Hb)-静脉血",
        "血常规血生化_血常规:白细胞计数(WBC#)-静脉血",
        "血常规血生化_血常规:淋巴细胞计数(Lymph#)-静脉血",
        "血常规血生化_血常规:中性粒细胞计数-静脉血",
    ]
    # prophylactic drug flag and performance-status score
    + ["预防性抗骨髓抑制药物", "身体状况评价_PS评分"]
)

In [12]:
# Static (per-patient) variables.
# Keep the order: e_state_feature_list lists the English names position by position.
state_feature_list = (
    # demographics and body measurements
    ["性别", "年龄", "人口学_首诊年龄(岁)", "体重", "身高", "ABS", "BMI"]
    # treatment history
    + ["治疗史_既往放疗", "C0*既往化疗"]
    # chronic comorbidities
    + [
        "合并慢性疾病_充血性心力衰竭",
        "合并慢性疾病_心绞痛",
        "合并慢性疾病_心肌梗死",
        "合并慢性疾病_慢阻肺",
        "合并慢性疾病_消化性溃疡",
        "合并慢性疾病_甲状腺疾病",
        "合并慢性疾病_糖尿病",
        "合并慢性疾病_自身免疫性疾病",
        "合并慢性疾病_获得性免疫缺陷综合征",
    ]
    # pathological staging
    + ["肿瘤状态_pM分期", "肿瘤状态_pN分期", "肿瘤状态_pT分期"]
    # baseline prognostic index
    + ["C0*预后影响指标"]
)

In [13]:
# Expand every dynamic feature (plus "interval") into its C1..C8 column name.
per_cycle_names = dynamic_feature + ["interval"]
cycle_feature_list = []
for cycle_no in range(1, 9):
    cycle_feature_list.extend(
        "C{}*{}".format(cycle_no, name) for name in per_cycle_names
    )


In [14]:
# Column selection: identifier/outcome columns first, then the static
# features, then the per-cycle features.
id_columns = ["patient_sn", "event", "首次不良反应#周期", "人口学_住院号", "年份", "cycle_num"]
ch_columns_list = [*id_columns, *state_feature_list, *cycle_feature_list]
output_data = data[ch_columns_list]
# Optional display/export of the Chinese-named table (disabled):
# output_data
# output_data.to_csv("./output/datalong_024.csv", index = False, encoding = "utf_8_sig")

## rename variables

In [15]:
# English names for the per-cycle variables, in the same order as dynamic_feature.
e_dynamic_feature = (
    # glomerular filtration rate
    ["GFR"]
    # biochemistry panel
    + ["AST", "TBIL", "PA", "ALP", "ALB", "ALT", "Crea"]
    # complete blood count
    + ["PLT", "Hb", "WBC", "Lymph", "Neut"]
    # prophylactic drug flag and performance-status score
    + ["prevention", "PS_score"]
)

In [16]:
# English names for the static variables, in the same order as state_feature_list.
e_state_feature_list = (
    # demographics and body measurements
    ["gender", "age", "age_at_diagnosis", "weight", "height", "ABS", "BMI"]
    # treatment history
    + ["previous_radiation", "previous_chemotherapy"]
    # chronic comorbidities
    + [
        "congestive_heart_failure",
        "angina_pectoris",
        "myocardial_infarction",
        "copd",
        "peptic_ulcer",
        "thyroid_disease",
        "diabetes",
        "autoimmune_disease",
        "acquired_immune_deficiency_syndrome",
    ]
    # pathological staging
    + ["tumor_stage_pM", "tumor_stage_pN", "tumor_stage_pT"]
    # baseline prognostic index
    + ["PNI"]
)

In [17]:
# Expand every English dynamic feature (plus "interval") into its C1..C8 column name.
e_per_cycle_names = e_dynamic_feature + ["interval"]
e_cycle_feature_list = []
for cycle_no in range(1, 9):
    e_cycle_feature_list.extend(
        "C{}*{}".format(cycle_no, name) for name in e_per_cycle_names
    )

In [18]:
# English column names; they replace the columns of output_data by position,
# so the order must mirror ch_columns_list.
en_id_columns = ["patient_sn", "event", "event#cycle", "visit_sn", "visit_year", "cycle_num"]
en_columns_list = [*en_id_columns, *e_state_feature_list, *e_cycle_feature_list]
output_data.columns = en_columns_list
# Optional export of the English-named table (disabled):
# output_data.to_csv("./output/datalong_en_024.csv", index = False)

In [19]:
# Build the Chinese->English and English->Chinese column-name mappings and
# save each one as a pickle file.
ch2en_columns_name_dict = dict(zip(ch_columns_list, en_columns_list))
en2ch_columns_name_dict = dict(zip(en_columns_list, ch_columns_list))

# open() does not create missing directories, so make sure the target exists.
os.makedirs("./output", exist_ok=True)

# Context managers close each file even if pickle.dump raises.
with open('./output/ch2en_columns_name_dict.pkl', 'wb') as f_save:
    pickle.dump(ch2en_columns_name_dict, f_save)

with open('./output/en2ch_columns_name_dict.pkl', 'wb') as f_save:
    pickle.dump(en2ch_columns_name_dict, f_save)