In [1]:
# !pip install cutkum
# !pip install tensorflow
# !pip install keras
# !pip install deepcut
# !pip install xgboost
# !pip install scikit-learn  # provides sklearn.pipeline.Pipeline used below

import pandas as pd
import numpy as np
import warnings
import deepcut

from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier, plot_importance
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# NOTE(review): this silences every warning for the whole session, so pandas
# deprecation and chained-assignment warnings raised by later cells are hidden.
warnings.filterwarnings("ignore")
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# None means "never truncate cell text". The former sentinel -1 was deprecated
# in pandas 1.0 in favour of None and is not accepted by pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [2]:
# Load the raw sheet. The path is written as a raw string so that the Windows
# backslashes are taken literally instead of relying on invalid escape
# sequences (`\-`, `\D`, `\e`), which Python flags as deprecated.
# NOTE(review): this is a machine-specific absolute path; consider moving it to
# a configurable data-directory constant.
df = pd.read_excel(r'E:\--- Job ---\--- งานนอก ---\อาจารย์พิมผกา (STATS)\Data\ex_df.xlsx', sheet_name="Sheet1")

# Keep only rows that have an HN. The explicit copy makes the column
# assignments below write to an independent frame rather than to a filtered
# slice of the frame returned by read_excel.
df = df[df['HN'].notnull()].copy()

# Convert HN to the smallest integer dtype that can hold its values.
df['HN'] = pd.to_numeric(df['HN'], downcast="integer")

# Subject_ID is the key used by every later group-by; it is a copy of HN.
df['Subject_ID'] = df['HN']

In [3]:
# Subject_ID values on rows whose ID has already appeared on an earlier row.
repeated_id_mask = df['Subject_ID'].duplicated()
duplicate = df.loc[repeated_id_mask, 'Subject_ID']

duplicate

41     649131
42     649131
67     647655
68     647655
69     647655
70     647655
74     129483
100    121410
108    14413 
109    14413 
124    65555 
129    342992
149    648773
157    301633
170    535807
172    38608 
173    38608 
186    66071 
192    15439 
219    642321
236    46244 
241    461995
246    66956 
248    491497
271    99104 
273    571589
314    266280
339    109588
380    649696
Name: Subject_ID, dtype: int32

# FEATURE EXTRACTION

In [4]:
# Column labels selected for the personal group (df_personal).
personal_feature = [
    'Subject_ID',
    'SEX_x',
    'AGE',
]

# Column labels selected for the ICD-10 group (df_icd10 = df[icd10_feature]).
# The strings are used verbatim as DataFrame column labels, so whitespace is
# significant: 'E036     ' carries trailing spaces inside the literal.
# Both 'HN' and 'Subject_ID' are listed; Subject_ID is created as a copy of HN
# right after the data is loaded.
icd10_feature = ['HN', 'A150', 'A162', 'A178', 'A881', 'B1810', 'B24', 'C110',
                'C182', 'C186', 'C19', 'C20', 'C221', 'C227', 'C250', 'C341',
                'C349', 'C501', 'C509', 'C61', 'C674', 'C73', 'C857', 'C884',
                'C900', 'C918', 'C944', 'D352', 'D473', 'D474', 'D500', 'D508',
                'D509', 'D560', 'D569', 'D591', 'D619', 'D638', 'D693', 'E032',
                'E036     ', 'E039', 'E042', 'E050', 'E052', 'E059', 'E061',
                 'E109', 'E112', 'E113', 'E116', 'E117', 'E118', 'E119', 'E132',
                'E2300', 'E2309', 'E232', 'E236', 'E559', 'E660', 'E780', 'E782',
                'E785', 'E789', 'E871', 'E876', 'F001', 'F064', 'F067', 'F102',
                'F105', 'F320', 'F321', 'F410', 'F419', 'F451', 'F510', 'G20',
                'G370', 'G430', 'G438', 'G439', 'G442', 'G470', 'G510', 'G603',
                'G618', 'G700', 'H018', 'H019', 'H041', 'H101', 'H259', 'H447',
                'H811', 'H919', 'I10', 'I158', 'I159', 'I252', 'I260', 'I269',
                'I272', 'I482', 'I493', 'I631', 'I633', 'I64', 'I694', 'I809',
                'I829', 'I951', 'J010', 'J159', 'J208', 'J300', 'J301', 'J302',
                'J303', 'J304', 'J440', 'J449', 'J459', 'J47', 'J991', 'K210',
                'K219', 'K297', 'K30', 'K590', 'K746', 'K760', 'L309', 'L405',
                'L501', 'L52', 'L853', 'L950', 'M028', 'M053', 'M0530', 'M058',
                'M0583', 'M061', 'M069', 'M0699', 'M0700', 'M0702', 'M100', 'M109',
                'M118', 'M130', 'M159', 'M214', 'M255', 'M2550', 'M321', 'M329',
                'M340', 'M350', 'M351', 'M352', 'M430', 'M452', 'M457', 'M458',
                'M459', 'M4712', 'M492', 'M791', 'M800', 'M810', 'M894', 'M8940',
                'M941', 'N003', 'N010', 'N029', 'N039', 'N049', 'N179', 'N182',
                'N183', 'N184', 'N185', 'N189', 'N201', 'N391', 'N832', 'N953',
                'O926', 'R030', 'R05', 'R060', 'R1049', 'R119', 'R14', 'R252',
                'R42', 'R509', 'R510', 'R609', 'R739', 'R828', 'Y453', 'Y608',
                'Z000', 'Z027', 'Z101', 'Z515', 'Z519', 'Z712', 'Z715', 'Z955',
                'Subject_ID']

# Column labels selected for the DX group (df_dx). The strings are used
# verbatim as column labels, so 'Chifef Complaint' and the trailing space in
# 'Create By ' are kept exactly as written.
dx_feature = [
    'HN',
    'Chifef Complaint',
    'Present Illness',
    'Create By ',
    'Abdomen',
    'Extremities',
    'GA',
    'HEENT',
    'Heart',
    'KUB',
    'Lung',
    'Neuro',
    'Other',
    'Subject_ID',
]

# Column labels selected for the lab group (df_lab); the key column comes first.
lab_feature = [
    'Subject_ID',
    'Glucose',
    'Glycohemoglobin (HbA1c)',
    'eGFR',
    'Creatinine',
    'BUN',
    'Albumin',
    'Uric Acid',
    'Total Protein',
    'Cholesterol',
    'Triglyceride',
    'HDL-c',
    'LDL-c',
]

# Column labels selected for the drug group (df_drug = df[drug_feature]).
# Several labels appear twice, once plain and once with a trailing space
# (for example 'BDPY00' and 'BDPY00 '); these are distinct strings and so
# select distinct columns.
# NOTE(review): presumably these pairs are the same code exported with
# inconsistent whitespace - confirm against the spreadsheet before merging them.
drug_feature = ['HN', 'ABIT00', 'ACTI01', 'AERN01', 'AERT00', 'AIMI00', 'AIRT01',
                'ALDT04', 'ALPT00', 'ALPT01', 'AMAT00', 'AMAT03', 'AMIT00',
                'AMIT02', 'AMKT01', 'AMLT00', 'AMLT01', 'ANAT04', 'ANCT01',
                'APOT01', 'AQUE01', 'AQUY01', 'ARAT01', 'ARCT00', 'ASPT02',
                'ASPT03', 'ASTT00', 'ATOT01', 'ATOT02', 'ATOT03', 'AVAH00',
                'AZAT01', 'AZIT00', 'BCOT01', 'BDPY00', 'BDPY00 ', 'BDQY00',
                'BDQY00 ', 'BELS00', 'BENT02', 'BENT04', 'BERLT3', 'BERN02',
                'BERT01', 'BEST00', 'BEST01', 'BETE05', 'BETT00', 'BIST03',
                'BLOT02', 'BRIT04', 'BROT04', 'CALT00', 'CALT02', 'CALT07',
                'CANE01', 'CAPY01', 'CART06', 'CART07', 'CART09', 'CDRT00',
                'CEFI01', 'CEFT01', 'CELH00', 'CELT03', 'CELT05', 'CELT06',
                'CELT07', 'CENT00', 'CENT01', 'CHAT03', 'CHLI03', 'CIPT04',
                'CIPT06', 'CLOE01', 'CODT00', 'CODT04', 'CODT05', 'COMT02',
                'COMT03', 'CONT02', 'CURT00', 'CYCT03', 'DAFT02', 'DAIE00',
                'DEAT01', 'DEPT04', 'DERE01', 'DETT01', 'DEXT06', 'DIAT01',
                'DIAT07', 'DILT03', 'DIMI00', 'DIPE01', 'DIPE04', 'DIPI00',
                'DIPT00', 'DOST02', 'DURE01', 'DYNI00', 'EBIT02', 'EDAT00',
                'ELOE02', 'EMTT00', 'ENAI02', 'ENPT00', 'EPOI00', 'EPOI01',
                'ERLT00', 'ETHT01', 'EUTT00', 'EUTT01', 'EXTY01 ', 'EXTY08',
                'EXTY08 ', 'EZET00', 'FACY00', 'FBCT01', 'FEBT01', 'FEMT01',
                'FENT02', 'FERT01', 'FIBT00', 'FIRT00', 'FLUL04', 'FLUL06',
                'FLUT02', 'FLUT05', 'FOLT01', 'FORT00', 'FOST01', 'FUCE04',
                'FURT02', 'FURT03', 'GABT00', 'GABT01', 'GAST02', 'GAVT00',
                'GERT01', 'GLIT04', 'GLUT07', 'GOWY04', 'HALT11', 'HASY02',
                'HCQT00', 'HEMI07', 'HIBI01', 'HULI00', 'HUMI07', 'HYDT02',
                'HYDT04', 'HYPT00', 'IBUT03', 'IMOT02', 'INSY02', 'ISOT02',
                'ISOT03', 'IVCY03', 'IVCY04', 'IVCY04 ', 'IVCY05', 'IVCY05 ',
                'IVCY06', 'IVCY06 ', 'IVCY08', 'IVCY08 ', 'IVET01', 'IVSY03',
                'IVSY03 ', 'JANT01', 'JANT02', 'JART00', 'JART01', 'JART03',
                'KALL01', 'KAMN02', 'KANI00', 'KRET00', 'LAMT07', 'LASI02',
                'LERT00', 'LEXT00', 'LIPT02', 'LIVT00', 'LORT00', 'LORT03',
                'LORT04', 'LORT05', 'LYRT06', 'LYRT07', 'MADT01', 'MADT02',
                'MADT03', 'MAFT00', 'MAFT02', 'MAFT03', 'MAGT03', 'MASY00',
                'MASY04', 'MAXT02', 'MECT00', 'MEDT00', 'MEPT02', 'MERT00',
                'MEST02', 'METE00', 'METT03', 'METT07', 'METT16', 'MEVT03',
                'MILS00', 'MINE00', 'MINT06', 'MIRT00', 'MIRT01', 'MOBT02',
                'MONT01', 'MTS02', 'MULT01', 'MYDT01', 'MYFT00', 'NAPT00',
                'NASH01', 'NASH02', 'NASY16', 'NATH00', 'NAUT00', 'NAVT00',
                'NEEY01', 'NEEY02', 'NEEY07', 'NEEY26', 'NEOE01', 'NEUT02',
                'NGTY02', 'NIFT03', 'NORT04', 'NOVI00', 'NSSI03', 'NSSI04 ',
                'NSSI07', 'NSSI09', 'NUET00', 'OBIT00', 'ONSI01', 'ORKT01',
                'OSET03', 'OSRL01', 'OXYY01', 'OZEI01', 'OZEI02', 'PANT00',
                'PATH02', 'PIVY00', 'PIVY00 ', 'PLAE01', 'POTS01', 'POTS05',
                'PRET00', 'PRET04', 'PRET13', 'PRET18', 'PROI06', 'PURT02',
                'QBAY00', 'QBAY00 ', 'QUIT02', 'RECI03', 'RELN01', 'RELN02',
                'RELT01', 'REMT04', 'RENT03', 'REQT00', 'REVT00', 'RIFT03',
                'RIFT07', 'RIFT08', 'RITT00', 'RUPT00', 'SALT01', 'SALT06',
                'SENT00', 'SERN05', 'SERN06', 'SERN07', 'SERT05', 'SERT12',
                'SERT14', 'SERT15', 'SERT16', 'SIAT01', 'SIAT04', 'SILT02',
                'SINT04', 'SKIE04', 'SODT00', 'SODT02', 'SODT03', 'SPIN00',
                'SPIN03', 'STRT01', 'SUPT01', 'SYMN00', 'SYMN01', 'SYMN02',
                'SYMN03', 'SYRY02', 'SYRY03', 'SYRY04', 'SYRY06', 'SYRY09',
                'TALI00', 'TART05', 'TART07', 'TEAH02', 'TEBT01', 'TEGY00',
                'TEGY00 ', 'TELT03', 'TENT04', 'TERH01', 'TEVI01', 'THRY02',
                'THRY02 ', 'THYT02', 'TIMT00', 'TOFT01', 'TOLT00', 'TONT00',
                'TOPT01', 'TOPT02', 'TRAT08', 'TRET01', 'TRIE11', 'TRIE13',
                'TRUI01', 'TUMT00', 'TYLT02', 'ULTT00', 'UREE03', 'UTMT00',
                'VAC45', 'VAC83', 'VAC93', 'VALT00', 'VAST00', 'VENI01', 'VITB1T',
                'VITT00', 'VITT01', 'VITT02', 'VITT08', 'VITT16', 'VITT17',
                'VIVT01', 'VIVT02', 'WATI02', 'XANT00', 'XANT05', 'XART01',
                'XART02', 'XYZT00', 'ZEFI01', 'ZERT00', 'ZIMT01', 'ZINT00',
                'ZITT01', 'ZOLI02', 'ZOLI03', 'ZOLT00', 'ZONT02', 'Subject_ID'] 

# Column labels selected for the drug-detail group
# (df_drug_detail = df[drug_detail_feature]).
# The strings are used verbatim as column labels; ' ' (a single space) is
# itself one of the labels in this list.
# NOTE(review): the entries before ' ' read like diagnosis descriptions and the
# entries after it like drug ingredient names - confirm against the spreadsheet.
drug_detail_feature = ['HN', 'Abdominal pain, unspecified (TM)',
                       'Abnormal findings on cytological and histological examination of urine',
                       'Acute atopic conjunctivitis',
                       'Acute bronchitis due to other specified organisms',
                       'Acute maxillary sinusitis',
                       'Acute nephritic syndrome: diffuse mesangial proliferative glomerulonephritis',
                       'Acute panmyelosis', 'Acute renal failure, unspecified',
                       "Adult-onset Still's disease", 'Alcohol, Dependence sydrome',
                       'Alcohol, Psychotic disorder', 'Allergic rhinitis due to pollen',
                       'Allergic rhinitis, unspecified', 'Alpha thalassaemia',
                       'Anaemia in other chronic diseases classified elsewhere',
                       'Ankylosing spondylitis, cervical region',
                       'Ankylosing spondylitis, lumbosacral region',
                       'Ankylosing spondylitis, sacral and sacrococcygeal region',
                       'Ankylosing spondylitis, site unspecified',
                       'Anxiety disorder, unspecified', 'Aplastic anaemia, unspecified',
                       'Arthropathic psoriasis (M07.0 - M07.3*, M09.0*)',
                       'Asthma, unspecified', 'Bacterial pneumonia, unspecified',
                       "Behcet's disease", "Bell's palsy",
                       'Benign neoplasm of pituitary gland', 'Benign paroxysmal vertigo',
                       'Bronchiectasis', 'Calculus of ureter',
                       'Cerebral infarction due to embolism of precerebral arteries',
                       'Cerebral infarction due to thrombosis of cerebral arteries',
                       'Chronic atrial fibrillation', 'Chronic kidney disease, stage 2',
                       'Chronic kidney disease, stage 3',
                       'Chronic kidney disease, stage 4',
                       'Chronic kidney disease, stage 5',
                       'Chronic nephritic syndrome: unspecified',
                       'Chronic obstructive pulmonary disease with acute lower respiratory infection',
                       'Chronic obstructive pulmonary disease, unspecified',
                       'Chronic renal failure, unspecified',
                       'Chronic viral hepatitis B without delta-agent (TM)',
                       'Constipation', 'Cough', 'Cramp and spasm',
                       "Dementia in Alzheimer's disease with late onset (G30.1+)",
                       'Dermatitis, unspecified', 'Diabetes insipidus',
                       'Diffuse sclerosis',
                       'Disorder of lipoprotein metabolism, unspecified',
                       'Disorders of initiating and maintaining sleep [insomnias]',
                       'Distal Interphalangal psoriatic arthropathy ( L40.5 ) Upper arm',
                       'Distal interphalangeal psoriatic arthropathy (L40.5+), multiple sites',
                       'Dizziness and giddiness',
                       'Drug abuse counselling and surveillance',
                       'During other surgical and medical care', 'Dyspepsia', 'Dyspnoea',
                       'Elevated blood pressure reading without diagnosis of hypertension',
                       'Embolism and thrombosis of unspecified vein',
                       'Enterobacterial spondylitis (A01-A04+)', 'Epidemic vertigo',
                       'Erythema nodosum', 'Essential (haemorrhagic) thrombocythaemia',
                       'Essential (primary) hypertension',
                       'Extranodal marginal zone B-cell lymphoma of mucosa-associated lymphoid tissue [MALT-lyphoma]',
                       'Fatty (change of) liver, not elsewhere classified',
                       'Fever, unspecified', 'Flat foot [pes planus] (acquired)',
                       'Flatulence and related conditions', 'Galactorrhoea',
                       'Gastritis, unspecified',
                       'Gastro-oesophageal reflux disease with oesophagitis',
                       'Gastro-oesophageal reflux disease without oesophagitis',
                       'General medical examination', 'Gout, unspecified',
                       'Headache, unspecified (TM)', 'Hearing loss, unspecified',
                       'Hyperglycaemia, unspecified', 'Hyperlipidaemia, unspecified',
                       'Hypogonadotropic hypogonadism', 'Hypokalaemia',
                       'Hypopituitarism, unspecified', 'Hyposmolality and hyponatraemia',
                       'Hypothyroidism due to medicaments and other exogenous substances',
                       'Hypothyroidism from Hashimoto’s thyroiditis (TM)',
                       'Hypothyroidism, unspecified', 'Idiopathic gout',
                       'Idiopathic progressive neuropathy',
                       'Idiopathic thrombocytopenic purpura', 'Idiopathic urticaria',
                       'Inflammation of eyelid, unspecified',
                       'Insulin-dependent diabetes mellitus, without complications',
                       'Intrahepatic bile duct carcinoma',
                       'Iron deficiency anaemia secondary to blood loss (chronic)',
                       'Iron deficiency anaemia, unspecified',
                       'Issue of medical certificate', 'Livedoid vasculitis',
                       'Malignant neoplasm of breast, unspecified',
                       'Malignant neoplasm of bronchus or lung, unspecified',
                       'Malignant neoplasm of central portion of breast',
                       'Malignant neoplasm of colon, ascending colon',
                       'Malignant neoplasm of colon, descending colon',
                       'Malignant neoplasm of nasopharynx, superior wall of nasopharynx',
                       'Malignant neoplasm of pancreas, head of pancreas',
                       'Malignant neoplasm of posterior wall of bladder',
                       'Malignant neoplasm of prostate',
                       'Malignant neoplasm of rectosigmoid junction',
                       'Malignant neoplasm of rectum',
                       'Malignant neoplasm of thyroid gland',
                       'Malignant neoplasm of upper lobe, bronchus or lung',
                       'Mature B-cell leukaemia Burkitt-type',
                       'Medical care, unspecified',
                       'Migraine without aura [common migraine]', 'Migraine, unspecified',
                       'Mild cognitive disorder', 'Mild depressive episode',
                       'Mixed hyperlipidaemia', 'Moderate depressive episode',
                       'Multiple myeloma', 'Myalgia', 'Myasthenia gravis',
                       'Nausea and vomiting, unspecified (TM)',
                       'Nephrotic syndrome: unspecified',
                       'Non-insulin-dependent diabetes mellitus, with multiple complications',
                       'Non-insulin-dependent diabetes mellitus, with ophthalmic complications',
                       'Non-insulin-dependent diabetes mellitus, with other specified complications',
                       'Non-insulin-dependent diabetes mellitus, with renal complications',
                       'Non-insulin-dependent diabetes mellitus, with unspecified complications',
                       'Non-insulin-dependent diabetes mellitus, without complications',
                       'Nonorganic insomnia', 'Nontoxic multinodular goitre',
                       'Obesity due to excess calories', 'Oedema, unspecified',
                       'Old myocardial infarction', 'Organic anxiety disorder',
                       'Orthostatic hypotension', 'Osteomyelofibrosis',
                       'Other allergic rhinitis',
                       'Other and unspecified cirrhosis of liver',
                       'Other and unspecified ovarian cysts',
                       'Other autoimmune haemolytic anaemias',
                       'Other disorders of lacrimal gland',
                       'Other disorders of pituitary gland',
                       'Other hypertrophic osteoarthropathy',
                       'Other hypertrophic osteoarthropathy: multiple sites',
                       'Other inflammatory polyneuropathies',
                       'Other iron deficiency anaemias', 'Other migraine',
                       'Other nonsteroidal anti-inflammatory drugs [NSAID]',
                       'Other overlap syndromes', 'Other reactive arthropathies',
                       'Other seasonal allergic rhinitis', 'Other secondary hypertension',
                       'Other secondary pulmonary hypertension',
                       'Other seropositive rheumatoid arthritis',
                       'Other seropositive rheumatoid arthritis, wrist',
                       'Other specified carcinomas of liver',
                       'Other specified crystal arthropathies',
                       'Other specified diabetes mellitus, with renal complications',
                       'Other specified inflammation of eyelid',
                       "Other specified types of non-Hodgkin's lymphoma",
                       'Other spondylosis with myelopathy: cervical region',
                       'Other tuberculosis of nervous system', 'Pain in joint',
                       'Pain in joint: multiple sites', 'Palliative care',
                       'Panic disorder [episodic paroxysmal anxiety]',
                       'Parkinson s disease', 'Persistent proteinuria, unspecified',
                       'Person consulting for explanation of investigation findings',
                       'Phlebitis and thrombophlebitis of unspecified site',
                       'Polyarthritis, unspecified', 'Polyarthrosis, unspecified',
                       'Postmenopausal osteoporosis',
                       'Postmenopausal osteoporosis with pathological fracture',
                       'Presence of coronary angioplasty implant and graft',
                       'Progressive systemic sclerosis',
                       'Pulmonary embolism with mention of acute cor pulmonale NOS',
                       'Pulmonary embolism without mention of acute cor pulmonale',
                       'Pure hypercholesterolaemia',
                       'Rapidly progressive nephritic syndrome: minor glomerular abnormality',
                       'Recurrent and persistent haematuria: unspecified',
                       'Relapsing polychondritis',
                       'Respiratory disorders in other diffuse connective tissue disorders',
                       'Retained (old) intraocular foreign body, nonmagnetic',
                       'Rheumatoid arthritis with involvement of other organs and systems',
                       'Rheumatoid arthritis with involvement of other organs and systems, multiple sites',
                       'Rheumatoid arthritis, unspecified',
                       'Rheumatoid arthritis, unspecified: site unspecified',
                       'Routine general health check-up of inhabitants of institutions',
                       'Secondary hypertension, unspecified',
                       'Senile cataract, unspecified',
                       'Sequelae of stroke, not specified as haemorrhage or infarction',
                       'Sicca syndrome [Sjogren]', 'Spondylolysis',
                       'States associated with artificial menopause',
                       'Stroke, not specified as haemorrhage or infarction',
                       'Subacute thyroiditis',
                       'Systemic lupus erythematosus with involvement of organs and systems',
                       'Systemic lupus erythematosus, unspecified',
                       'Tension-type headache', 'Thalassaemia, unspecified',
                       'Thyrotoxicosis with diffuse goitre',
                       'Thyrotoxicosis with toxic multinodular goitre',
                       'Thyrotoxicosis, unspecified',
                       'Tuberculosis of lung, confirmed by sputum microscopy with or without culture',
                       'Tuberculosis of lung, without mention of bacteriological or histological confirmation',
                       'Undifferentiated somatoform disorder',
                       'Unspecified human immunodeficiency virus [HIV] disease',
                       'Vasomotor rhinitis', 'Ventricular premature depolarization',
                       'Vitamin D deficiency, unspecified', 'Xerosis cutis', ' ',
                       'Acetylcysteine', 'Adalimumab', 'Agomelatine',
                       'Alendronate, Colecalciferol', 'Alfacalcidol', 'Allopurinol',
                       'Alogliptin, Pioglitazone', 'Alprazolam',
                       'Aluminium, Magnesium, Dimethylpolysiloxane', 'Amitriptyline',
                       'Amlodipine', 'Amlodipine, Valsartan', 'Amoxicillin, Clavulanate',
                       'Antacids, Aluminium hydroxide, Magnesium hydroxide',
                       'Aripiprazole', 'Aspirin', 'Atenolol', 'Atorvastatin',
                       'Azathioprine', 'Azilsartan', 'Azithromycin', 'Bemiparin',
                       'Betahistine', 'Betamethasone', 'Betamethasone,Salicylic acid',
                       'Bisoprolol', 'Bortezomib', 'Bosentan', 'Bromocriptine',
                       'Budesonide, Formoterol', 'Calcipotriol, Betamethasone',
                       'Calcium carbonate', 'Calcium polystyrene sulfonate',
                       'Calcium, Vitamin C, Vitamin D3, Vitamin B6',
                       'Calcium, Vitamin D3', 'Candesartan', 'Carboxymethylcellulose',
                       'Carvedilol', 'Cefixime', 'Ceftriaxone', 'Celecoxib', 'Cetirizine',
                       'Chamomile extract, Essential oil, Chamazulene',
                       'Chlordiazepoxide, Clidinium', 'Chlorpheniramine', 'Ciprofloxacin',
                       'Clobetasol propionate', 'Clonazepam', 'Clopidogrel',
                       'Clotrimazole', 'Codeine, Glyceryl guaiacolate', 'Colchicine',
                       'Curcuminoid', 'Cyclophosphamide',
                       'Cyclosporine, Ciclosporin A microemulsion', 'Dapagliflozin',
                       'Denosumab', 'Desloratadine', 'Desmopressin acetate',
                       'Dexamethasone', 'Dextran, HPMC', 'Diazepam', 'Diclofenac',
                       'Dimenhydrinate', 'Domperidone', 'Doxazosin', 'Doxofylline',
                       'Dulaglutide', 'Electrolyte', 'Eletriptan', 'Empagliflozin',
                       'Empagliflozin, Metformin', 'Enalapril', 'Entacapone',
                       'Epoetin alfa', 'Epoetin beta', 'Erenumab', 'Ergotamine, Caffeine',
                       'Erlotinib', 'Escitalopram', 'Ethambutol', 'Etoricoxib',
                       'Ezetimibe', 'Ezetimibe, Atorvastatin', 'Febuxostat',
                       'Fenofibrate', 'Fentanyl', 'Ferric hydroxide sucrose complex',
                       'Ferrous fumarate, Folic, Vitamin B6',
                       'Ferrous fumarate, Multivitamin, Mineral', 'Fexofenadine',
                       'Finasteride', 'Fluconazole', 'Flunarizine',
                       'Flupentixol, Melitracen', 'Fluticasone furoate',
                       'Fluticasone furoate, Vilanterol', 'Folic acid', 'Furosemide',
                       'Fusidic acid, Betamethasone valerate', 'Gabapentin',
                       'Ginkgo biloba extract', 'Gliclazide', 'Glimepiride', 'Glipizide',
                       'Goserelin acetate', 'HPMC', 'Haloperidol', 'Hydrochlorothiazide',
                       'Hydroxychloroquine', 'Hydroxyzine', 'Hyoscine-N-butylbromide',
                       'Ibuprofen', 'Influenza vaccine',
                       'Insulin, Insulin aspart, Insulin aspart protamine',
                       'Insulin, Insulin lispro, Insulin lispro protamine',
                       'Insulin, Regular insulin', 'Ipratropium, Fenoterol', 'Isoniazid',
                       'Ivermectin', 'Ixekizumab', 'Lamivudine', 'Lansoprazole',
                       'Leflunomide', 'Lercanidipine', 'Letrozole', 'Leuprorelin acetate',
                       'Levocetirizine', 'Levodopa, Benserazide', 'Levothyroxine',
                       'Lipase, Protease, Amylase', 'Loperamide', 'Loratadine',
                       'Lorazepam', 'Losartan', 'Magnesium hydroxide', 'Magnesium oxide',
                       'Manidipine', 'Meloxicam', 'Memantine', 'Metformin', 'Methimazole',
                       'Methotrexate', 'Methyl Salicylate',
                       'Methyl salicylate, Menthol, Camphor, Eucalyptu, Eu', 'Methyldopa',
                       'Methylphenidate', 'Metoclopramide', 'Metoprolol', 'Minoxidil',
                       'Mirtazapine', 'Mometasone furoate', 'Montelukast', 'Mosapride',
                       'Multivitamin', 'Multivitamin, Mineral', 'Mycophenolate',
                       'Mycophenolate mofetil, Mycophenolate', 'Naproxen', 'Nicergoline',
                       'Nifedipine', 'Olopatadine', 'Omeprazole', 'Ondansetron',
                       'Oxytetracycline, Polymyxin B', 'Paracetamol',
                       'Paracetamol, Codeine', 'Paracetamol, Orphenadrine',
                       'Paracetamol, Tramadol', 'Parecoxib', 'Pentoxifylline',
                       'Phentermine', 'Phenytoin', 'Pioglitazone', 'Pitavastatin',
                       'Pli (ไพล)', 'Pneumococcal vaccine', 'Potassium chloride',
                       'Pravastatin', 'Pre-filled Saline Syringes', 'Prednicarbate',
                       'Prednisolone', 'Pregabalin', 'Probenecid', 'Procaterol',
                       'Propranolol', 'Pseudoephedrine', 'Pyridostigmine', 'Quetiapine',
                       'Rifampin, Rifampicin', 'Rivaroxaban', 'Ropinirole',
                       'Rosuvastatin', 'Rupatadine', 'Salbutamol',
                       'Salmeterol, Fluticasone', 'Semaglutide', 'Sertraline',
                       'Sevelamer', 'Sildenafil', 'Simethicone', 'Simvastatin',
                       'Sitagliptin', 'Skin cream', 'Sodium alginate',
                       'Sodium bicarbonate', 'Sodium chloride', 'Spironolactone',
                       'Standardised senna extract', 'Sterile Water', 'Sulfasalazine',
                       'Sumatriptan', 'Teneligliptin', 'Theophylline', 'Tiotropium',
                       'Tiotropium,Olodateral', 'Tolperisone', 'Tolterodine',
                       'Topiramate', 'Tramadol', 'Trazodone', 'Triamcinolone acetonide',
                       'Trihexyphenidyl', 'Trimetazidine',
                       'Trimethoprim, Sulfamethoxazole, Co-trimoxazole', 'Tussis', 'Urea',
                       'Valproic acid', 'Valsartan', 'Valsartan, Hydrochlorothiazide',
                       'Verapamil', 'Vitamin B Complex', 'Vitamin B1',
                       'Vitamin B1, Vitamin B6, Vitamin B12', 'Vitamin B12', 'Vitamin B6',
                       'Vitamin C', 'Vitamin D2', 'Vortioxetine', 'Warfarin',
                       'Water for injection', 'Zinc sulfate', 'Zoledronic acid',
                       'Zonisamide', 'กาวน์พลาสติกกันน้ำชนิดเต็มตัว', 'จุกล้างจมูก',
                       'Subject_ID']


# Column labels selected for the clinic group (df_clinic). sbp1/dbp1 and
# sbp2/dbp2 are the two blood-pressure column pairs that the clinic cleansing
# cell later collapses into single sbp/dbp columns.
clinic_feature = [
    'Subject_ID',
    'Temp',
    'Pulse',
    'RR',
    'O2Sat',
    'Body Weight',
    'Height',
    'BMI',
    'sbp1',
    'dbp1',
    'sbp2',
    'dbp2',
]

# Column labels selected for the label group (df_label): the key column
# followed by the four *_label columns.
label_feature = [
    'Subject_ID',
    'DM_label',
    'HTN_label',
    'CKD_label',
    'DLP_label',
]

# DATA EXTRACTION

In [5]:
# Split the master frame into one frame per feature group.
# Every group is taken as an explicit copy: several of these frames are
# cleaned in place further down (fillna/replace with inplace=True), and an
# explicit copy guarantees that cleaning never aliases the master frame `df`.

# Personal Columns
df_personal = df[personal_feature].copy()

# ICD-10 Columns
df_icd10 = df[icd10_feature].copy()

# DX Columns
df_dx = df[dx_feature].copy()

# Lab Columns
df_lab = df[lab_feature].copy()

# Drug
df_drug = df[drug_feature].copy()

# Drug Details
df_drug_detail = df[drug_detail_feature].copy()

# Clinic
df_clinic = df[clinic_feature].copy()

# Label
df_label = df[label_feature].copy()

# DATA CLEANSING

In [6]:
# Empty report table. Its column labels are reused as the index of each
# per-group summary Series built in the cleansing cells.
report_columns = ['group', 'row', 'duplicated', 'subject id', 'new row']
df_report = pd.DataFrame(columns=report_columns)

### Personal  

In [7]:
# Personal columns

# Flag rows that exactly repeat an earlier row, then collect those rows and
# the distinct Subject_IDs they belong to.
personal_repeat_mask = df_personal.duplicated()
df_personal_dup = df_personal[personal_repeat_mask]
id_dup = df_personal_dup['Subject_ID'].drop_duplicates().values

# Keep only the first occurrence of every distinct row.
df_personal_nodup = df_personal[~personal_repeat_mask]

# Summary row: group name, input rows, repeated rows, affected IDs, output rows.
personal_report = [
    'Personal',
    len(df_personal),
    len(df_personal_dup),
    id_dup,
    len(df_personal_nodup),
]
df_personal_report = pd.Series(personal_report, index=df_report.columns)

### ICD-10

In [8]:
# ICD-10 columns

# NaN cells become 0. The frame is rebound rather than filled with
# inplace=True, so a frame derived from `df` is never mutated in place.
df_icd10 = df_icd10.fillna(0)

# Rows that exactly repeat an earlier row, and the distinct Subject_IDs they
# belong to (used only for the report).
df_icd10_dup = df_icd10[df_icd10.duplicated()]
id_dup = df_icd10_dup['Subject_ID'].drop_duplicates().values

# Every column except the grouping key.
variable = list(df_icd10.columns)
variable.remove('Subject_ID')

# Collapse to one row per Subject_ID, keeping the column-wise maximum, then
# cast every column to int. The earlier `drop_duplicates()` result was
# overwritten by this statement without being read, so it has been removed.
df_icd10_nodup = (
    df_icd10.groupby(['Subject_ID'])[variable]
    .max()
    .reset_index()
    .astype(int)
)

# Summary row: group name, input rows, repeated rows, affected IDs, output rows.
icd10_report = ['ICD-10', df_icd10.shape[0], df_icd10_dup.shape[0], id_dup, df_icd10_nodup.shape[0]]
df_icd10_report = pd.Series(icd10_report, index=df_report.columns)

### Drug  

In [9]:
# Drug columns

# NaN cells become 0. The frame is rebound rather than filled with
# inplace=True, so a frame derived from `df` is never mutated in place.
df_drug = df_drug.fillna(0)

# Rows that exactly repeat an earlier row, and the distinct Subject_IDs they
# belong to (used only for the report).
df_drug_dup = df_drug[df_drug.duplicated()]
id_dup = df_drug_dup['Subject_ID'].drop_duplicates().values

# Every column except the grouping key.
variable = list(df_drug.columns)
variable.remove('Subject_ID')

# Collapse to one row per Subject_ID, keeping the column-wise maximum, then
# cast every column to int. The earlier `drop_duplicates()` result was
# overwritten by this statement without being read, so it has been removed.
df_drug_nodup = (
    df_drug.groupby(['Subject_ID'])[variable]
    .max()
    .reset_index()
    .astype(int)
)

# Summary row: group name, input rows, repeated rows, affected IDs, output rows.
drug_report = ['Drug', df_drug.shape[0], df_drug_dup.shape[0], id_dup, df_drug_nodup.shape[0]]
df_drug_report = pd.Series(drug_report, index=df_report.columns)

### Drug Details

In [10]:
# Drug Details

# NaN cells become 0. The frame is rebound rather than filled with
# inplace=True, so a frame derived from `df` is never mutated in place.
df_drug_detail = df_drug_detail.fillna(0)

# Rows that exactly repeat an earlier row, and the distinct Subject_IDs they
# belong to (used only for the report).
df_drug_detail_dup = df_drug_detail[df_drug_detail.duplicated()]
id_dup = df_drug_detail_dup['Subject_ID'].drop_duplicates().values

# Every column except the grouping key.
variable = list(df_drug_detail.columns)
variable.remove('Subject_ID')

# Collapse to one row per Subject_ID, keeping the column-wise maximum, then
# cast every column to int. The earlier `drop_duplicates()` result was
# overwritten by this statement without being read, so it has been removed.
df_drug_detail_nodup = (
    df_drug_detail.groupby(['Subject_ID'])[variable]
    .max()
    .reset_index()
    .astype(int)
)

# Summary row: group name, input rows, repeated rows, affected IDs, output rows.
drug_detail_report = ['Drug Details', df_drug_detail.shape[0], df_drug_detail_dup.shape[0], id_dup, df_drug_detail_nodup.shape[0]]
df_drug_detail_report = pd.Series(drug_detail_report, index=df_report.columns)

### Clinic

In [11]:
# Clinic

# Replace 0 with NaN (the original comment had the direction reversed).
# NOTE(review): presumably 0 marks a measurement that was not recorded - confirm.
# The frame is rebound rather than modified with inplace=True.
df_clinic = df_clinic.replace(0, np.nan)

# Rows that exactly repeat an earlier row, and the distinct Subject_IDs they
# belong to (used only for the report).
df_clinic_dup = df_clinic[df_clinic.duplicated()]
id_dup = df_clinic_dup['Subject_ID'].drop_duplicates().values

# Every column except the grouping key.
variable = list(df_clinic.columns)
variable.remove('Subject_ID')

# Collapse to one row per Subject_ID, keeping the column-wise maximum.
df_clinic_groupby = df_clinic.groupby(['Subject_ID'])[variable].max().reset_index()

# Build the final frame in one chain so no derived frame is mutated in place:
#   * drop exact duplicate rows;
#   * sbp/dbp take the second reading when present, otherwise the first
#     (after the group-by above these are per-subject maxima, not the
#     chronologically last reading);
#   * drop the four raw blood-pressure columns.
# The second 0 -> NaN replacement was removed: every 0 was already replaced
# before the group-by, and a maximum over the remaining values cannot be 0.
df_clinic_nodup = (
    df_clinic_groupby
    .drop_duplicates()
    .assign(
        sbp=lambda frame: frame['sbp2'].fillna(frame['sbp1']),
        dbp=lambda frame: frame['dbp2'].fillna(frame['dbp1']),
    )
    .drop(columns=['sbp1', 'dbp1', 'sbp2', 'dbp2'])
)

# Summary row: group name, input rows, repeated rows, affected IDs, output rows.
clinic_report = ['Clinic', df_clinic.shape[0], df_clinic_dup.shape[0], id_dup, df_clinic_nodup.shape[0]]
df_clinic_report = pd.Series(clinic_report, index=df_report.columns)

### Lab

In [12]:
def remove_repeated(x):
    """Turn a string lab value with a trailing ``(Repeated)`` marker into an ``int``.

    Non-string values are returned unchanged. For strings, all trailing
    characters that occur in ``'  (Repeated)'`` are stripped (``str.rstrip``
    treats its argument as a set of characters, not as a suffix) and the
    remainder is parsed with ``int``; ``ValueError`` is raised when the
    remainder is not an integer literal.
    """
    if not isinstance(x, str):
        return x
    return int(x.rstrip('  (Repeated)'))

# Textual lab results and the numeric code each one is replaced with.
# The pairs are applied one after another, in this order, across the whole frame.
lab_text_codes = [
    ("Negative", 0),
    ("2+", 2),
    ("Adequate", 0),
    ("Adequate (with Giant platelets)", 0),
    ("Increase", 1),
    ("Trace", 0),
    ("Yellow", 0),
    ("Pale yellow", 1),
    ("Clear", 0),
    ("Rare", 0),
    ("Few", 1),
    ("Moderate", 2),
    ("0-1", 0),
]

for text_value, numeric_code in lab_text_codes:
    df_lab.replace(text_value, numeric_code, inplace=True)

# Keep only the columns listed in lab_feature
df_lab = df_lab[lab_feature]

In [13]:
# Rows that exactly repeat an earlier row, and the subjects they belong to
df_lab_dup = df_lab[df_lab.duplicated()]
id_dup = df_lab_dup['Subject_ID'].drop_duplicates().values

# Every column except the grouping key
variable = list(df_lab.columns)
variable.remove('Subject_ID')

# Collapse to one row per Subject_ID by taking the per-column maximum
df_lab_nodup = (
    df_lab
    .groupby(['Subject_ID'])[variable]
    .agg('max')
    .reset_index()
)

# Report row: group name, input rows, duplicated rows, affected subject ids, output rows
lab_report = [
    'Lab',
    df_lab.shape[0],
    df_lab_dup.shape[0],
    id_dup,
    df_lab_nodup.shape[0],
]
df_lab_report = pd.Series(lab_report, index=df_report.columns)

### Label

In [14]:
# Rows that exactly repeat an earlier row, and the distinct subjects they belong to
repeated_row_mask = df_label.duplicated()
df_label_dup = df_label[repeated_row_mask]
id_dup = df_label_dup['Subject_ID'].unique()

# Every column except the grouping key
variable = [column for column in df_label.columns if column != 'Subject_ID']

# One row per Subject_ID, keeping the per-column maximum
df_label_groupby = df_label.groupby(['Subject_ID'])[variable].max().reset_index()

# Remove any identical rows that remain
df_label_nodup = df_label_groupby.drop_duplicates()

# Report row: group name, input rows, duplicated rows, affected subject ids, output rows
label_report = [
    'Label',
    len(df_label),
    len(df_label_dup),
    id_dup,
    len(df_label_nodup),
]
df_label_report = pd.Series(label_report, index=df_report.columns)

## Report  

In [15]:
# Stack the per-group report Series into rows and add them to df_report.
# DataFrame.append was removed in pandas 2.0; building a frame from the Series
# and concatenating gives the same rows and index.
# NOTE: df_report is extended in place of itself, so re-running this cell
# without re-creating df_report adds the same rows again.
group_reports = pd.DataFrame([
    df_personal_report,
    df_icd10_report,
    df_drug_report,
    df_drug_detail_report,
    df_clinic_report,
    df_lab_report,
    df_label_report,
])
df_report = pd.concat([df_report, group_reports])

df_report

Unnamed: 0,group,row,duplicated,subject id,new row
0,Personal,427,29,"[649131, 647655, 129483, 121410, 14413, 65555, 342992, 648773, 301633, 535807, 38608, 66071, 15439, 642321, 46244, 461995, 66956, 491497, 99104, 571589, 266280, 109588, 649696]",398
1,ICD-10,427,16,"[649131, 647655, 14413, 65555, 342992, 648773, 301633, 15439, 642321, 46244, 461995, 66956, 491497]",398
2,Drug,427,9,"[647655, 14413, 342992, 535807, 66071, 642321, 461995]",398
3,Drug Details,427,6,"[647655, 14413, 342992, 642321, 461995]",398
4,Clinic,427,14,"[649131, 647655, 14413, 342992, 38608, 642321, 46244, 461995, 66956, 491497, 99104]",398
5,Lab,427,12,"[649131, 647655, 129483, 14413, 342992, 38608, 642321, 461995, 99104]",398
6,Label,427,19,"[647655, 129483, 14413, 65555, 342992, 648773, 301633, 66071, 15439, 642321, 46244, 461995, 66956, 491497, 571589]",398


# Text Analysis

In [16]:
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

## Doctor Chief Complaint

In [17]:
# Free-text note columns. Copy so the new column is not written into a slice of df.
df_doc_cc = df[['Subject_ID', 'Chifef Complaint', 'Present Illness', 'Medical Note']].copy()

# Join the three note fields into one text per row.
# Every field is stringified first: without that, a missing 'Present Illness'
# makes the whole concatenation missing for that row and the other two
# fields are lost.
df_doc_cc["Doc CC"] = (
    df_doc_cc["Chifef Complaint"].map(str)
    + " " + df_doc_cc["Present Illness"].map(str)
    + " " + df_doc_cc["Medical Note"].map(str)
)

df_doc_cc = df_doc_cc[['Subject_ID', 'Doc CC']].copy()

df_doc_cc['Doc CC'] = df_doc_cc['Doc CC'].astype(str)

# One row per subject: all of the subject's texts joined by a space
df_doc_cc_nodup = df_doc_cc.groupby(['Subject_ID']).agg({'Doc CC': ' '.join}).reset_index()

# Lower-case so that later keyword matching is case-insensitive
df_doc_cc_nodup['Doc CC'] = df_doc_cc_nodup['Doc CC'].str.lower()

### Pre-process Text

In [18]:
import string  # NOTE(review): not used in this cell; kept in case a later cell relies on it

# Substrings deleted from the note text. 'nan' is presumably the text produced
# by stringifying missing values; it is removed wherever it occurs, including
# inside longer words.
special_character = ['[',']','"', '#', 'nan', ':', ';']

# Vectorised literal replacement over the whole column. regex=False is required
# because entries such as '[' are not valid regular expressions. This replaces a
# per-row loop that assigned through chained indexing (df[col][i] = ...).
cleaned_doc_cc = df_doc_cc_nodup['Doc CC']
for character in special_character:
    cleaned_doc_cc = cleaned_doc_cc.str.replace(character, '', regex=False)
df_doc_cc_nodup['Doc CC'] = cleaned_doc_cc

In [19]:
# Word Token
# A "<word>_key" column is 1 when the lower-cased word occurs anywhere in the
# note text (substring match), else 0.

word_list = ['diabetes', 'hypertension', 'obesity', 'statin',
             'thyroid', 'thyrotoxicosis',
             'hypothyroid', 'hypoglycemia', 'hypogonadism', 'kidney stone', 'bisoprolol', 'thalassemia',
             'hypothyroidism', 'DYSLIPID', 'Dyslipidemia']

word_list_lower = [word.lower() for word in word_list]

doc_text = df_doc_cc_nodup['Doc CC'].astype(str)

for word_key in word_list_lower:
    # The words contain no regex metacharacters, so a literal match is equivalent
    df_doc_cc_nodup[word_key + '_key'] = doc_text.str.contains(word_key, regex=False).astype(int)


# Keyword Token
# A "<keyword>_key" column is 1 when the lower-cased abbreviation appears as a
# whole token produced by deepcut, else 0.

keyword_list = ['DM', 'CKD', 'CPK', 'WNL', 'HT', 'HTN', 'DLP', 'CVD', 'TB', 'PID',
                'UTI', 'ARF', 'CRF', 'ESRD', 'ATN', 'ALS', 'CVA', 'ARF', 'CRF', 'T1D', 'T2D', 'T1DM', 'T2DM']

keyword_list_lower = [keyword.lower() for keyword in keyword_list]

# Tokenise each note once. Whitespace-only tokens and the literal 'nan' are
# discarded; a set is enough because only presence/absence is needed.
doc_token_sets = [
    {token for token in deepcut.tokenize(text) if token.strip() and token != 'nan'}
    for text in df_doc_cc_nodup['Doc CC']
]

for keyword_key in keyword_list_lower:
    # Repeated keywords in the list simply rewrite the same column
    df_doc_cc_nodup[keyword_key + '_key'] = [
        int(keyword_key in tokens) for tokens in doc_token_sets
    ]

# Drop the raw text; only Subject_ID and the 0/1 flag columns remain
df_doc_cc_nodup = df_doc_cc_nodup.drop(columns=['Doc CC']).astype(int)

## Nurse Chief Complaint

In [20]:
# df_nurse_cc = df[['Subject_ID', 'Nurse Chief Complaint', 'Nurse Present Illness']]

# df_nurse_cc["Nurse CC"] = df_nurse_cc["Nurse Chief Complaint"].map(str) + " " + df_nurse_cc["Nurse Present Illness"].map(str)

# df_nurse_cc = df_nurse_cc[['Subject_ID','Nurse CC']]

# df_nurse_cc['Nurse CC'] = df_nurse_cc['Nurse CC'].astype(str)

# df_nurse_cc_nodup = df_nurse_cc.groupby(['Subject_ID']).agg({'Nurse CC': ' '.join}).reset_index()

In [21]:
# df_nurse_cc_nodup['Nurse CC List'] = ""

# for i in range (len(df_nurse_cc_nodup)):
    
#     df_nurse_cc_nodup['Nurse CC List'][i] = deepcut.tokenize(df_nurse_cc_nodup['Nurse CC'][i])
    
#     # Remove " " in list
#     df_nurse_cc_nodup['Nurse CC List'][i] = [ele for ele in df_nurse_cc_nodup['Nurse CC List'][i] if ele.strip()]
    
#     # Remove nan in list
#     df_nurse_cc_nodup['Nurse CC List'][i] = [x for x in df_nurse_cc_nodup['Nurse CC List'][i] if x != 'nan']

# DATA MERGING

In [40]:
# Personal + ICD-10 + Drugs + Lab + Clinic + Doctor notes + Label
# Each merge uses pandas' defaults (inner join on the column names the two frames share).

df_final = df_personal_nodup
for part in (df_icd10_nodup, df_drug_nodup, df_lab_nodup,
             df_clinic_nodup, df_doc_cc_nodup, df_label_nodup):
    df_final = df_final.merge(part)


# Recode the Thai letters to numbers across the whole frame
# (presumably the male / female codes of the sex column; confirm against the data).
for thai_code, numeric_code in (("ช", 1), ("ญ", 0)):
    df_final.replace(thai_code, numeric_code, inplace=True)


# Same merge chain with the drug-detail table included.
# X is built from the un-recoded frames, so it does not get the replacements above.
X = df_personal_nodup
for part in (df_icd10_nodup, df_drug_nodup, df_drug_detail_nodup,
             df_lab_nodup, df_clinic_nodup, df_doc_cc_nodup, df_label_nodup):
    X = X.merge(part)


df_final.shape

(398, 651)

# DATA TRANSFORMATION

In [41]:
# Lab columns whose missing values are replaced by 0
feature_convert = [
    'Glucose', 'Glycohemoglobin (HbA1c)', 'eGFR', 'Creatinine',
    'BUN', 'Albumin', 'Uric Acid', 'Total Protein',
    'Cholesterol', 'Triglyceride', 'HDL-c', 'LDL-c',
]

for feature in feature_convert:
    lab_values = df_final[feature]
    # 0 where the value is missing, the original value everywhere else
    df_final[feature] = np.where(lab_values.isna(), 0, lab_values)

In [42]:
# from sklearn.experimental import enable_iterative_imputer
# from sklearn.impute import IterativeImputer
# train_mice = train.copy(deep=True)

# mice_imputer = IterativeImputer()


# feature_impute_list = ['Temp', 'Pulse', 'RR', 'O2Sat', 'Body Weight', 'Height', 'BMI', 'sbp', 'dbp']

# for feature_impute in feature_impute_list:
#     df_final[feature_impute] = mice_imputer.fit_transform(df_final[[feature_impute]])

In [43]:
# df_final.to_csv('Data.csv')

# CLASSIFICATION

In [44]:
NCDs = ['DM', 'HTN', 'CKD', 'DLP']
label_columns = [name + '_label' for name in NCDs]

# ---- Loop-invariant setup (identical for every NCD, so done once) ----

# Model table: everything except the identifier columns
df_model = df_final[df_final.columns[~df_final.columns.isin(['Subject_ID', 'HN'])]]

# Candidate features: every column that is not one of the four target labels
important_features = [column for column in df_model.columns if column not in label_columns]

# Fixed 75/25 split shared by all four models
train, test = train_test_split(df_model, test_size = 0.25, random_state = 42)
Xtrain = train[important_features]
Xtest = test[important_features]

# Hyper-parameter grid searched for every NCD.
# NOTE(review): scale_pos_weight=9 presumably reflects the roughly 1:9
# positive:negative ratio seen in the test supports; confirm.
parameters = {'scale_pos_weight': [9],
              'n_estimators': [100, 200, 300],
              'max_depth': [2, 3, 4, 5, 6],
              'min_child_weight': [1, 10]
             }

# Rows are collected in plain lists and turned into DataFrames once after the
# loop (DataFrame.append was removed in pandas 2.0).
result_rows = []
feature_rows = []

for i in NCDs:  # i is the NCD name, e.g. 'DM'

    ytrain = train[i+'_label']
    ytest = test[i+'_label']

    # Grid-search an XGBoost classifier on F1 with 5-fold CV
    xgb = XGBClassifier()
    clf = GridSearchCV(xgb, parameters, scoring='f1', cv=5, error_score='raise')
    clf.fit(Xtrain, ytrain)

    # Refit on the full training set with the best parameters and predict the hold-out set
    xgb.set_params(**clf.best_params_)
    xgb.fit(Xtrain, ytrain)
    ypred = xgb.predict(Xtest)

    # Features the fitted model actually used (non-zero importance)
    feature = Xtrain.columns[xgb.feature_importances_ != 0].tolist()
    feature_rows.append({'NCDs': i, 'Feature': feature})

    # Per-class precision / recall / F1 / support on the hold-out set
    report = classification_report(ytest, ypred, output_dict=True)
    for class_label in ('0', '1'):
        class_metrics = report[class_label]
        result_rows.append({'NCDs': i,
                            'Class': class_label,
                            'Precision': class_metrics['precision'],
                            'Recall': class_metrics['recall'],
                            'F1-score': class_metrics['f1-score'],
                            'Support': class_metrics['support']})

# NOTE(review): the candidate features include ICD-10 code columns; check that
# near-perfect scores are not simply the diagnosis codes restating the label.

# Dataframe reports (each gets a unique 0..n-1 index)
result_report = pd.DataFrame(result_rows, columns=['NCDs', 'Class', 'Precision', 'Recall', 'F1-score', 'Support'])
feature_report = pd.DataFrame(feature_rows, columns=['NCDs', 'Feature'])


result_report.groupby(['NCDs', 'Class'], sort=False).first().apply(lambda x:round(x,3))

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall,F1-score,Support
NCDs,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DM,0,1.0,0.978,0.989,91
DM,1,0.818,1.0,0.9,9
HTN,0,1.0,1.0,1.0,91
HTN,1,1.0,1.0,1.0,9
CKD,0,1.0,1.0,1.0,92
CKD,1,1.0,1.0,1.0,8
DLP,0,1.0,1.0,1.0,89
DLP,1,1.0,1.0,1.0,11


In [46]:
# Show, for each NCD, the features whose XGBoost importance was non-zero
feature_report

Unnamed: 0,NCDs,Feature
0,DM,"[SEX_x, AGE, E109, E119, Glucose, Glycohemoglobin (HbA1c), eGFR, Creatinine, Albumin, Temp, Pulse, RR, O2Sat, Body Weight, Height, BMI, sbp, dbp, dm_key, ht_key, dlp_key]"
0,HTN,"[AGE, B1810, E039, F510, I10, I482, N184, FOLT01, LORT05, MADT03, THYT02, XART02, eGFR, Creatinine, Albumin, LDL-c, Temp, Pulse, RR, O2Sat, Body Weight, Height, BMI, sbp, dbp, thyroid_key, dyslipidemia_key, ckd_key, ht_key, htn_key, dlp_key, esrd_key]"
0,CKD,"[AGE, D638, EPOI00, eGFR, Creatinine, BUN, Temp, Pulse, Body Weight, Height, BMI, sbp, dbp, ckd_key, ht_key, t2dm_key]"
0,DLP,"[AGE, AMLT00, Glucose, LDL-c, Temp, Pulse, O2Sat, Body Weight, Height, BMI, sbp, dbp, dyslipid_key, dyslipidemia_key, dm_key, dlp_key]"
