In [2]:
import argparse
import os
from scipy.stats import ttest_rel
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import SelectKBest
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier  
from sklearn.model_selection import KFold
import pandas as pd

# Single seed shared by the global NumPy RNG and by every stochastic estimator below.
import numpy as np
SEED = 401
np.random.seed(SEED)

# Candidate models to compare.
# random_state is pinned on each estimator that accepts it: with the default
# random_state=None scikit-learn draws from the global NumPy RNG, so results
# would depend on which cells were run (and how many times) after the seed was
# set. GaussianNB takes no random_state argument.
classifiers = [
    DecisionTreeClassifier(random_state=SEED),
    GaussianNB(),
    RandomForestClassifier(max_depth=5, n_estimators=10, random_state=SEED),
    MLPClassifier(alpha=0.05, random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
]


In [5]:
# Read the per-patient inpatient table from disk and keep every column except
# the first one of the CSV (positional slice, not a name-based drop).
inpatient_full_path = '/home/hassan/lily/MLA/FDA/inpatient_full.csv'
inpatient_full = pd.read_csv(inpatient_full_path).iloc[:, 1:]

In [6]:
# Display inpatient_full as the cell output; the recorded rendering is
# truncated by pandas (rows and columns elided with '...').
inpatient_full

Unnamed: 0,Internalpatientid,num_stays,stay_length,num_unique_units,num_transfers,num_cvd_readmission,Readmission,Died,AO,CVD,...,Age 100-120,age_mean,age_std,age_min,age_max,stay_min,stay_max,stay_mean,stay_std,freq
0,1,4,15.89,2,0,0,1,0,0,0,...,0,71.754872,5.070261,67.676524,78.725684,1.73,1.73,3.972500,1.849998,0.33
1,2,22,93.26,5,2,10,1,0,0,1,...,0,65.560278,3.842360,55.027326,68.966075,0.09,0.09,4.239091,5.357757,1.57
2,3,2,407.50,2,0,0,1,1,0,1,...,0,79.960678,1.716789,78.746725,81.174631,1.83,1.83,203.750000,285.558003,0.67
3,4,1,7.15,1,0,0,0,0,0,1,...,0,84.167594,0.000000,84.167594,84.167594,7.15,7.15,7.150000,0.000000,1.00
4,5,2,8.45,2,0,0,1,1,0,1,...,0,76.449088,0.053764,76.411071,76.487105,1.04,1.04,4.225000,4.504270,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84531,169055,1,6.47,1,0,0,0,0,0,1,...,0,58.911722,0.000000,58.911722,58.911722,6.47,6.47,6.470000,0.000000,1.00
84532,169057,29,98.36,3,2,13,1,0,0,1,...,0,81.597657,4.761612,73.477044,85.802822,0.42,0.42,3.391724,3.378183,2.23
84533,169060,8,45.38,2,0,5,1,0,0,1,...,0,66.032022,6.666523,56.939978,71.291995,0.07,0.07,5.672500,7.616855,0.53
84534,169062,12,178.63,4,1,0,1,1,1,0,...,0,72.649839,1.358025,70.507481,74.065762,1.32,1.32,14.885833,16.735598,3.00


In [11]:
# Read the raw inpatient admissions file and keep every column except the
# first one of the CSV (positional slice, not a name-based drop).
inpatient_test_path = '/data/public/MLA/VCHAMPS-Quality/inpatient_admissions_qual_Formatfixed.csv'
inpatient_test = pd.read_csv(inpatient_test_path).iloc[:, 1:]


In [16]:
# Display inpatient_test as the cell output; the recorded rendering is
# truncated by pandas (rows elided with '...').
inpatient_test

Unnamed: 0,X,Internalpatientid,Age.at.admission,Admission.date,Discharge.date,Admitting.unit.service,Discharging.unit.service,Admitting.specialty,Discharging.specialty,First.listed.discharge.diagnosis.icd10.subcategory,Second.listed.discharge.diagnosis.icd10.subcategory,Discharge.disposition,Died.during.admission,Outpatientreferralflag,Serviceconnectedflag,Agentorangeflag,State
0,4,100012,55.317020,2004-10-25 08:54:01.0,2004-10-26 08:05:06.0,SURGERY,SURGERY,NEUROSURGERY,GENERAL SURGERY,Other and unspecified noninfective gastroenter...,Other specified disorders of white blood cells,Regular,,Yes,,,New Mexico
1,82,100399,85.706740,2010-03-24 19:31:38.0,2012-11-10 19:50:48.0,NHCU,NHCU,DOMICILIARY,NH HOSPICE,Unspecified mental disorder due to known physi...,"Malignant neoplasm of stomach, unspecified",Death without autopsy,,,No,No,Minnesota
2,154,100694,83.926120,2016-01-12 23:55:24.0,2016-01-13 20:55:24.0,NON-COUNT,NON-COUNT,SPINAL CORD INJURY,MEDICAL OBSERVATION,Abnormal levels of other serum enzymes,Other acute ischemic heart diseases,Regular,,Yes,,No,Idaho
3,155,100694,88.611203,2020-09-20 09:28:13.0,2020-09-22 11:36:18.0,NON-COUNT,NON-COUNT,SPINAL CORD INJURY,MEDICAL OBSERVATION,"Viral intestinal infection, unspecified",Hypo-osmolality and hyponatremia,Regular,,Yes,,No,Idaho
4,322,101407,88.925931,2009-05-03 10:15:50.0,2009-05-04 13:05:08.0,MEDICINE,MEDICINE,SPINAL CORD INJURY OBSERVATION,GENERAL(ACUTE MEDICINE),Unspecified dementia,Hypertensive chronic kidney disease with stage...,Regular,,Yes,,No,Louisiana
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4005,664197,99870,87.481429,2008-12-07 11:05:40.0,2008-12-27 10:57:14.0,MEDICINE,MEDICINE,INTERMEDIATE MEDICINE,GENERAL(ACUTE MEDICINE),Spinal stenosis,"Other thoracic, thoracolumbar and lumbosacral ...",Regular,,No,,,South Dakota
4006,664747,95448,47.736311,2009-03-21 14:55:07.0,2009-03-23 12:33:43.0,MEDICINE,MEDICINE,INTERMEDIATE MEDICINE,GENERAL(ACUTE MEDICINE),Other chest pain,Hypertensive chronic kidney disease with stage...,Regular,,Yes,,No,Nevada
4007,665478,98416,78.000151,2005-09-12 01:19:43.0,2005-09-18 19:18:02.0,MEDICINE,MEDICINE,SUBSTANCE ABUSE TRMT UNIT,CARDIOLOGY,"Heart failure, unspecified","Nonrheumatic aortic valve disorder, unspecified",Regular,,No,,,California
4008,665639,99137,88.265051,2019-01-13 21:01:36.0,2019-01-20 22:03:19.0,MEDICINE,MEDICINE,Not specified,GENERAL(ACUTE MEDICINE),Diastolic (congestive) heart failure,"Chronic kidney disease, stage 4 (severe)",Regular,,Yes,,No,Texas


In [15]:
# Count the missing entries in the 'Died.during.admission' column.
# NOTE(review): the recorded result (4010) looks like it equals the number of
# rows in inpatient_test, i.e. the column may be entirely empty — confirm.
inpatient_test['Died.during.admission'].isna().sum()

4010

In [8]:
# Label vector: the 'Died' column.
y = inpatient_full['Died']
# Feature matrix: every column of inpatient_full except 'Readmission' and 'Died'.
# NOTE(review): the displayed frame includes 'Internalpatientid', which
# therefore stays in `x` as a feature — confirm that is intended.
x = inpatient_full.drop(['Readmission', 'Died'], axis='columns')

In [None]:
# Hold out 30% of the rows for evaluation.
# The split uses the feature matrix `x` and label vector `y` built from
# inpatient_full; the previous call referenced an undefined `iris` object.
# random_state reuses the value passed to np.random.seed so the partition is
# the same on every run, regardless of cell execution order.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=401
)
