In [1]:
import sqlite3 as db
import pandas as pd
import featuretools as ft
import json

In [12]:
#
# medication: pandas warns "Columns (11) have mixed types. Specify dtype option on import or set low_memory=False."
# ('medication' is commented out of the dataset list below.)
#
def read_csvs(csv_dir='../db/csv/', datasets=None):
    """Read one CSV file per table and return the frames keyed by table name.

    Parameters
    ----------
    csv_dir : str, optional
        Directory holding the ``<table>.csv`` files. Defaults to
        ``'../db/csv/'``, the location that was previously hard-coded.
    datasets : iterable of str, optional
        Table names, spelled exactly like the CSV file stems. When omitted,
        the built-in table list is used ('medication' is left out of it).

    Returns
    -------
    dict
        Maps each *lower-cased* table name to a one-element tuple
        ``(DataFrame,)``; callers unwrap the frame with ``dfs[name][0]``.

    Any error raised by ``pd.read_csv`` (e.g. a missing file) propagates.
    """
    import os

    if datasets is None:
        datasets = [
            'admissiondrug', 'admissionDx', 'allergy', 'apacheApsVar', 'apachePatientResult',
            'apachePredVar', 'carePlanCareProvider', 'carePlanEOL', 'carePlanGeneral',
            'carePlanGoal', 'carePlanInfectiousDisease', 'customLab', 'diagnosis', 'hospital',
            'infusiondrug', 'intakeOutput', 'lab',
            #'medication',
            'microLab', 'note',
            'nurseAssessment', 'nurseCare', 'nurseCharting', 'pastHistory', 'patient',
            'physicalExam', 'respiratoryCare', 'respiratoryCharting', 'treatment',
            'vitalAperiodic', 'vitalPeriodic',
        ]

    dfs = {}

    for ds_name in datasets:
        csv_path = os.path.join(csv_dir, ds_name + '.csv')
        # Keys are lower-cased; the value stays wrapped in a 1-tuple because
        # callers index it with [0].
        dfs[ds_name.lower()] = (pd.read_csv(csv_path), )

    return dfs

def read_query(q, db_path='../db/sqlite/eicu_v2_0_1.sqlite3'):
    """Run a SQL query against a SQLite database and return the result.

    Parameters
    ----------
    q : str
        SQL text passed to ``pd.read_sql_query``.
    db_path : str, optional
        Path of the SQLite database file. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The query result.

    The connection is closed even when the query raises; the exception itself
    is propagated to the caller.
    """
    conn = db.connect(db_path)
    try:
        return pd.read_sql_query(q, conn)
    finally:
        # Runs on both success and failure, so the connection never leaks.
        conn.close()

def make_relationships(dfs, keys_path='keys.json'):
    """Build the list of table relationships described by a keys JSON file.

    Parameters
    ----------
    dfs : dict
        Currently unused; kept so existing callers keep working.
    keys_path : str, optional
        Path of the JSON file. It maps each table name to an object whose
        ``'fk'`` entry is either a falsy value (no foreign key) or a
        three-item sequence ``[fk_atr, target_table, target_atr]``.
        Defaults to ``'keys.json'``, the location previously hard-coded.

    Returns
    -------
    list of tuple
        ``(target_table, target_atr, ds_name, fk_atr)`` for every table with
        a foreign key, in file order, followed by the fixed
        ``('hospital', 'hospitalid', 'patient', 'hospitalid')`` link.

    Raises
    ------
    KeyError
        If a table entry has no ``'fk'`` key.
    """
    # The context manager closes the file handle, which the previous
    # open(...).read() form never did.
    with open(keys_path) as keys_file:
        pk_fk = json.load(keys_file)

    relationships = []

    for ds_name, keys in pk_fk.items():
        fk = keys['fk']

        # Skip tables without a foreign key. A truthiness test also skips a
        # JSON null instead of failing on the unpack below.
        # 'hospital' and 'medication' entries are always ignored here.
        if not fk or ds_name in ('hospital', 'medication'):
            continue

        fk_atr, target_table, target_atr = fk
        relationships.append((target_table, target_atr, ds_name, fk_atr))

    # The hospital -> patient link is added explicitly rather than read from the file.
    relationships.append(('hospital', 'hospitalid', 'patient', 'hospitalid'))

    return relationships

#------------------------------------------------------------------------------------------------------

# Load every CSV table. Each value in `dfs` is a 1-tuple whose element [0] is the DataFrame.
dfs = read_csvs()
# Build the (target_table, target_atr, ds_name, fk_atr) tuples from keys.json.
relationships = make_relationships(dfs)

# The featuretools ft.dfs call is currently disabled; `relationships` is only consumed by it.
#feature_matrix, feature_defs = ft.dfs(
#    dataframes=dfs,
#    relationships=relationships,
#    target_dataframe_name='patient',
#)

# Display the 'patient' table; [0] unwraps the DataFrame from its 1-tuple.
dfs['patient'][0]

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141764,129391,Female,87,Caucasian,59,91,,157.5,23:36:00,...,ICU to SDU,2,stepdown/other,,,18:58:00,344,Home,Alive,002-1039
1,141765,129391,Female,87,Caucasian,59,91,"Rhythm disturbance (atrial, supraventricular)",157.5,23:36:00,...,Emergency Department,1,admit,46.5,45.0,13:14:00,2250,Step-Down Unit (SDU),Alive,002-1039
2,143870,131022,Male,76,Caucasian,68,103,"Endarterectomy, carotid",167.0,20:46:00,...,Operating Room,1,admit,77.5,79.4,10:00:00,793,Floor,Alive,002-12289
3,144815,131736,Female,34,Caucasian,56,82,"Overdose, other toxin, poison or drug",172.7,01:44:00,...,Emergency Department,1,admit,60.3,60.7,20:48:00,1121,Other External,Alive,002-1116
4,145427,132209,Male,61,Caucasian,68,103,"GI perforation/rupture, surgery for",177.8,23:48:00,...,Operating Room,1,admit,91.7,93.1,22:47:00,1369,Floor,Alive,002-12243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2515,3351763,2741766,Female,62,Caucasian,459,1108,"Fistula/abscess, surgery for (not inflammatory...",165.1,16:08:00,...,Operating Room,1,admit,134.5,133.3,19:24:00,5394,Step-Down Unit (SDU),Alive,035-10391
2516,3352230,2742186,Male,41,African American,458,1107,"CABG alone, coronary artery bypass grafting",177.8,21:21:00,...,Operating Room,2,transfer,127.0,128.5,21:34:00,4261,Telemetry,Alive,035-10089
2517,3352231,2742186,Male,41,African American,458,1104,"Infarction, acute myocardial (MI)",177.8,21:21:00,...,Direct Admit,1,admit,127.0,135.2,22:26:00,1369,Other ICU,Alive,035-10089
2518,3352333,2742269,Male,72,Caucasian,458,1111,GI obstruction,177.8,20:00:00,...,Other Hospital,1,admit,68.3,66.5,17:26:00,4166,Floor,Alive,035-10041


In [10]:
# Display the 'apacheapsvar' table (lower-cased key for 'apacheApsVar'); [0] unwraps the DataFrame.
# NOTE(review): this cell's execution count (10) is lower than that of the cell defining `dfs` (12);
# run the notebook top to bottom so `dfs` exists before this cell runs.
dfs['apacheapsvar'][0]

Unnamed: 0,apacheapsvarid,patientunitstayid,intubated,vent,dialysis,eyes,motor,verbal,meds,urine,...,ph,hematocrit,creatinine,albumin,pao2,pco2,bun,glucose,bilirubin,fio2
92788,92788,141765,0,0,0,4,6,5,0,-1.0000,...,-1.000,37.8,1.04,-1.0,-1.0,-1.0,28.0,61,-1.0,-1
8893,8893,143870,0,0,0,4,6,5,0,-1.0000,...,-1.000,34.1,1.14,-1.0,-1.0,-1.0,14.0,140,-1.0,-1
79585,79585,144815,0,0,0,4,6,5,0,-1.0000,...,-1.000,36.6,0.63,3.6,-1.0,-1.0,6.0,82,0.5,-1
203242,203242,145427,0,0,0,4,6,5,0,-1.0000,...,-1.000,40.4,1.05,-1.0,-1.0,-1.0,14.0,118,-1.0,-1
154681,154681,147307,0,0,0,4,6,5,0,-1.0000,...,-1.000,-1.0,-1.00,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1356309,1356309,3351763,1,1,0,4,5,1,0,3344.5440,...,7.379,26.0,0.88,1.5,104.0,47.6,31.0,149,0.4,60
1347458,1347458,3352230,1,1,0,3,6,5,0,3946.4928,...,7.622,27.0,1.31,-1.0,178.0,25.3,15.0,224,-1.0,100
1325380,1325380,3352231,0,0,0,4,6,5,0,736.3008,...,7.393,41.0,1.42,4.1,84.0,42.2,20.0,117,0.6,28
1555124,1555124,3352333,0,0,0,4,6,4,0,1545.7824,...,7.531,34.0,0.97,3.3,59.0,35.3,20.0,102,0.8,40


In [None]:
#read_query("""
#SELECT * FROM patient
#""")

In [None]:
#read_query("""
#SELECT * FROM apacheApsVar INNER JOIN apachePatientResult ON (apacheApsVar.patientUnitStayID = apachePatientResult.patientUnitStayID)
#                           INNER JOIN apachePredVar ON (apacheApsVar.patientUnitStayID = apachePredVar.patientUnitStayID)
#""")

In [None]:
#df[ df['unitdischargestatus'] == df['uniquepid'][0] ]
#len(patient_df[ patient_df['unitdischargestatus'] == 'Alive' ]) # 2392 alive, 126 dead, 2 NULL