In [49]:
import sqlite3 as db
import pandas as pd
import featuretools as ft
import json

In [50]:
# Run a SQL query
def sql_query(q, db_path='../db/sqlite/eicu_v2_0_1.sqlite3'):
    """Run a SQL query against a SQLite database and return the rows.

    Parameters
    ----------
    q : str
        SQL statement handed to ``pd.read_sql_query``.
    db_path : str, optional
        Path of the SQLite file to open. Defaults to the database file
        this notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The result set of the query.

    Raises
    ------
    Exception
        Whatever ``db.connect`` or ``pd.read_sql_query`` raises is
        propagated unchanged; the connection is closed before it leaves
        this function.
    """
    conn = db.connect(db_path)
    try:
        # try/finally guarantees the connection is released even when the
        # query fails (malformed SQL, missing table, ...).
        return pd.read_sql_query(q, conn)
    finally:
        conn.close()

# Read every CSV
def read_csvs(csv_dir='../db/csv/', datasets=None):
    """Load a set of CSV tables into a dict of DataFrames keyed by table name.

    Parameters
    ----------
    csv_dir : str, optional
        Directory holding one ``<name>.csv`` file per table. Defaults to
        the directory this notebook has always read from.
    datasets : iterable of str, optional
        Table names (file names without the ``.csv`` suffix) to load.
        When omitted, the notebook's full list of tables is loaded.

    Returns
    -------
    dict[str, pandas.DataFrame]
        One DataFrame per requested table, in the order requested.

    Raises
    ------
    FileNotFoundError
        If one of the expected CSV files is missing.
    """
    import os  # local import: the notebook's import cell does not bring in os

    if datasets is None:
        datasets = [
            'admissiondrug', 'admissionDx', 'allergy', 'apacheApsVar',
            'apachePatientResult', 'apachePredVar', 'carePlanCareProvider',
            'carePlanEOL', 'carePlanGeneral', 'carePlanGoal',
            'carePlanInfectiousDisease', 'customLab', 'diagnosis', 'hospital',
            'infusiondrug', 'intakeOutput', 'lab', 'medication', 'microLab',
            'note', 'nurseAssessment', 'nurseCare', 'nurseCharting',
            'pastHistory', 'patient', 'physicalExam', 'respiratoryCare',
            'respiratoryCharting', 'treatment', 'vitalAperiodic',
            'vitalPeriodic',
        ]

    # low_memory=False makes pandas infer each column's dtype from the whole
    # file in a single pass instead of chunk by chunk, which is what triggers
    # the mixed-type DtypeWarning on read_csv.
    return {
        ds_name: pd.read_csv(os.path.join(csv_dir, ds_name + '.csv'), low_memory=False)
        for ds_name in datasets
    }

# Load every table once; each DataFrame is stored under its dataset name.
dfs = read_csvs()
has_dropped_keys = False # Guard flag so the keys are not dropped again every time the next cell is re-run

  dfs[ds_name] = pd.read_csv('../db/csv/' + ds_name + '.csv')


In [51]:
# Drop the ID column (the first column) from every table
def drop_keys(dfs):
    """Strip the leading column from each table in ``dfs``, in place.

    The 'hospital' and 'patient' entries are left untouched so that
    hospitalId / patientUnitStayId are not dropped. The dict is modified
    directly (each affected entry is rebound to a new DataFrame) and
    nothing is returned.
    """
    protected = ('hospital', 'patient')
    for table_name in list(dfs.keys()):
        if table_name in protected:
            continue
        table = dfs[table_name]
        leading_column = table.columns.values[0]
        dfs[table_name] = table.drop(columns=[leading_column])

# drop_keys removes whatever column is currently first, so calling it twice
# would strip a second column; the flag makes re-running this cell a no-op.
if not has_dropped_keys:
    drop_keys(dfs)
    has_dropped_keys = True

In [52]:
# Inspect the carePlanGoal table after its leading ID column has been dropped.
dfs['carePlanGoal']

Unnamed: 0,patientunitstayid,cplgoaloffset,cplgoalcategory,cplgoalvalue,cplgoalstatus,activeupondischarge
0,1318254,800,Infection/Labs,Normal electrolytes,Active,True
1,1318254,800,Infection/Labs,Absence of sepsis,Active,True
2,1318254,800,Infection/Labs,Stable Hgb and Hct,Active,True
3,1318254,800,Cardiovascular,Vital signs within normal parameters,Active,True
4,1318254,36,Cardiovascular,Vital signs within normal parameters,Active,False
...,...,...,...,...,...,...
3628,3158919,1065,Fluid Balance/Treatments,IV fluids line(s) patent,Active,True
3629,3158919,853,Patient-Family,Orient patient to unit,Active,False
3630,3158919,1065,Infection/Labs,Normal electrolytes,Active,True
3631,3158919,853,Fluid Balance/Treatments,Urine output >30 ml/hr,Active,False


In [53]:
# Number of goals recorded under each care-plan category.
dfs['carePlanGoal']['cplgoalcategory'].value_counts()

# dfs['carePlanGoal'].info()

# The six most frequent categories (the ones the has_* helpers below test for):
# Patient-Family              924
# Pulmonary                   604
# Fluid Balance/Treatments    534
# Activity-Safety             435
# Cardiovascular              413
# Infection/Labs              343

Patient-Family              924
Pulmonary                   604
Fluid Balance/Treatments    534
Activity-Safety             435
Cardiovascular              413
Infection/Labs              343
Nutrition/Skin              160
Neurologic                  159
Other                        61
Name: cplgoalcategory, dtype: int64

In [54]:
def _matches_category(row, category):
    """Return 1 when ``row['cplgoalcategory']`` equals ``category``, otherwise 0."""
    if row['cplgoalcategory'] == category:
        return 1
    return 0


def has_Patient_Family(row):
    """1 if the row's goal category is 'Patient-Family', else 0."""
    return _matches_category(row, 'Patient-Family')


def has_Pulmonary(row):
    """1 if the row's goal category is 'Pulmonary', else 0."""
    return _matches_category(row, 'Pulmonary')


def has_Fluid_Balance_Treatments(row):
    """1 if the row's goal category is 'Fluid Balance/Treatments', else 0."""
    return _matches_category(row, 'Fluid Balance/Treatments')


def has_Activity_Safety(row):
    """1 if the row's goal category is 'Activity-Safety', else 0."""
    return _matches_category(row, 'Activity-Safety')


def has_Cardiovascular(row):
    """1 if the row's goal category is 'Cardiovascular', else 0."""
    return _matches_category(row, 'Cardiovascular')


def has_Infection_Labs(row):
    """1 if the row's goal category is 'Infection/Labs', else 0."""
    return _matches_category(row, 'Infection/Labs')

In [55]:
# Discard the goal columns that are not used afterwards.
# `columns=` already identifies the axis, so the redundant `axis=1` is gone.
# errors='ignore' keeps the cell safe to re-run: once the columns have been
# removed, a second run is a no-op instead of raising KeyError.
dfs['carePlanGoal'] = dfs['carePlanGoal'].drop(
    columns=[
        'cplgoaloffset',
        'cplgoalvalue',
        'cplgoalstatus',
        'activeupondischarge',
    ],
    errors='ignore',
)

In [56]:
# Check the table after the unused goal columns were dropped.
dfs['carePlanGoal']

Unnamed: 0,patientunitstayid,cplgoalcategory
0,1318254,Infection/Labs
1,1318254,Infection/Labs
2,1318254,Infection/Labs
3,1318254,Cardiovascular
4,1318254,Cardiovascular
...,...,...
3628,3158919,Fluid Balance/Treatments
3629,3158919,Patient-Family
3630,3158919,Infection/Labs
3631,3158919,Fluid Balance/Treatments


In [57]:
def count_care_plan_categories(care_plan_goal):
    """Count, per patient unit stay, how many goals fall in each tracked category.

    Parameters
    ----------
    care_plan_goal : pandas.DataFrame
        Must contain the columns 'patientunitstayid' and 'cplgoalcategory'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'patientunitstayid', with one int64 'Care_*'
        count column per tracked category. Goals in any other category add
        to no column, so a stay can have zeros everywhere.
    """
    # Insertion order fixes the order of the output columns.
    column_by_category = {
        'Patient-Family': 'Care_Patient_Family',
        'Pulmonary': 'Care_Pulmonary',
        'Fluid Balance/Treatments': 'Care_Fluid_Balance_Treatments',
        'Activity-Safety': 'Care_Activity_Safety',
        'Cardiovascular': 'Care_Cardiovascular',
        'Infection/Labs': 'Care_Infection_Labs',
    }

    goal_category = care_plan_goal['cplgoalcategory']
    # One vectorized comparison per category replaces a row-by-row apply();
    # the 0/1 indicators are summed per stay below.
    indicators = {
        column: (goal_category == category).astype('int64')
        for category, column in column_by_category.items()
    }

    return (
        care_plan_goal[['patientunitstayid']]
        .assign(**indicators)
        .groupby(['patientunitstayid'])
        .sum()
        .reset_index()
    )


# This cell replaces dfs['carePlanGoal'] with its aggregated form, which no
# longer has 'cplgoalcategory'. The check makes a re-run a no-op instead of a
# KeyError.
if 'cplgoalcategory' in dfs['carePlanGoal'].columns:
    categoria = count_care_plan_categories(dfs['carePlanGoal'])
    dfs['carePlanGoal'] = categoria

In [58]:
# Inspect the aggregated table: one row per patientunitstayid with per-category goal counts.
dfs['carePlanGoal']

Unnamed: 0,patientunitstayid,Care_Patient_Family,Care_Pulmonary,Care_Fluid_Balance_Treatments,Care_Activity_Safety,Care_Cardiovascular,Care_Infection_Labs
0,264423,0,0,0,0,2,0
1,272886,0,0,0,0,0,0
2,281132,0,0,0,0,2,0
3,524799,0,0,0,0,1,0
4,994512,13,4,1,8,2,2
...,...,...,...,...,...,...,...
181,3154481,8,16,0,25,10,7
182,3155594,4,0,0,1,1,1
183,3155694,3,2,0,0,1,1
184,3157219,0,8,0,0,0,0


In [59]:
# Column dtypes and non-null counts of the aggregated table.
dfs['carePlanGoal'].info()
# Emits one blank line after the info() report.
print()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 186 entries, 0 to 185
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype
---  ------                         --------------  -----
 0   patientunitstayid              186 non-null    int64
 1   Care_Patient_Family            186 non-null    int64
 2   Care_Pulmonary                 186 non-null    int64
 3   Care_Fluid_Balance_Treatments  186 non-null    int64
 4   Care_Activity_Safety           186 non-null    int64
 5   Care_Cardiovascular            186 non-null    int64
 6   Care_Infection_Labs            186 non-null    int64
dtypes: int64(7)
memory usage: 10.3 KB

