In [53]:
import boto3
import pandas as pd
import numpy as np
import re
from sagemaker import get_execution_role
role = get_execution_role()
s3 = boto3.resource('s3')

# ICUsICS DB tutorial

ICUsICS is an anonymized database built from the data stored into the Clinical Information System (CIS) database of 6 Intensive Care Units (ICUs) from the Catalan Institute of Health (ICS). Actually, it is a database of databases, because each ICU belongs to a different hospital and each CIS presents its own particularities. However, the tables architecture of each database is identical between hospitals, which facilitates data search and extraction.

ICUsICS is not hosted as a database 'per se', but as a directory of folders (tables) with csv files inside (registries). Inside icuics-db, there are 6 folders, 1 for each hospital, and inside them there are 10 folders:  

patients: it contains patient-level info (id, hospital, demografics and admission and discharge time and wards)  
diagnoses: table with the diagnoses  
insertions: table with the insertions  
variables_ref: it contains info of the variables present in the database (id, hospital, name, type). Key info: vartype 1, 2, 4 and 8 mean v_monitored, v_labres, v_observed and v_derived respectivelly (the table where the variable is stored). Key info: datatype 0, 1 and 2 mean numeric, categoric and checkbox respectivelly.  
v_monitored: table with registries for vartype 1 variables  
v_labres: table with registries for vartype 2 variables  
v_observed: table with registries for vartype 4 variables  
v_derived: table with registries for vartype 8 variables  
drugs_ref: it contains info of the drugs present in the database (id, hospital, name, formunit, unit, etc.)  
drugs: table with registries for drugs 

## Set database and hospital

In [3]:
db='icusics-db-demo'
h='h3'

### Checking K=5 anonymization

In [4]:
patients = pd.read_csv(f's3://{db}/{h}_db/patients/{h}_patients_ref.csv')

In [5]:
patients.groupby(['patientsex','age','height','weight','hospital_outcome'], as_index=False).agg({'a_patientid':'nunique'})['a_patientid'].min()

5

### Example: Creating a cohort of patients with:  
1- ICU_LOS > 7 days  
2- Primary diagnose of pneumonia (any type) and secondary diagnose of myopathy (any type)  
3- Central Venous Cateter (CVC)  
4- Invasive Mechanical Ventilation (IMV)  
5- APACHE2 > 20  
6- Lactate (arterial) > 2mmol/L at first ICU day  
7- Respiratory Rate (RR) > 30   
9- Sedative Drugs (VAD)  

patients: it contains patient-level info (id, hospital, demografics and admission and discharge time and wards)  
diagnoses: table with the diagnoses  
insertions: table with the insertions  
variables_ref: it contains info of the variables present in the database (id, hospital, name, type). Key info: vartype 1, 2, 4 and 8 mean v_monitored, v_labres, v_observed and v_derived respectivelly (the table where the variable is stored). Key info: datatype 0, 1 and 2 mean numeric, categoric and checkbox respectivelly.  
v_monitored: table with registries for vartype 1 variables  
v_labres: table with registries for vartype 2 variables  
v_observed: table with registries for vartype 4 variables  
v_derived: table with registries for vartype 8 variables  
drugs_ref: it contains info of the drugs present in the database (id, hospital, name, formunit, unit, etc.)  
drugs: table with registries for drugs 

### 1- ICU_LOS > 7 days 

In [6]:
patients = pd.read_csv(f's3://{db}/{h}_db/patients/{h}_patients_ref.csv')

In [7]:
patients.head(1)

Unnamed: 0,a_patientid,hospital_coded,patientsex,age,height,weight,bmi,hospadmtime,admwardname,distime,diswardname,hospdistime,hospital_outcome
0,3861270,3,M,70,160,80,31,-683,UCI GENERAL,14722,UCI GENERAL,34887,ALIVE


In [8]:
los7d = patients[patients['distime']>10080] # 7 days = 10080 minutes

### 2- Primary diagnose of pneumonia (any type) and secondary diagnose of myopathy (any type) 

In [9]:
diags = pd.read_csv(f's3://{db}/{h}_db/diagnoses/{h}_diagnoses.csv')

In [10]:
diags.head(1)

Unnamed: 0,a_patientid,hospital_coded,diag_type,referencecode,referencecodename
0,3070757,3,secondary,976.0/2,farmacs antiinfecciosos i antiinflamatoris lo...


In [11]:
dp_pneumo_codes = tuple(set(diags[diags['referencecodename'].str.contains('pneum', case=False)]['referencecode']))
ds_miopat_codes = tuple(set(diags[diags['referencecodename'].str.contains('miopat', case=False)]['referencecode']))

In [12]:
dp_patlist = tuple(set(diags[(diags['diag_type']=='primary') & (diags['referencecode'].isin(dp_pneumo_codes))]['a_patientid']))
ds_patlist = tuple(set(diags[(diags['diag_type']=='secondary') & (diags['referencecode'].isin(ds_miopat_codes))]['a_patientid']))
d_patlist = list(set(dp_patlist).intersection(ds_patlist))

In [13]:
los7d_diags = los7d[los7d['a_patientid'].isin(d_patlist)]

### 3- Central Venous Cateter (CVC)  

In [14]:
insertions = pd.read_csv(f's3://{db}/{h}_db/insertions/{h}_insertions.csv')

In [15]:
insertions.head(1)

Unnamed: 0,a_patientid,a_insertionid,hospital_coded,starttime,endtime,insertionname,insertionplacename
0,3861270,3000000484,3,253,5798,TUB ENDOTRAQUEAL,BOCA


In [16]:
cvc_patlist = tuple(set(insertions[insertions['insertionname'].str.contains('central', case=False)]['a_patientid']))

In [17]:
los7d_diags_cvc = los7d_diags[los7d_diags['a_patientid'].isin(cvc_patlist)]

### 4- Invasive Mechanical Ventilation (IMV)  

#### First, import variables_ref table to look for the variable code

In [18]:
variables_ref = pd.read_csv(f's3://{db}/{h}_db/variables_ref/{h}_variables_ref.csv')

In [19]:
variables_ref.head(1)

Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
0,3000000100,3,1,0,PAs,PAs,1.REGISTRE MANUAL contingència,,


In [20]:
# define key characters (remember that strings in this db can be in english, catalan or spanish language) to start a blind search

key_chars = 'vent|mec|inv'

result = variables_ref[(variables_ref['name'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['description'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['choicestringvalue'].str.contains(key_chars, case=False, na=False))]

print(result.shape)

print("To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a categorical (datatype=1) and observed (vartype=4) \
feature called 'Teràpia real O2' with the option 'Vent Mecànica'")

(333, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a categorical (datatype=1) and observed (vartype=4) feature called 'Teràpia real O2' with the option 'Vent Mecànica'


In [21]:
result2 = result[(result['datatype']==1) & (result['vartype']==4) & (result['name'].str.contains('Teràpia real O2', case=False, na=False)) & (
    result['choicestringvalue'].str.contains('Vent Mecànica', case=False, na=False))]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
1949,3015002262,3,4,1,Teràpia real O2,O2 Teràpia,DI 21.CONTROL RESPIRATORI.\nVariable utilitzad...,12.0,Vent Mecànica


#### Get patients with IMV registries for those who have accomplished with the inclusion criteria up to now

In [22]:
# take a look to the table structure using the first chunk of the table:

v_observed_chunk = pd.read_csv(f's3://{db}/{h}_db/v_observed/{h}_observed001.csv')

In [23]:
v_observed_chunk.head(1)

Unnamed: 0,a_patientid,a_variableid,time,value
0,3961761,3010000100,1167,1.0


In [24]:
%%time

bucket = s3.Bucket('icusics-db-demo')
imv_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_observed']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        imv_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3015002262) & (chunk['value']==12) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc['a_patientid']))))]['a_patientid']))
        imv_patlist = imv_patlist + imv_patlist_chunk

CPU times: user 15.6 s, sys: 1.59 s, total: 17.2 s
Wall time: 54.2 s


In [25]:
los7d_diags_cvc_imv = los7d_diags_cvc[los7d_diags_cvc['a_patientid'].isin(imv_patlist)]

#### 5- APACHE2 > 20 

In [26]:
# define key characters (remember that strings in this db can be in english, catalan or spanish language) to start a blind search

key_chars = 'apache'

result = variables_ref[(variables_ref['name'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['description'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['choicestringvalue'].str.contains(key_chars, case=False, na=False))]

print(result.shape)

print("To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and derived (vartype=8) \
feature called 'APACHE 2 validado'")

(62, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and derived (vartype=8) feature called 'APACHE 2 validado'


In [27]:
result2 = result[(result['datatype']==0) & (result['vartype']==8) & (result['name'].str.contains('apache 2 validado', case=False, na=False))]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
5790,3030000350,3,8,0,APACHE 2 validado,APACHE 2 man,Validated APACHE II score,,


#### Get patients with an APACHE 2 higher of 20 for those who have accomplished with the inclusion criteria up to now

In [39]:
# take a look to the table structure using the first chunk of the table:

v_derived_chunk = pd.read_csv(f's3://{db}/{h}_db/v_derived/{h}_derived001.csv')
vderived_chunk = pd.read_csv(f's3://{db}/{h}_db/vderived/vderived_3000004_3009942.csv')
vderived_hagg_chunk = pd.read_csv(f's3://{db}/{h}_db/vderived_hagg/hagg_vderived_3000004_3009942.csv')

In [55]:
s = 'hagg_vderived_3000004_3009942.csv'

In [77]:
m = re.findall('\d+', s)

In [78]:
m

'3000004'

In [40]:
v_derived_chunk.head()

Unnamed: 0,a_patientid,a_variableid,time,value
0,3669892,3030000100,0,7.0
1,3669892,3030000100,112,7.0
2,3669892,3030000100,6241,7.0
3,3319305,3030000100,0,5.0
4,3319305,3030000100,4520,5.0


In [41]:
vderived_chunk.head()

Unnamed: 0,a_patientid,a_variableid,time,value
0,3001780,3030005060,127253,382.63
1,3001780,3030005060,127257,473.497
2,3001780,3030005060,127258,473.497
3,3001780,3030005060,127260,623.497
4,3001780,3030005060,127283,648.63


In [42]:
vderived_hagg_chunk.head()

Unnamed: 0,a_patientid,a_variableid,starttime,endtime,registries,min,max,median,sum
0,3000004,3030000100,0.0,60.0,2,0.0,0.0,0.0,0.0
1,3000004,3030000100,9660.0,9720.0,2,0.0,0.0,0.0,0.0
2,3000004,3030000114,1080.0,1140.0,1,26.0,26.0,26.0,26.0
3,3000004,3030000114,2580.0,2640.0,1,8.0,8.0,8.0,8.0
4,3000004,3030000114,3720.0,3780.0,1,12.0,12.0,12.0,12.0


In [123]:
%%time

bucket = s3.Bucket('icusics-db-demo')
apache2_20_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_derived']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        apache2_20_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3030000350) & (chunk['value']>20) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv['a_patientid']))))]['a_patientid']))
        apache2_20_patlist = apache2_20_patlist + apache2_20_patlist_chunk

CPU times: user 2min 7s, sys: 12.7 s, total: 2min 20s
Wall time: 7min 2s


In [125]:
sorted(apache2_20_patlist)

[3001780,
 3037324,
 3068913,
 3219293,
 3257877,
 3272164,
 3275221,
 3309946,
 3345064,
 3346463,
 3356461,
 3450913,
 3454744,
 3486278,
 3511143,
 3580417,
 3592961,
 3615379,
 3647997,
 3665240,
 3666383,
 3703957,
 3718524,
 3738345,
 3792693,
 3803957,
 3876105,
 3983254]

In [115]:
%%time

bucket = s3.Bucket('icusics-db-demo')
apache2_20_patlist2 = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'vderived/']):
        
        boundaries = re.findall('\d+', my_bucket_object.key)
        lb = boundaries.pop(1)
        ub = boundaries.pop()
        for patid in sorted(list(set(los7d_diags_cvc_imv['a_patientid'].astype(str)))):
            if patid>lb and patid<ub:
                chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
                apache2_20_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3030000350) & (chunk['value']>20) & (
                    chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv['a_patientid']))))]['a_patientid']))
                apache2_20_patlist2 = apache2_20_patlist2 + apache2_20_patlist_chunk

CPU times: user 1min 42s, sys: 11.7 s, total: 1min 54s
Wall time: 3min 57s


In [121]:
apache2_20_patlist2

(3001780,
 3037324,
 3068913,
 3219293,
 3219293,
 3219293,
 3257877,
 3257877,
 3257877,
 3272164,
 3275221,
 3272164,
 3275221,
 3309946,
 3309946,
 3345064,
 3346463,
 3345064,
 3346463,
 3345064,
 3346463,
 3356461,
 3454744,
 3450913,
 3454744,
 3450913,
 3486278,
 3511143,
 3511143,
 3511143,
 3580417,
 3592961,
 3615379,
 3647997,
 3665240,
 3666383,
 3665240,
 3666383,
 3703957,
 3703957,
 3718524,
 3718524,
 3738345,
 3792693,
 3803957,
 3876105,
 3876105,
 3983254)

In [116]:
%%time

bucket = s3.Bucket('icusics-db-demo')
apache2_20_patlist3 = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'vderived_hagg/']):
        
        boundaries = re.findall('\d+', my_bucket_object.key)
        lb = boundaries.pop(1)
        ub = boundaries.pop()
        for patid in sorted(list(set(los7d_diags_cvc_imv['a_patientid'].astype(str)))):
            if patid>lb and patid<ub:
                chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
                apache2_20_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3030000350) & (chunk['max']>20) & (
                    chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv['a_patientid']))))]['a_patientid']))
                apache2_20_patlist3 = apache2_20_patlist3 + apache2_20_patlist_chunk

CPU times: user 40.8 s, sys: 3.76 s, total: 44.6 s
Wall time: 1min 51s


In [122]:
apache2_20_patlist3

(3001780,
 3037324,
 3068913,
 3219293,
 3219293,
 3219293,
 3257877,
 3257877,
 3257877,
 3272164,
 3275221,
 3272164,
 3275221,
 3309946,
 3309946,
 3345064,
 3346463,
 3345064,
 3346463,
 3345064,
 3346463,
 3356461,
 3454744,
 3450913,
 3454744,
 3450913,
 3486278,
 3511143,
 3511143,
 3511143,
 3580417,
 3592961,
 3615379,
 3647997,
 3665240,
 3666383,
 3665240,
 3666383,
 3703957,
 3703957,
 3718524,
 3718524,
 3738345,
 3792693,
 3803957,
 3876105,
 3876105,
 3983254)

In [117]:
los7d_diags_cvc_imv_apache = los7d_diags_cvc_imv[los7d_diags_cvc_imv['a_patientid'].isin(apache2_20_patlist)]

In [118]:
los7d_diags_cvc_imv_apache.shape

(0, 13)

#### 6- Lactate (arterial) > 2mmol/L at first ICU day 

In [36]:
# define key characters (remember that strings in this db can be in english, catalan or spanish language) to start a blind search

key_chars = 'lactat'

result = variables_ref[(variables_ref['name'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['description'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['choicestringvalue'].str.contains(key_chars, case=False, na=False))]

print(result.shape)

print("To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) \
feature that contains 'GSA' label in the abbreviation")

(8, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) feature that contains 'GSA' label in the abbreviation


In [37]:
result2 = result[(result['datatype']==0) & (result['vartype']==2) & (result['name'].str.contains('lactat', case=False, na=False)) & (
    result['abbreviation'].str.contains('GSA', case=False, na=False))]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
5398,3024000658,3,2,0,Lactat art GSA,Lactat a GSA,,,
5435,3024000704,3,2,0,aSan-Lactat,Lactat _GSA,,,


#### Get patients with an arterial lactate higher than 2mmol/L during the first ICU day for those who have accomplished with the inclusion criteria

In [38]:
# take a look to the table structure using the first chunk of the table:

v_labres_chunk = pd.read_csv(f's3://{db}/{h}_db/v_labres/{h}_labs001.csv')

In [39]:
v_labres_chunk.head(1)

Unnamed: 0,a_patientid,a_variableid,time,value
0,3070757,3020000100,21140,27.0


In [40]:
%%time

bucket = s3.Bucket('icusics-db-demo')
f_lactate_2_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_labres']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        f_lactate_2_patlist_chunk = tuple(set(chunk[(chunk['a_variableid'].isin([3024000658,3024000704])) & (chunk['value']>2) & (chunk['time']<1440) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache['a_patientid']))))]['a_patientid']))
        f_lactate_2_patlist = f_lactate_2_patlist + f_lactate_2_patlist_chunk

CPU times: user 1.54 s, sys: 104 ms, total: 1.64 s
Wall time: 3.87 s


In [41]:
los7d_diags_cvc_imv_apache_lactate = los7d_diags_cvc_imv_apache[los7d_diags_cvc_imv_apache['a_patientid'].isin(f_lactate_2_patlist)]

#### 7- Respiratory Rate (RR) > 30 

In [46]:
# define key characters (remember that strings in this db can be in english, catalan or spanish language) to start a blind search

key_chars = 'respiratory rate|rr|fr'

result = variables_ref[(variables_ref['name'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['description'].str.contains(key_chars, case=False, na=False)) | (
    variables_ref['choicestringvalue'].str.contains(key_chars, case=False, na=False))]

print(result.shape)

print("To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and monitored (vartype=1) \
feature that is named 'FR monitor'")

(359, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and monitored (vartype=1) feature that is named 'FR (m)'


In [73]:
result2 = result[(result['datatype']==0) & (result['vartype']==1) & (result['name'].str.contains('FR monitor', case=True, na=False))]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
190,3000005600,3,1,0,FR monitor,FR monitor,,,


In [74]:
%%time

bucket = s3.Bucket('icusics-db-demo')
rr_16_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_monitored']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        rr_16_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3000005600) & (chunk['value']>30) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache_lactate['a_patientid']))))]['a_patientid']))
        rr_16_patlist = rr_16_patlist + rr_16_patlist_chunk

CPU times: user 38.9 s, sys: 4.2 s, total: 43.1 s
Wall time: 1min 47s


In [75]:
los7d_diags_cvc_imv_apache_lactate_rr = los7d_diags_cvc_imv_apache_lactate[los7d_diags_cvc_imv_apache_lactate['a_patientid'].isin(rr_16_patlist)]

#### 9- Sedative Drugs (VAD)

In [77]:
drugs_ref = pd.read_csv(f's3://{db}/{h}_db/drugs_ref/{h}_drugs_ref.csv')

In [78]:
drugs_ref.head(1)

Unnamed: 0,a_pharmaid,hospital_coded,pharmaname,pharmagroupname,pharmaformunit,pharmadoseunit,pharmadoseformratio,pharmavolumeunit,pharmavolumeformratio
0,3000000004,3,COMPLEX DE PROTROMBINA 600 UI (F. IX) vi,.Antihemorràgics,vial,UI,600.0,ml,0.0


In [82]:
drugs = pd.read_csv(f's3://{db}/{h}_db/drugs/{h}_drugs001.csv')

In [83]:
drugs.head(1)

Unnamed: 0,a_patientid,a_pharmaid,time,routename,routeid,rate,givendose
0,3376095,3000000104,1292,PERF IV,98,10.0,0.010033


In [88]:
# define key characters (remember that strings in this db can be in english, catalan or spanish language) to start a blind search

key_chars = 'sed'

result = drugs_ref[(drugs_ref['pharmagroupname'].str.contains(key_chars, case=False, na=False))]

print(result.shape)

print("We have 18 drugs in the group of sedatives, so we save all them in a tuple and search for them in drugs table'")

sedatives_ids = tuple(set(result['a_pharmaid']))

(18, 9)
We have 18 drugs in the group of sedatives, so we save all them in a tuple and search for them in drugs table'


In [102]:
%%time

bucket = s3.Bucket('icusics-db-demo')
sedatives_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'drugs']):
        
        if 'ref' not in my_bucket_object.key:
            
            chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
            sedatives_patlist_chunk = tuple(set(chunk[(chunk['a_pharmaid'].isin(sedatives_ids)) & (chunk['routename']=='PERF IV CONT') & (
                chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache_lactate_rr['a_patientid']))))]['a_patientid']))
            sedatives_patlist = sedatives_patlist + sedatives_patlist_chunk

CPU times: user 14.6 s, sys: 1.31 s, total: 15.9 s
Wall time: 38.6 s


In [103]:
los7d_diags_cvc_imv_apache_lactate_rr_sedatives = los7d_diags_cvc_imv_apache_lactate_rr[los7d_diags_cvc_imv_apache_lactate_rr['a_patientid'].isin(sedatives_patlist)]

In [104]:
print('patients in ICUSICS demo database',
      patients['a_patientid'].nunique())
print('patients with ICU LOS > 7 days:',
      los7d['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd:',
      los7d_diags['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc:',
      los7d_diags_cvc['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv:',
      los7d_diags_cvc_imv['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20:',
      los7d_diags_cvc_imv_apache['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1:',
      los7d_diags_cvc_imv_apache_lactate['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1 and rr > 20:',
      los7d_diags_cvc_imv_apache_lactate_rr['a_patientid'].nunique())
print('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1 and rr > 20 and sedatives:',
      los7d_diags_cvc_imv_apache_lactate_rr_sedatives['a_patientid'].nunique())

patients in ICUSICS demo database 6928
patients with ICU LOS > 7 days: 2233
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd: 81
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc: 80
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv: 79
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20: 28
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1: 12
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1 and rr > 20: 10
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and lactate >2mmol/L at day 1 and rr > 20 and sedatives: 9


We see how only 9 patients in that database fullfilled the inclusion criteria of this example. The objective was to explore all tables, not to obtain a real dataset

In [105]:
los7d_diags_cvc_imv_apache_lactate_rr_sedatives

Unnamed: 0,a_patientid,hospital_coded,patientsex,age,height,weight,bmi,hospadmtime,admwardname,distime,diswardname,hospdistime,hospital_outcome
1482,3345064,3,M,30,170,70,24,-4,URGÈNCIES,52365,MEDICINA INTERNA,72262,ALIVE
2494,3615379,3,M,70,170,60,21,-11,URGÈNCIES,25791,MEDICINA INTERNA,45771,ALIVE
4239,3803957,3,F,40,170,60,21,-96,URGÈNCIES,91164,MEDICINA INTERNA,115427,ALIVE
4856,3001780,3,M,80,160,70,27,-55,URGÈNCIES,165263,MEDICINA INTERNA,188117,ALIVE
5112,3703957,3,M,70,170,70,24,0,H VALLS,57282,EXITUS,57725,EXITUS
5555,3792693,3,M,80,170,70,24,-2795,MEDICINA INTERNA,110656,REHABILITACIÓ,117747,ALIVE
5609,3219293,3,M,70,170,90,31,0,UCIM-UCO Joan XXIII,114444,PNEUMOLOGIA,129104,EXITUS
6368,3647997,3,M,70,170,80,28,-17,URGÈNCIES,21456,MEDICINA INTERNA,27253,ALIVE
6692,3257877,3,M,70,180,90,28,-1,H VENDRELL,77472,MEDICINA INTERNA,91648,ALIVE


In [106]:
los7d_diags_cvc_imv_apache_lactate_rr_sedatives['hospital_outcome'].value_counts(normalize=True)

ALIVE     0.777778
EXITUS    0.222222
Name: hospital_outcome, dtype: float64