In [2]:
import boto3
import pandas as pd
from sagemaker import get_execution_role
# Execution role returned by the SageMaker SDK; it is not referenced again in the visible cells.
role = get_execution_role()
# boto3 S3 resource, used later to list the objects stored in the database bucket.
s3 = boto3.resource('s3')

# ICUsICS DB tutorial

ICUsICS is an anonymized database built from the data stored in the Clinical Information System (CIS) databases of 6 Intensive Care Units (ICUs) from the Catalan Institute of Health (ICS). It is actually a database of databases, because each ICU belongs to a different hospital and each CIS has its own particularities. However, the table architecture of each database is identical across hospitals, which facilitates data search and extraction.

ICUsICS is not hosted as a database 'per se', but as a directory of folders (tables) with csv files inside (registries). Inside icusics-db, there are 6 folders, one for each hospital, and inside each of them there are 10 folders:  

patients: it contains patient-level info (id, hospital, demographics, and admission and discharge times and wards)  
diagnoses: table with the diagnoses  
insertions: table with the insertions  
variables_ref: it contains info of the variables present in the database (id, hospital, name, type). Key info: vartype 1, 2, 4 and 8 mean v_monitored, v_labres, v_observed and v_derived respectively (the table where the variable is stored). Key info: datatype 0, 1 and 2 mean numeric, categoric and checkbox respectively.  
v_monitored: table with registries for vartype 1 variables  
v_labres: table with registries for vartype 2 variables  
v_observed: table with registries for vartype 4 variables  
v_derived: table with registries for vartype 8 variables  
drugs_ref: it contains info of the drugs present in the database (id, hospital, name, formunit, unit, etc.)  
drugs: table with registries for drugs 

### Checking K=5 anonymization

In [None]:
# NOTE(review): `db` and `h` are only assigned in the "Set database and hospital" cell further down,
# so on a fresh kernel this cell raises NameError unless that cell is run first.
patients = pd.read_csv(f's3://{db}/{h}_db/patients/{h}_patients_ref.csv')

In [52]:
# K-anonymity check: smallest number of distinct patients sharing one combination of the grouping columns.
patients.groupby(['patientsex','age','height','weight','hospital_outcome'])['a_patientid'].nunique().min()

5

### Example: Creating a cohort of patients with:  
1- ICU_LOS > 7 days  
2- Primary diagnosis of pneumonia (any type) and secondary diagnosis of myopathy (any type)  
3- Central Venous Catheter (CVC)  
4- Invasive Mechanical Ventilation (IMV)  
5- APACHE2 > 20  
6- Lactate (arterial) > 2mmol/L at first ICU day  
7- Respiratory Rate (RR) > 20   
8- Sedative Drugs (VAD)  

patients: it contains patient-level info (id, hospital, demographics, and admission and discharge times and wards)  
diagnoses: table with the diagnoses  
insertions: table with the insertions  
variables_ref: it contains info of the variables present in the database (id, hospital, name, type). Key info: vartype 1, 2, 4 and 8 mean v_monitored, v_labres, v_observed and v_derived respectively (the table where the variable is stored). Key info: datatype 0, 1 and 2 mean numeric, categoric and checkbox respectively.  
v_monitored: table with registries for vartype 1 variables  
v_labres: table with registries for vartype 2 variables  
v_observed: table with registries for vartype 4 variables  
v_derived: table with registries for vartype 8 variables  
drugs_ref: it contains info of the drugs present in the database (id, hospital, name, formunit, unit, etc.)  
drugs: table with registries for drugs 

### Set database and hospital

In [3]:
# Bucket name of the database and hospital code; both are interpolated into the S3 paths read by the cells below.
db='icusics-db-demo'
h='h3'

### Get ICU-LOS filter in patients table

In [4]:
patients = pd.read_csv(f's3://{db}/{h}_db/patients/{h}_patients_ref.csv')

In [5]:
patients.head(1)

Unnamed: 0,a_patientid,hospital_coded,patientsex,age,height,weight,bmi,hospadmtime,admwardname,distime,diswardname,hospdistime,hospital_outcome
0,3861270,3,M,70,160,80,31,-683,UCI GENERAL,14722,UCI GENERAL,34887,ALIVE


In [6]:
# Keep only stays whose `distime` exceeds 7 days, expressed in minutes (7 * 24 * 60 = 10080).
los7d = patients.loc[patients['distime'] > 7 * 24 * 60]

### Get diagnoses filter in diagnoses table

In [7]:
diags = pd.read_csv(f's3://{db}/{h}_db/diagnoses/{h}_diagnoses.csv')

In [8]:
diags.head(1)

Unnamed: 0,a_patientid,hospital_coded,diag_type,referencecode,referencecodename
0,3070757,3,secondary,976.0/2,farmacs antiinfecciosos i antiinflamatoris lo...


In [9]:
# Diagnosis codes whose description contains the search stem (case-insensitive).
# na=False makes rows with a missing description count as "no match"; without it a single NaN in
# `referencecodename` puts NaN in the mask and the boolean indexing raises.
# NOTE(review): the stem 'pneum' also matches non-pneumonia terms (e.g. pneumothorax) and 'miopat'
# does not match the English spelling 'myopathy' - inspect the selected codes before trusting the cohort.
dp_pneumo_codes = tuple(set(diags[diags['referencecodename'].str.contains('pneum', case=False, na=False)]['referencecode']))
ds_miopat_codes = tuple(set(diags[diags['referencecodename'].str.contains('miopat', case=False, na=False)]['referencecode']))

In [10]:
# Patients with a primary diagnosis among the pneumonia codes.
dp_patlist = tuple(set(
    diags.loc[(diags['diag_type'] == 'primary') & diags['referencecode'].isin(dp_pneumo_codes), 'a_patientid']
))
# Patients with a secondary diagnosis among the myopathy codes.
ds_patlist = tuple(set(
    diags.loc[(diags['diag_type'] == 'secondary') & diags['referencecode'].isin(ds_miopat_codes), 'a_patientid']
))
# Patients fulfilling both diagnosis criteria.
d_patlist = list(set(dp_patlist).intersection(ds_patlist))

In [18]:
los7d_diags = los7d[los7d['a_patientid'].isin(d_patlist)]

### Get CVC filter in insertions table

In [17]:
insertions = pd.read_csv(f's3://{db}/{h}_db/insertions/{h}_insertions.csv')

In [19]:
insertions.head(1)

Unnamed: 0,a_patientid,a_insertionid,hospital_coded,starttime,endtime,insertionname,insertionplacename
0,3861270,3000000484,3,253,5798,TUB ENDOTRAQUEAL,BOCA


In [24]:
# Patients with at least one insertion whose name contains 'central' (case-insensitive).
# na=False treats a missing insertion name as "no match" instead of letting NaN break the boolean mask.
# NOTE(review): presumably every central venous catheter is labelled with 'central' in this hospital - confirm.
cvc_patlist = tuple(set(insertions[insertions['insertionname'].str.contains('central', case=False, na=False)]['a_patientid']))

In [46]:
los7d_diags_cvc = los7d_diags[los7d_diags['a_patientid'].isin(cvc_patlist)]

### Search for IMV, APACHE2, Lactate and TV in variables_ref table

In [31]:
variables_ref = pd.read_csv(f's3://{db}/{h}_db/variables_ref/{h}_variables_ref.csv')

In [68]:
variables_ref.head(1)

Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
0,3000000100,3,1,0,PAs,PAs,1.REGISTRE MANUAL contingència,,


#### Search for IMV:

In [36]:
# Blind search: strings in this db can be in English, Catalan or Spanish, so match on word stems.
key_chars = 'vent|mec|inv'

# Keep a row when the pattern occurs (case-insensitively) in any of the three text columns.
result = variables_ref[
    variables_ref[['name', 'description', 'choicestringvalue']]
    .apply(lambda col: col.str.contains(key_chars, case=False, na=False))
    .any(axis=1)
]

print(result.shape)

print(
    "To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a categorical (datatype=1) and observed (vartype=4) "
    "feature called 'Teràpia real O2' with the option 'Vent Mecànica'"
)

(333, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a categorical (datatype=1) and observed (vartype=4) feature called 'Teràpia real O2' with the option 'Vent Mecànica'


In [37]:
# Narrow the blind search down to the categorical, observed variable/option named by the mentors.
result2 = result.loc[
    result['datatype'].eq(1)
    & result['vartype'].eq(4)
    & result['name'].str.contains('Teràpia real O2', case=False, na=False)
    & result['choicestringvalue'].str.contains('Vent Mecànica', case=False, na=False)
]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
1949,3015002262,3,4,1,Teràpia real O2,O2 Teràpia,DI 21.CONTROL RESPIRATORI.\nVariable utilitzad...,12.0,Vent Mecànica


#### Get patients with IMV registries among those who meet the inclusion criteria so far

In [69]:
# take a look to the table structure using the first chunk of the table:

v_observed_chunk = pd.read_csv(f's3://{db}/{h}_db/v_observed/{h}_observed001.csv')

In [70]:
v_observed_chunk.head(1)

Unnamed: 0,a_patientid,a_variableid,time,value
0,3961761,3010000100,1167,1.0


In [42]:
%%time

bucket = s3.Bucket('icusics-db-demo')
imv_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_observed']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        imv_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3015002262) & (chunk['value']==12) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc['a_patientid']))))]['a_patientid']))
        imv_patlist = imv_patlist + imv_patlist_chunk

CPU times: user 14.9 s, sys: 1.56 s, total: 16.5 s
Wall time: 43.8 s


In [48]:
los7d_diags_cvc_imv = los7d_diags_cvc[los7d_diags_cvc['a_patientid'].isin(imv_patlist)]

#### Search for APACHE2:

In [56]:
# Blind search: strings in this db can be in English, Catalan or Spanish.
key_chars = 'apache'

# Start from the matches in `name`, then OR in the matches of the other two text columns.
matches = variables_ref['name'].str.contains(key_chars, case=False, na=False)
for column in ('description', 'choicestringvalue'):
    matches |= variables_ref[column].str.contains(key_chars, case=False, na=False)
result = variables_ref[matches]

print(result.shape)

print(
    "To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and derived (vartype=8) "
    "feature called 'APACHE 2 validado'"
)

(62, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and derived (vartype=8) feature called 'APACHE 2 validado'


In [55]:
# First restrict to numeric (datatype 0) derived (vartype 8) variables, then match the name given by the mentors.
numeric_derived = result[(result['datatype'] == 0) & (result['vartype'] == 8)]
result2 = numeric_derived[numeric_derived['name'].str.contains('apache 2 validado', case=False, na=False)]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
5790,3030000350,3,8,0,APACHE 2 validado,APACHE 2 man,Validated APACHE II score,,


#### Get patients with an APACHE 2 higher than 20 among those who meet the inclusion criteria so far

In [71]:
# take a look to the table structure using the first chunk of the table:

v_derived_chunk = pd.read_csv(f's3://{db}/{h}_db/v_derived/{h}_derived001.csv')

In [72]:
v_derived_chunk.head(1)

Unnamed: 0,a_patientid,a_variableid,time,value
0,3669892,3030000100,0,7.0


In [57]:
%%time

bucket = s3.Bucket('icusics-db-demo')
apache2_20_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_derived']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        apache2_20_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3030000350) & (chunk['value']>20) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv['a_patientid']))))]['a_patientid']))
        apache2_20_patlist = apache2_20_patlist + apache2_20_patlist_chunk

CPU times: user 2min 10s, sys: 13 s, total: 2min 23s
Wall time: 6min 47s


In [58]:
los7d_diags_cvc_imv_apache = los7d_diags_cvc_imv[los7d_diags_cvc_imv['a_patientid'].isin(apache2_20_patlist)]

#### Search for Lactate:

In [62]:
# Blind search: strings in this db can be in English, Catalan or Spanish.
key_chars = 'lactat'

# One case-insensitive mask per text column; a row is kept when any of them matches.
in_name = variables_ref['name'].str.contains(key_chars, case=False, na=False)
in_description = variables_ref['description'].str.contains(key_chars, case=False, na=False)
in_choice = variables_ref['choicestringvalue'].str.contains(key_chars, case=False, na=False)
result = variables_ref[in_name | in_description | in_choice]

print(result.shape)

print(
    "To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) "
    "feature that contains 'GSA' label in the abbreviation"
)

(8, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) feature that contains 'GSA' label in the abbreviation


In [64]:
# Narrow the blind search down to numeric lab-result variables named 'lactat' whose abbreviation carries 'GSA'.
result2 = result.loc[
    result['datatype'].eq(0)
    & result['vartype'].eq(2)
    & result['name'].str.contains('lactat', case=False, na=False)
    & result['abbreviation'].str.contains('GSA', case=False, na=False)
]

print('So you finally get your result:')
result2

So you finally get your result:


Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
5398,3024000658,3,2,0,Lactat art GSA,Lactat a GSA,,,
5435,3024000704,3,2,0,aSan-Lactat,Lactat _GSA,,,


#### Get patients with an arterial lactate higher than 2 mmol/L during the first ICU day among those who meet the inclusion criteria so far

In [74]:
# take a look to the table structure using the first chunk of the table:

v_labres_chunk = pd.read_csv(f's3://{db}/{h}_db/v_labres/{h}_labs001.csv')

In [75]:
v_labres_chunk.head(1)

Unnamed: 0,a_patientid,a_variableid,time,value
0,3070757,3020000100,21140,27.0


In [81]:
%%time

bucket = s3.Bucket('icusics-db-demo')
f_lactate_2_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_labres']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        f_lactate_2_patlist_chunk = tuple(set(chunk[(chunk['a_variableid'].isin([3024000658,3024000704])) & (chunk['value']>2) & (chunk['time']<1440) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache['a_patientid']))))]['a_patientid']))
        f_lactate_2_patlist = f_lactate_2_patlist + f_lactate_2_patlist_chunk

CPU times: user 1.53 s, sys: 139 ms, total: 1.67 s
Wall time: 3.72 s


In [82]:
los7d_diags_cvc_imv_apache_lactate = los7d_diags_cvc_imv_apache[los7d_diags_cvc_imv_apache['a_patientid'].isin(f_lactate_2_patlist)]

In [105]:
# Blind search: strings in this db can be in English, Catalan or Spanish.
key_chars = 'fr'

# Build one boolean column per searched text field and keep the rows where any of them matches.
result = variables_ref[
    pd.DataFrame({
        column: variables_ref[column].str.contains(key_chars, case=False, na=False)
        for column in ('name', 'description', 'choicestringvalue')
    }).any(axis=1)
]

print(result.shape)

# NOTE(review): the hint printed below repeats the lactate search text (labres / 'GSA'); later cells
# inspect monitored variable 3000000320 ('FR (p)') for this search - update the message once confirmed.
print(
    "To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) "
    "feature that contains 'GSA' label in the abbreviation"
)

(173, 9)
To many results, so you decide to ask to the mentors and they say that for that hospital, this feature is a numeric (datatype=0) and labres (vartype=2) feature that contains 'GSA' label in the abbreviation


In [106]:
result

Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
8,3000000200,3,1,0,Freqüència Cardíaca,FC,1.REGISTRE MANUAL contingència / M 11. ECOCARD...,,
9,3000000300,3,1,0,FR (m),FR (m),Freqüència Respiratòria Monitor\n1.REGISTRE MA...,,
10,3000000310,3,1,0,FR espontània,FR espont,DI 21.CONTROL RESPIRATORI,,
11,3000000320,3,1,0,FR (p),FR (p),1.REGISTRE MANUAL contingència / M 04.VENTILAC...,,
33,3000001400,3,1,0,INrepl(m),INrepl(m),Replacement input from RRT device,,
...,...,...,...,...,...,...,...,...,...
5591,3024001080,3,2,0,San-Hemoglobina F; fr.subst.,Hgb FfFrSubs,,,
5659,3024001865,3,2,0,"cSan-Oxigen, fr.sat.",cSan-SaO2,,,
5875,3030007000,3,8,0,BALANÇ Dialisis (cmd),BALdial(cmd),"Cumulative dialysis balance within fluid day, ...",,
5876,3030007001,3,8,0,OUTdial(cmd),OUTdial(cmd),"Cumulative dialysis output within fluid day, c...",,


In [107]:
%%time

bucket = s3.Bucket('icusics-db-demo')
hr = pd.DataFrame()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_monitored']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        hr_chunk = chunk[(chunk['a_variableid']==3000000200) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache_lactate['a_patientid']))))]
        hr = pd.concat([hr, hr_chunk])

CPU times: user 37.5 s, sys: 4.33 s, total: 41.9 s
Wall time: 1min 28s


In [114]:
%%time

bucket = s3.Bucket('icusics-db-demo')
caca = pd.DataFrame()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_monitored']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        caca_chunk = chunk[chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache_lactate['a_patientid'])))]
        caca = pd.concat([caca, caca_chunk])

CPU times: user 38.8 s, sys: 4.22 s, total: 43 s
Wall time: 1min 51s


In [127]:
caca_agg = caca.groupby(['a_variableid'], as_index=False).agg({'a_patientid':'nunique'}).sort_values('a_patientid', ascending=False)

In [128]:
# Keep the variables recorded for exactly 12 distinct patients.
# NOTE(review): 12 equals the size of los7d_diags_cvc_imv_apache_lactate printed in this notebook, so this
# presumably selects variables present for every cohort patient - TODO confirm and derive it instead of hard-coding.
caca_comu = caca_agg[caca_agg['a_patientid']==12]

In [130]:
caca_comu['a_variableid'].value_counts()

3000000610    1
3000000600    1
3000005600    1
3000002010    1
3000002400    1
3000000620    1
3000005646    1
3000007102    1
3000000320    1
3000002600    1
Name: a_variableid, dtype: int64

In [131]:
variables_ref[variables_ref['a_variableid'].isin(tuple(set(caca_comu['a_variableid'])))]

Unnamed: 0,a_variableid,hospital_coded,vartype,datatype,name,abbreviation,description,choicecode,choicestringvalue
11,3000000320,3,1,0,FR (p),FR (p),1.REGISTRE MANUAL contingència / M 04.VENTILAC...,,
15,3000000600,3,1,0,PAs NI,PAs NI,1.REGISTRE MANUAL contingència,,
16,3000000610,3,1,0,PAm NI,PAm NI,1.REGISTRE MANUAL contingència,,
17,3000000620,3,1,0,PAd NI,PAd NI,1.REGISTRE MANUAL contingència,,
40,3000002010,3,1,0,FiO2,FiO2,Inspired O2\n1.REGISTRE MANUAL contingència / ...,,
45,3000002400,3,1,0,Vt (p),Vt (p),M 04.VENTILACIÓ MECÀNICA,,
50,3000002600,3,1,0,PEEP (p),PEEP (p),Set positive end-expiratory pressure 1\n1.REGI...,,
190,3000005600,3,1,0,FR monitor,FR monitor,,,
198,3000005646,3,1,0,Pressió Suport,P. suport,Pressió Suport Monitoritzada\nPuritan Bennet. ...,,
207,3000007102,3,1,0,Tª axilar,Temp axilar,1.REGISTRE MANUAL contingència / DI 06.CONTROL...,,


In [132]:
caca[caca['a_variableid']==3000000320]['value'].describe()

count    40677.000000
mean        12.465619
std          9.526971
min          0.000000
25%          0.000000
50%         17.000000
75%         20.000000
max         30.000000
Name: value, dtype: float64

In [134]:
caca[(caca['a_variableid']==3000000320) & (caca['value']>20)]['a_patientid'].value_counts()

3001780    2249
3257877    1456
3803957    1259
3345064     623
3703957     514
3219293     455
3792693     316
3037324     238
3454744     156
Name: a_patientid, dtype: int64

In [120]:
caca[caca['a_variableid']==3000000200]['a_patientid'].value_counts()

3792693    43123
3647997    20640
3219293    15567
3454744      150
3703957      102
3257877        2
Name: a_patientid, dtype: int64

In [117]:
caca['a_variableid'].value_counts()

3000000200    79584
3000000110    79511
3000000120    78952
3000000100    78951
3000004000    78054
              ...  
3000003100        5
3000001310        3
3000001320        3
3000001350        3
3000003101        1
Name: a_variableid, Length: 75, dtype: int64

In [115]:
caca['a_patientid'].value_counts()

3792693    302832
3001780    177794
3647997    155100
3219293    148453
3615379     96867
3803957     77868
3345064     76161
3257877     49494
3738345     36476
3454744     35718
3037324     18053
3703957      7689
Name: a_patientid, dtype: int64

In [113]:
hr['a_patientid'].value_counts()

3792693    43123
3647997    20640
3219293    15567
3454744      150
3703957      102
3257877        2
Name: a_patientid, dtype: int64

In [109]:
los7d_diags_cvc_imv_apache_lactate['a_patientid']

1482    3345064
2494    3615379
3426    3738345
4163    3037324
4239    3803957
4856    3001780
5112    3703957
5117    3454744
5555    3792693
5609    3219293
6368    3647997
6692    3257877
Name: a_patientid, dtype: int64

In [108]:
hr['a_patientid'].nunique()

6

In [97]:
# NOTE(review): `vt` is not assigned in any cell visible in this notebook; presumably it came from a
# deleted cell, so this raises NameError on a fresh kernel.
vt.shape

(440, 4)

In [91]:
%%time

bucket = s3.Bucket('icusics-db-demo')
crrt_patlist = tuple()

for my_bucket_object in bucket.objects.all():
    
    if all(x in my_bucket_object.key for x in ['h3', 'v_monitored']):
            
        chunk = pd.read_csv(f's3://{db}/{my_bucket_object.key}')
        crrt_patlist_chunk = tuple(set(chunk[(chunk['a_variableid']==3000002410) & (chunk['value']>800) & (
            chunk['a_patientid'].isin(tuple(set(los7d_diags_cvc_imv_apache_lactate['a_patientid']))))]['a_patientid']))
        crrt_patlist = crrt_patlist + crrt_patlist_chunk

CPU times: user 38.8 s, sys: 4.31 s, total: 43.1 s
Wall time: 1min 52s


In [93]:
crrt_patlist

()

In [110]:
# Report the number of distinct patients left after each successive inclusion criterion.
cohort_steps = [
    ('patients in ICUSICS demo database', patients),
    ('patients with ICU LOS > 7 days:', los7d),
    ('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd:', los7d_diags),
    ('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc:', los7d_diags_cvc),
    ('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv:', los7d_diags_cvc_imv),
    ('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20:',
     los7d_diags_cvc_imv_apache),
    ('patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and first lactate >2mmol/L:',
     los7d_diags_cvc_imv_apache_lactate),
]

for step_label, step_frame in cohort_steps:
    print(step_label, step_frame['a_patientid'].nunique())

patients in ICUSICS demo database 6928
patients with ICU LOS > 7 days: 2233
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd: 81
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc: 80
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv: 79
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20: 28
patients with ICU LOS > 7 days and pneumonia as pd and myopathy as sd and cvc and imv with an apache2 > 20 and first lactate >2mmol/L: 12


In [60]:
los7d_diags_cvc_imv_apache.head(1)

Unnamed: 0,a_patientid,hospital_coded,patientsex,age,height,weight,bmi,hospadmtime,admwardname,distime,diswardname,hospdistime,hospital_outcome
1482,3345064,3,M,30,170,70,24,-4,URGÈNCIES,52365,MEDICINA INTERNA,72262,ALIVE


In [61]:
los7d_diags_cvc_imv_apache['hospital_outcome'].value_counts(normalize=True)

ALIVE     0.785714
EXITUS    0.214286
Name: hospital_outcome, dtype: float64

In [None]:
# NOTE(review): repeats the insertions load from the CVC section and has no execution count; candidate for removal.
insertions = pd.read_csv(f's3://{db}/{h}_db/insertions/{h}_insertions.csv')