From 2318d0cfe4185cfddb29966cde665c81d996ae75 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Tue, 11 Jul 2017 12:38:38 -0400 Subject: [PATCH 1/9] authors release code under the MIT License --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 8ef1e0e..28c97c4 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,10 @@ Our team has worked hard to create and share the MIMIC dataset. We encourage you We encourage users to share concepts they have extracted by writing code which generates a materialized view. These materialized views can then be used by researchers around the world to speed up data extraction. For example, ventilation durations can be acquired by creating the ventdurations view in [etc/ventilation-durations.sql](https://github.com/MIT-LCP/mimic-code/blob/master/concepts/ventilation-durations.sql). +## License + +By committing your code to the [MIMIC Code Repository](https://github.com/mit-lcp/mimic-code) you agree to release the code under the [MIT License attached to the repository](https://github.com/mit-lcp/mimic-code/blob/master/LICENSE). + ## Coding style Please refer to the [style guide](https://github.com/MIT-LCP/mimic-code/blob/master/styleguide.md) for guidelines on formatting your code for the repository. From 9e05ab27b947b664f7ddbc5dcfe0c3e573c69fcc Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Wed, 19 Jul 2017 20:13:02 -0400 Subject: [PATCH 2/9] don't drop materialized view at end of script --- concepts/durations/ventilation-durations.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/concepts/durations/ventilation-durations.sql b/concepts/durations/ventilation-durations.sql index f500ab1..1d336fe 100644 --- a/concepts/durations/ventilation-durations.sql +++ b/concepts/durations/ventilation-durations.sql @@ -255,5 +255,3 @@ having min(charttime) != max(charttime) -- in these cases, ventnum=0 and max(mechvent)=0, so they are ignored and max(mechvent) = 1 order by icustay_id, ventnum; - -DROP MATERIALIZED VIEW ventsettings; From e887d577048e5c168a1df988e2c864151a835b66 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 25 Jul 2017 19:10:35 -0400 Subject: [PATCH 3/9] clean up scripts and fix bugs with makefile --- buildmimic/postgres/Makefile | 24 ++++++++---------------- buildmimic/postgres/create_mimic_user.sh | 23 +++++++++++++++-------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/buildmimic/postgres/Makefile b/buildmimic/postgres/Makefile index 4782879..2f2f1bc 100644 --- a/buildmimic/postgres/Makefile +++ b/buildmimic/postgres/Makefile @@ -1,26 +1,18 @@ # Config PHYSIONETURL=https://physionet.org/works/MIMICIIIClinicalDatabase/files/ -# The following check whether values are passed via environment, set to defaults if not -ifeq ($(DBNAME),) +# Set the following parameters to defaults +# These will be overwritten by settings passed to the makefile DBNAME := mimic -endif - -ifeq ($(DBUSER),) DBUSER := postgres -endif - -# Specify the password here -# If you don't specify a password, then the role will not require one to login via password authentication -#DBPASS= - -# Change "mimiciii" to specify a different schema +DBPASS := postgres DBSCHEMA := mimiciii # NOTE: you do not need to specify localhost/port -# in fact, this is detrimental if you want to use peer authentication, as "localhost" is not strictly local -#DBHOST := localhost -#DBPORT := 5432 +# in fact, this is detrimental if you want to use peer authentication +# "localhost" uses a loopback, so peer authentication doesn't work with it +DBHOST := +DBPORT := # when connecting, we use a single variable: DBSTRING # **do not modify this** @@ -66,7 +58,7 @@ create-user: @echo '------------------------' @echo '' @sleep 2 - MIMIC_USER="$(DBUSER)" MIMIC_DB="$(DBNAME)" MIMIC_PASSWORD="$(DBPASS)" MIMIC_SCHEMA="$(DBSCHEMA)" ./create_mimic_user.sh + MIMICUSER="$(DBUSER)" MIMIC_DB="$(DBNAME)" MIMIC_PASSWORD="$(DBPASS)" MIMIC_SCHEMA="$(DBSCHEMA)" ./create_mimic_user.sh mimic-build-gz: @echo '------------------------' diff --git a/buildmimic/postgres/create_mimic_user.sh b/buildmimic/postgres/create_mimic_user.sh index e6bac1c..db153f5 100755 --- a/buildmimic/postgres/create_mimic_user.sh +++ b/buildmimic/postgres/create_mimic_user.sh @@ -16,9 +16,9 @@ fi if [ -z ${MIMIC_USER+x} ]; then MIMIC_USER=postgres - echo "MIMIC_USER is unset, using default '$MIMIC_USER'"; + echo "User is unset, using default '$MIMIC_USER'"; else - echo "MIMIC_USER is set to '$MIMIC_USER'"; + echo "User is set to '$MIMIC_USER'"; fi # if hash gosu 2>/dev/null; then @@ -27,9 +27,16 @@ fi # SUDO='sudo -u postgres' # fi -$SUDO psql postgres > /dev/null <<- EOSQL - CREATE USER $MIMIC_USER WITH PASSWORD '$MIMIC_PASSWORD'; - DROP DATABASE IF EXISTS $MIMIC_DB; - CREATE DATABASE $MIMIC_DB OWNER $MIMIC_USER; - CREATE SCHEMA $MIMIC_SCHEMA AUTHORIZATION $MIMIC_USER; -EOSQL +if [ "$MIMIC_USER" != "postgres" ]; then + # create user + psql postgres -c "DROP USER IF EXISTS $MIMIC_USER;" + psql postgres -c "CREATE USER $MIMIC_USER WITH PASSWORD '$MIMIC_PASSWORD';" +fi + +# create database +psql postgres -c "DROP DATABASE IF EXISTS $MIMIC_DB;" +psql postgres -c "CREATE DATABASE $MIMIC_DB OWNER $MIMIC_USER;" + +# create schema on database +export PGPASSWORD=$MIMIC_PASSWORD +psql -U $MIMIC_USER -d ${MIMIC_DB} -c "CREATE SCHEMA $MIMIC_SCHEMA AUTHORIZATION $MIMIC_USER;" From 69925c54d6a699ef082ee75e8c55fec829745ce4 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 25 Jul 2017 19:11:38 -0400 Subject: [PATCH 4/9] exit on any error code --- buildmimic/postgres/create_mimic_user.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/buildmimic/postgres/create_mimic_user.sh b/buildmimic/postgres/create_mimic_user.sh index db153f5..cbc7a64 100755 --- a/buildmimic/postgres/create_mimic_user.sh +++ b/buildmimic/postgres/create_mimic_user.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e if [ -z ${MIMIC_PASSWORD+x} ]; then echo "MIMIC_PASSWORD is unset"; From b34133c29e2d7428915b116b0f2a3fa9d4c792e3 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Wed, 26 Jul 2017 11:47:38 -0400 Subject: [PATCH 5/9] remove superfluous commit message --- concepts/firstday/labs-first-day.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/concepts/firstday/labs-first-day.sql b/concepts/firstday/labs-first-day.sql index c69110c..a50ca72 100644 --- a/concepts/firstday/labs-first-day.sql +++ b/concepts/firstday/labs-first-day.sql @@ -153,5 +153,3 @@ FROM ) pvt GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id; - -commit; From ddec2541a6313a981e5e4941d685333c6cf91a2a Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Wed, 26 Jul 2017 11:47:51 -0400 Subject: [PATCH 6/9] add some comments on start of script --- concepts/make-concepts.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/concepts/make-concepts.sql b/concepts/make-concepts.sql index 0419951..2e72f27 100644 --- a/concepts/make-concepts.sql +++ b/concepts/make-concepts.sql @@ -1,7 +1,13 @@ -- This file makes all materialized views in this subfolder -- Note that this may take a large amount of time and hard drive space +\echo '' +\echo '===' \echo 'Beginning to create materialized views for MIMIC database.' +\echo 'Any notices of the form "NOTICE: materialized view "XXXXXX" does not exist" can be ignored.' +\echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.' +\echo '===' +\echo '' \echo 'Top level files..' \i code-status.sql From 9ae812fe099614e7304bc1f6321857c01fb970f3 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Wed, 26 Jul 2017 12:00:43 -0400 Subject: [PATCH 7/9] clarify build/common issues --- buildmimic/postgres/README.md | 45 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/buildmimic/postgres/README.md b/buildmimic/postgres/README.md index d20ca26..f8d608c 100644 --- a/buildmimic/postgres/README.md +++ b/buildmimic/postgres/README.md @@ -22,16 +22,27 @@ For example, to create MIMIC from a set of zipped CSV files in the "/path/to/dat $ make mimic datadir="/path/to/data/" ``` -If default connection parameters are not correct, specify in Makefile header or in environment, e.g.: +By default, the Makefile uses the following parameters: + +* Database name: `mimic` +* User name: `postgres` +* Password: `postgres` +* Schema: `mimiciii` +* Host: none (defaults to localhost) +* Port: none (defaults to 5432) + +If you would like to change any of these parameters, you can do so in the make call: ``` bash -$ DBNAME="my_db" DBPASS="my_pass" DBHOST="192.168.0.1" make mimic-build datadir="/path/to/data/" +$ make mimic datadir="/path/to/data/" DBNAME="my_db" DBPASS="my_pass" DBHOST="192.168.0.1" ``` -When using the database be sure to switch to the mimic namespace, +When using the database be sure to change the default search path to the mimic schema: ```bash -$ psql mimic +# connect to database mimic +$ psql -d mimic +# set default schema to mimiciii mimic=# SET search_path TO mimiciii; ``` @@ -45,3 +56,29 @@ LINE 1: CREATE SCHEMA IF NOT EXISTS mimiciii; ``` The `IF NOT EXISTS` syntax was introduced in PostgreSQL 9.3. Make sure you have the latest PostgreSQL version. While one possible option is to modify the code here to be function under earlier versions, we highly recommend upgrading as most of the code written in this repository uses materialized views (which were introduced in PostgreSQL version 9.4). + +## NOTICE + +```sql +NOTICE: materialized view "XXXXXX" does not exist, skipping +``` + +This is normal. By default, the script attempts to delete tables before rebuilding them. If it cannot find the table to delete, it outputs a notice letting the user know. + +## Stuck on copy + +Many users report that the scripts get stuck at the following point: + +``` +COPY 58976 +COPY 34499 +COPY 7567 +``` + +This is expected. The 4th table is CHARTEVENTS, and this table can take many hours to load. Give it time, and ensure that the computer does not automatically hibernate during this time. + +Also note that eventually, the 4th line will read `COPY 0`. This is expected, see https://github.com/MIT-LCP/mimic-code/issues/182 + +## Other + +Please see the issues page to discuss other issues you may be having: https://github.com/MIT-LCP/mimic-code/issues From da694d75c25f9e9aa547ae68803da5981ab4e992 Mon Sep 17 00:00:00 2001 From: Angus Zhang Date: Thu, 3 Aug 2017 14:21:48 +0800 Subject: [PATCH 8/9] Update crrt-durations.sql --- concepts/durations/crrt-durations.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/concepts/durations/crrt-durations.sql b/concepts/durations/crrt-durations.sql index b091e24..da9cbb3 100644 --- a/concepts/durations/crrt-durations.sql +++ b/concepts/durations/crrt-durations.sql @@ -195,6 +195,8 @@ select icustay_id , ROW_NUMBER() over (partition by icustay_id order by num) as num , min(charttime) as starttime , max(charttime) as endtime + , extract(epoch from max(charttime)-min(charttime))/60/60 AS duration_hours + -- add durations from vd2 group by icustay_id, num having min(charttime) != max(charttime) From d4fa48714c4c2053e5fce5004e823f579afa5538 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Sun, 6 Aug 2017 14:52:38 +0100 Subject: [PATCH 9/9] update tests for v1_4, still broken on travis --- tests/test_mysql_build.py | 24 ++++----- tests/test_oracle_build.py | 24 ++++----- tests/test_postgres_build.py | 70 +++++++++++++------------- tests/testdata/v1_4/ADMISSIONS.csv.gz | Bin 0 -> 183 bytes tests/testdata/v1_4/CALLOUT.csv.gz | Bin 0 -> 187 bytes tests/testdata/v1_4/CAREGIVERS.csv.gz | Bin 0 -> 68 bytes tests/testdata/v1_4/CHARTEVENTS.csv.gz | Bin 0 -> 131 bytes tests/testdata/v1_4/CPTEVENTS.csv.gz | Bin 0 -> 129 bytes tests/testdata/v1_4/DATETIMEEVENTS.csv.gz | Bin 0 -> 131 bytes tests/testdata/v1_4/DIAGNOSES_ICD.csv.gz | Bin 0 -> 81 bytes tests/testdata/v1_4/DRGCODES.csv.gz | Bin 0 -> 102 bytes tests/testdata/v1_4/D_CPT.csv.gz | Bin 0 -> 104 bytes tests/testdata/v1_4/D_ICD_DIAGNOSES.csv.gz | Bin 0 -> 79 bytes tests/testdata/v1_4/D_ICD_PROCEDURES.csv.gz | Bin 0 -> 80 bytes tests/testdata/v1_4/D_ITEMS.csv.gz | Bin 0 -> 114 bytes tests/testdata/v1_4/D_LABITEMS.csv.gz | Bin 0 -> 83 bytes tests/testdata/v1_4/ICUSTAYS.csv.gz | Bin 0 -> 120 bytes tests/testdata/v1_4/INPUTEVENTS_CV.csv.gz | Bin 0 -> 166 bytes tests/testdata/v1_4/INPUTEVENTS_MV.csv.gz | Bin 0 -> 245 bytes tests/testdata/v1_4/LABEVENTS.csv.gz | Bin 0 -> 94 bytes tests/testdata/v1_4/MICROBIOLOGYEVENTS.csv.gz | Bin 0 -> 163 bytes tests/testdata/v1_4/NOTEEVENTS.csv.gz | Bin 0 -> 116 bytes tests/testdata/v1_4/OUTPUTEVENTS.csv.gz | Bin 0 -> 124 bytes tests/testdata/v1_4/PATIENTS.csv.gz | Bin 0 -> 91 bytes tests/testdata/v1_4/PRESCRIPTIONS.csv.gz | Bin 0 -> 167 bytes tests/testdata/v1_4/PROCEDUREEVENTS_MV.csv.gz | Bin 0 -> 216 bytes tests/testdata/v1_4/PROCEDURES_ICD.csv.gz | Bin 0 -> 82 bytes tests/testdata/v1_4/SERVICES.csv.gz | Bin 0 -> 90 bytes tests/testdata/v1_4/TRANSFERS.csv.gz | Bin 0 -> 127 bytes 29 files changed, 59 insertions(+), 59 deletions(-) create mode 100644 tests/testdata/v1_4/ADMISSIONS.csv.gz create mode 100644 tests/testdata/v1_4/CALLOUT.csv.gz create mode 100644 tests/testdata/v1_4/CAREGIVERS.csv.gz create mode 100644 tests/testdata/v1_4/CHARTEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/CPTEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/DATETIMEEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/DIAGNOSES_ICD.csv.gz create mode 100644 tests/testdata/v1_4/DRGCODES.csv.gz create mode 100644 tests/testdata/v1_4/D_CPT.csv.gz create mode 100644 tests/testdata/v1_4/D_ICD_DIAGNOSES.csv.gz create mode 100644 tests/testdata/v1_4/D_ICD_PROCEDURES.csv.gz create mode 100644 tests/testdata/v1_4/D_ITEMS.csv.gz create mode 100644 tests/testdata/v1_4/D_LABITEMS.csv.gz create mode 100644 tests/testdata/v1_4/ICUSTAYS.csv.gz create mode 100644 tests/testdata/v1_4/INPUTEVENTS_CV.csv.gz create mode 100644 tests/testdata/v1_4/INPUTEVENTS_MV.csv.gz create mode 100644 tests/testdata/v1_4/LABEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/MICROBIOLOGYEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/NOTEEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/OUTPUTEVENTS.csv.gz create mode 100644 tests/testdata/v1_4/PATIENTS.csv.gz create mode 100644 tests/testdata/v1_4/PRESCRIPTIONS.csv.gz create mode 100644 tests/testdata/v1_4/PROCEDUREEVENTS_MV.csv.gz create mode 100644 tests/testdata/v1_4/PROCEDURES_ICD.csv.gz create mode 100644 tests/testdata/v1_4/SERVICES.csv.gz create mode 100644 tests/testdata/v1_4/TRANSFERS.csv.gz diff --git a/tests/test_mysql_build.py b/tests/test_mysql_build.py index a1434c4..b13ca99 100644 --- a/tests/test_mysql_build.py +++ b/tests/test_mysql_build.py @@ -8,7 +8,7 @@ sqluser = 'root' testdbname = 'mimic_test_db' hostname = 'localhost' -datadir = 'testdata/v1_3/' +datadir = 'testdata/v1_4/' schema = 'mimiciii' # Set paths for scripts to be tested @@ -22,23 +22,23 @@ "ADMISSIONS": 58976, "CALLOUT": 34499, "CAREGIVERS": 7567, -"CHARTEVENTS": 263201375, +"CHARTEVENTS": 330712483, "CPTEVENTS": 573146, "D_CPT": 134, "D_ICD_DIAGNOSES": 14567, "D_ICD_PROCEDURES": 3882, "D_ITEMS": 12478, -"D_LABITEMS": 755, -"DATETIMEEVENTS": 4486049, +"D_LABITEMS": 753, +"DATETIMEEVENTS": 4485937, "DIAGNOSES_ICD": 651047, "DRGCODES": 125557, "ICUSTAYS": 61532, -"INPUTEVENTS_CV": 17528894, +"INPUTEVENTS_CV": 17527935, "INPUTEVENTS_MV": 3618991, -"LABEVENTS": 27872575, -"MICROBIOLOGYEVENTS": 328446, -"NOTEEVENTS": 2078705, -"OUTPUTEVENTS": 4349339, +"LABEVENTS": 27854055, +"MICROBIOLOGYEVENTS": 631726, +"NOTEEVENTS": 2083180, +"OUTPUTEVENTS": 4349218, "PATIENTS": 46520, "PRESCRIPTIONS": 4156848, "PROCEDUREEVENTS_MV": 258066, @@ -50,10 +50,10 @@ def run_mysql_build_scripts(cur): # Create tables and loads data fn = curpath + '../buildmimic/mysql/1-define.sql' cur.execute(open(fn, "r").read()) - if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': + if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': # use full dataset mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' - else: + else: mimic_data_dir = curpath+datadir call(['mysql','-f',fn,'-d',testdbname,'-U',sqluser,'-v','mimic_data_dir='+mimic_data_dir]) # # Add constraints @@ -74,7 +74,7 @@ def setUpClass(cls): cls.con = MySQLdb.connect(host=hostname, user=sqluser) cls.cur = cls.con.cursor() # Create test database - try: + try: cls.cur.execute('DROP DATABASE ' + testdbname) except MySQLdb.OperationalError: pass diff --git a/tests/test_oracle_build.py b/tests/test_oracle_build.py index 0a83b11..59dabc4 100644 --- a/tests/test_oracle_build.py +++ b/tests/test_oracle_build.py @@ -8,7 +8,7 @@ sqluser = 'root' testdbname = 'mimic_test_db' hostname = 'localhost' -datadir = 'testdata/v1_3/' +datadir = 'testdata/v1_4/' schema = 'mimiciii' # Set paths for scripts to be tested @@ -22,23 +22,23 @@ "ADMISSIONS": 58976, "CALLOUT": 34499, "CAREGIVERS": 7567, -"CHARTEVENTS": 263201375, +"CHARTEVENTS": 330712483, "CPTEVENTS": 573146, "D_CPT": 134, "D_ICD_DIAGNOSES": 14567, "D_ICD_PROCEDURES": 3882, "D_ITEMS": 12478, -"D_LABITEMS": 755, -"DATETIMEEVENTS": 4486049, +"D_LABITEMS": 753, +"DATETIMEEVENTS": 4485937, "DIAGNOSES_ICD": 651047, "DRGCODES": 125557, "ICUSTAYS": 61532, -"INPUTEVENTS_CV": 17528894, +"INPUTEVENTS_CV": 17527935, "INPUTEVENTS_MV": 3618991, -"LABEVENTS": 27872575, -"MICROBIOLOGYEVENTS": 328446, -"NOTEEVENTS": 2078705, -"OUTPUTEVENTS": 4349339, +"LABEVENTS": 27854055, +"MICROBIOLOGYEVENTS": 631726, +"NOTEEVENTS": 2083180, +"OUTPUTEVENTS": 4349218, "PATIENTS": 46520, "PRESCRIPTIONS": 4156848, "PROCEDUREEVENTS_MV": 258066, @@ -50,10 +50,10 @@ # # Create tables and loads data # fn = curpath + '../buildmimic/mysql/1-define.sql' # cur.execute(open(fn, "r").read()) -# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': +# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': # # use full dataset # mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' -# else: +# else: # mimic_data_dir = curpath+datadir # call(['mysql','-f',fn,'-d',testdbname,'-U',sqluser,'-v','mimic_data_dir='+mimic_data_dir]) # # # Add constraints @@ -74,7 +74,7 @@ # cls.con = MySQLdb.connect(host=hostname, user=sqluser) # cls.cur = cls.con.cursor() # # Create test database -# try: +# try: # cls.cur.execute('DROP DATABASE ' + testdbname) # except MySQLdb.OperationalError: # pass diff --git a/tests/test_postgres_build.py b/tests/test_postgres_build.py index 0a1c1fd..626c856 100644 --- a/tests/test_postgres_build.py +++ b/tests/test_postgres_build.py @@ -14,7 +14,7 @@ psqluser = 'postgres' testdbname = 'mimic_test_db' hostname = 'localhost' -datadir = 'testdata/v1_3/' +datadir = 'testdata/v1_4/' schema = 'mimiciii' # Set paths for scripts to be tested @@ -28,23 +28,23 @@ "ADMISSIONS": 58976, "CALLOUT": 34499, "CAREGIVERS": 7567, -"CHARTEVENTS": 263201375, +"CHARTEVENTS": 330712483, "CPTEVENTS": 573146, "D_CPT": 134, "D_ICD_DIAGNOSES": 14567, "D_ICD_PROCEDURES": 3882, "D_ITEMS": 12478, -"D_LABITEMS": 755, -"DATETIMEEVENTS": 4486049, +"D_LABITEMS": 753, +"DATETIMEEVENTS": 4485937, "DIAGNOSES_ICD": 651047, "DRGCODES": 125557, "ICUSTAYS": 61532, -"INPUTEVENTS_CV": 17528894, +"INPUTEVENTS_CV": 17527935, "INPUTEVENTS_MV": 3618991, -"LABEVENTS": 27872575, -"MICROBIOLOGYEVENTS": 328446, -"NOTEEVENTS": 2078705, -"OUTPUTEVENTS": 4349339, +"LABEVENTS": 27854055, +"MICROBIOLOGYEVENTS": 631726, +"NOTEEVENTS": 2083180, +"OUTPUTEVENTS": 4349218, "PATIENTS": 46520, "PRESCRIPTIONS": 4156848, "PROCEDUREEVENTS_MV": 258066, @@ -78,10 +78,10 @@ def run_postgres_build_scripts(cur): cur.execute(open(fn, "r").read()) # Loads data fn = curpath + '../buildmimic/postgres/postgres_load_data.sql' - if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': + if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': # use full dataset - mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' - else: + mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_4/' + else: mimic_data_dir = curpath+datadir call(['psql','-f',fn,'-d',testdbname,'-U',psqluser,'-v','mimic_data_dir='+mimic_data_dir]) # Add constraints @@ -98,10 +98,10 @@ def run_postgres_build_scripts(cur): # cur.execute(open(fn, "r").read()) # # Loads data # fn = curpath + '../buildmimic/mysql/mysql_load_data.sql' -# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': +# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': # # use full dataset # mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' -# else: +# else: # mimic_data_dir = curpath+datadir # call(['psql','-f',fn,'-d',testdbname,'-U',psqluser,'-v','mimic_data_dir='+mimic_data_dir]) # # Add constraints @@ -121,7 +121,7 @@ def setUpClass(cls): cls.con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cls.cur = cls.con.cursor() # Create test database - try: + try: cls.cur.execute('DROP DATABASE ' + testdbname) except psycopg2.ProgrammingError: pass @@ -180,7 +180,7 @@ def test_testddl(self): # Run a series of checks to ensure ITEMIDs are valid # All checks should return 0. # -------------------------------------------------- - + def test_itemids_in_inputevents_cv_are_shifted(self): query = """ -- prompt Number of ITEMIDs which were erroneously left as original value @@ -189,7 +189,7 @@ def test_itemids_in_inputevents_cv_are_shifted(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_inputevents_mv_are_shifted(self): query = """ -- prompt Number of ITEMIDs which were erroneously left as original value @@ -198,7 +198,7 @@ def test_itemids_in_inputevents_mv_are_shifted(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_outputevents_are_shifted(self): query = """ -- prompt Number of ITEMIDs which were erroneously left as original value @@ -207,7 +207,7 @@ def test_itemids_in_outputevents_are_shifted(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_inputevents_cv_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are above the allowable range @@ -216,7 +216,7 @@ def test_itemids_in_inputevents_cv_are_in_range(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_outputevents_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are not in the allowable range @@ -225,7 +225,7 @@ def test_itemids_in_outputevents_are_in_range(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_chartevents_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are not in the allowable range @@ -234,7 +234,7 @@ def test_itemids_in_chartevents_are_in_range(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_procedureevents_mv_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are not in the allowable range @@ -243,7 +243,7 @@ def test_itemids_in_procedureevents_mv_are_in_range(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_labevents_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are not in the allowable range @@ -252,7 +252,7 @@ def test_itemids_in_labevents_are_in_range(self): """ queryresult = pd.read_sql_query(query,self.con) self.assertEqual(queryresult.values[0][0],0) - + def test_itemids_in_microbiologyevents_are_in_range(self): query = """ -- prompt Number of ITEMIDs which are not in the allowable range @@ -267,7 +267,7 @@ def test_itemids_in_microbiologyevents_are_in_range(self): # ---------------------------------------------------- # RUN THE FOLLOWING TESTS ON THE FULL DATASET ONLY --- # ---------------------------------------------------- - + if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': def test_row_counts_are_as_expected(self): for tablename,expectedrows in row_dict.iteritems(): @@ -279,20 +279,20 @@ def test_age_and_los_is_expected(self): query = \ """ WITH icuadmissions as ( - SELECT a.subject_id, a.hadm_id, i.icustay_id, - a.admittime as hosp_admittime, a.dischtime as hosp_dischtime, - i.first_careunit, + SELECT a.subject_id, a.hadm_id, i.icustay_id, + a.admittime as hosp_admittime, a.dischtime as hosp_dischtime, + i.first_careunit, DENSE_RANK() over(PARTITION BY a.hadm_id ORDER BY i.intime ASC) as icu_seq, - p.dob, p.dod, i.intime as icu_intime, i.outtime as icu_outtime, + p.dob, p.dod, i.intime as icu_intime, i.outtime as icu_outtime, i.los as icu_los, - round((EXTRACT(EPOCH FROM (a.dischtime-a.admittime))/60/60/24) :: NUMERIC, 4) as hosp_los, - p.gender, + round((EXTRACT(EPOCH FROM (a.dischtime-a.admittime))/60/60/24) :: NUMERIC, 4) as hosp_los, + p.gender, round((EXTRACT(EPOCH FROM (a.admittime-p.dob))/60/60/24/365.242) :: NUMERIC, 4) as age_hosp_in, round((EXTRACT(EPOCH FROM (i.intime-p.dob))/60/60/24/365.242) :: NUMERIC, 4) as age_icu_in, hospital_expire_flag, - CASE WHEN p.dod IS NOT NULL + CASE WHEN p.dod IS NOT NULL AND p.dod >= i.intime - interval '6 hour' - AND p.dod <= i.outtime + interval '6 hour' THEN 1 + AND p.dod <= i.outtime + interval '6 hour' THEN 1 ELSE 0 END AS icu_expire_flag FROM admissions a INNER JOIN icustays i @@ -300,8 +300,8 @@ def test_age_and_los_is_expected(self): INNER JOIN patients p ON a.subject_id = p.subject_id ORDER BY a.subject_id, i.intime) - SELECT round(avg(age_icu_in)) as avg_age_icu, - round(avg(hosp_los)) as avg_los_hosp, + SELECT round(avg(age_icu_in)) as avg_age_icu, + round(avg(hosp_los)) as avg_los_hosp, round(avg(icu_los)) as avg_los_icu FROM icuadmissions; """ diff --git a/tests/testdata/v1_4/ADMISSIONS.csv.gz b/tests/testdata/v1_4/ADMISSIONS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b156907074196495a57be98bf8c403c8171c5325 GIT binary patch literal 183 zcmV;o07(BIiwFo79fw%}13^ShNmElvPfk-VV{>)@RgXaq!Y~X3?>r-CK4B8$7^O~S zJ3vqQ|1U@=6>uA`cCFo@_0c(nbFg-~Vf6jTkd~bWusbg}II+ciFoGXEJ!V{``?mfI zrAF`pM;esyS7foKAaSM&2sNOIiiK%+ETHUAx;c2WA)sgB$i7WZiu7rLskU!ZAgRXF lpR29!5BOYJv0n?&d!qH($MKHh))P2{(+iv`_~H-&007agQkwt( literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/CALLOUT.csv.gz b/tests/testdata/v1_4/CALLOUT.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b70766858158aa17462af339b2d8f995b8ed0d57 GIT binary patch literal 187 zcmV;s07U;EiwFqR9EVu|14BVfOixu*E@N|c0A-L(62dSDM(@(Kmq0=-nx6`!UH1OB zFx6<7agoJ)A25(f%6@Q|E|b>lozOn8KuABG*p3|}ct#ManXMVDA|rp3Pa(!q-GHx9 zl8up?5qefqJB*@7Azd9};CXhOM8V-EZ5amihuKSTL1cmVh2ll8WgOTTwKNw%=L-C|9eT`rNE*x_F%g{zjbyFw~m_L YusmU`aOCoR|4wd(sB=?j@)awQ&kU zO;^4DE$Y2!ekm li9aO|HlXB_?LwkVJqfD>?tfV7x>yPt^#P@uVbz@g002SuHLw5x literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/CPTEVENTS.csv.gz b/tests/testdata/v1_4/CPTEVENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..901a6572e91ee514224f5f761a3ddd7eb02fc958 GIT binary patch literal 129 zcmV-{0Dk`;iwFqR9EVu|14B?$MOH;lR8uZvb9Mk-jX@58APfZm!r4ziTSGN~(h|M# z|6fSOled}8X4lnnbw;;1X<;MKDbZt^1Vt1mD^-)|tD;vjKC}aVkXs6~F+|$B@;yWu jcCy7TblSz(=aM6Ne9PnVA0Y`;J}h$&)VJ3foB#j-D&01g literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/DATETIMEEVENTS.csv.gz b/tests/testdata/v1_4/DATETIMEEVENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..aa1b273f89e6dd7854ffc364c6dc36b8747bd510 GIT binary patch literal 131 zcmV-}0DS)+iwFqR9EVu|14KboMN~;mMMYLcPE=DaV{>)@HOxy6fFKM6(7Ukq5)~T2 z@Jl}uSKj{?BIs(~r0E!{{f^FThIK#C$|?a|UW!&(;QESSp0EK(BxZacr4oL&ex6TA lihpX&gB_^E;)jr^(nvzqBx78Z*Gy~Z%mb$YB+`-q003S0H@pA< literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/DIAGNOSES_ICD.csv.gz b/tests/testdata/v1_4/DIAGNOSES_ICD.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..9bf3e32f604f8faa94b7a67582f45795cce8e394 GIT binary patch literal 81 zcmV-X0IvTZiwFqR9EVu|14KzdM@~;uMN?l%LqslPb9Mkz3i1z+_jFOxQ3?)q@^Wp}s(tr?ZP?ytBWHs}dIg2y^|)HUIzssHY#h literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/DRGCODES.csv.gz b/tests/testdata/v1_4/DRGCODES.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..61f96689770fadb42c8bf97d86f5aa779a09303a GIT binary patch literal 102 zcmV-s0Ga=N(k>=N(d>FDm~AM6^emt0)N5X|+7zyEtl;HAJ%C1o3X%D=N(k>=GXkp8!i5QU!z9#*_d5 zMX{%y$!<28zKhRvBzCGl6hJDGfN1nEIID!7*mYQ@yoZ(vZD U&$cD2WNIGQAGB&pd~5&!06RS|PXGV_ literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/D_LABITEMS.csv.gz b/tests/testdata/v1_4/D_LABITEMS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..468fdda695cf3061034b800dd0ab6c3d65f0d602 GIT binary patch literal 83 zcmV-Z0IdHXiwFqR9EVu|14Lg;K|)DXMNLyKV{>)@Qws7AkN0#@(oymZarFf=d>oxz peSox^Pbir0>=@$e?jIBhlJfWTbB=fRcX3tX0sydi3-3Ar0029^B9s6C literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/ICUSTAYS.csv.gz b/tests/testdata/v1_4/ICUSTAYS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1df5d1a48d29905991f8bb550978fd1b3145af26 GIT binary patch literal 120 zcmV-;0Ehn{iwFqR9EVu|14%)@RgXau!ypJn_dSN|UV@Op;r?Ngk(g%7KM)qzksMx!o8uGx(SJE z))OkH#sZ3?m{F7kN7T)@ZBRjS!!QWE_ZdF-6Id_` zsRbH@CebPX|3ziVNu5kjS{5y^yO`b{jLY{@>UyQ%v$z1Ji8$2a(2hS2=FW*aCcy)- zQyOhZTE$3AklT#XKCh(oA`WCLw1KY!Igp(~?W6Bv?0K?T9uf6d8h$|@x8 zEfZ1+#@SirbKrX`gLBv1h~s7V;Oelw9G$A^f{SL772@tf#aSw`2;Chi?}`8tXfuK` v#`oyFmiLwY(9~C!2Vu_;*LLQn{*%R-rW!|<(|u23r2X{^S6fRD)&T$j)^Kxm literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/LABEVENTS.csv.gz b/tests/testdata/v1_4/LABEVENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..6f07e45dc99aa8f418a305984a9f935011460bba GIT binary patch literal 94 zcmV-k0HOaMiwFqR9EVu|1580eMOH;lR8uZvb9Mkz3i1z+_jFOxQ3?)q@^W)@O^izp!ypVq_dN#d zUcv#Zam7z&NZJ+mzeORWN_Ov!$73|@O`Lg}QkOSdOj*f=Np>ZLBk-RgVQ|T8n}EZ7 z9)@q`=D64%Bz;}iA(4(ZDarS-7HjB6%T?-pcAWSMNwI-HxVudullF;tPdD=Qn2uo0 R%AgUO#}5QL6>r-B001nvM_T{@ literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/NOTEEVENTS.csv.gz b/tests/testdata/v1_4/NOTEEVENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..474d53d51c1279c56f9534e176404b2942f5f358 GIT binary patch literal 116 zcmV-)0E_=0iwFqR9EVu|15Qs=MMYLcPE=DaV{>)@HOxy6fG`L}(YecD>?SIZ7L$qr zj5>1vyRh{$J1;kRV7gyIFoJK>%oy(|iRvpX$%0bw?~vN!70)`u37@1jh0001oFE9)M literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/OUTPUTEVENTS.csv.gz b/tests/testdata/v1_4/OUTPUTEVENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..d97623153c16f934ce26fa49c463cd45954fef28 GIT binary patch literal 124 zcmV-?0E7P@iwFqR9EVu|15Z^{P*qe#Rz*%!Q!Zn3b^tYuOAde_3`F-HgSD3cYy?9s zDTBn7_rHY!bT!{Rqtc$Znbnl)4)T~G*o~))z82Vf1|O^fk*N9j literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/PATIENTS.csv.gz b/tests/testdata/v1_4/PATIENTS.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..729a8f9dda108656dc333a58453d2f1a6c045a45 GIT binary patch literal 91 zcmV-h0HpsPiwFqR9EVu|15iO!NkvXnQ!Zn3b^ucf@(+*qbWze#3J!Jha&-=YaNJ$} xTwH^I3>SYVFzEsz<30R?1E9>{U_T(sH6p+>$Ti;0$I)Gh3jjp0R$Q$tcoP*h1zPE#&pb9Mk#jzJQ`AP7b8;<}fhLXDjS z9mKY-xc@Dxi7v8X{`;T#;3R!5qI=Lq$YYQbk2pMNU*xUrkmnV{>)@ZI3~c!!QU0?|FvL z`2-eI4p|Fa2qp0;|Nq4*vR#{OAdirY z#!%i#0=cJeG*=vdwcK#FL?Nbc*j`NLYxgt9?vkeNVb1VQv*^!X)ahtd>MaGyl*!j` z$q$WDlFPDhBE=TyNYiVqX^2*cDjvaD;w6$M%nmBOzMUPle0n6y85-pYZuj%JzH3|m Sf0f%Z!|4|+B)AoF0RR9|qGJ{S literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/PROCEDURES_ICD.csv.gz b/tests/testdata/v1_4/PROCEDURES_ICD.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..a7ab12c798d3fae393ee76af50c945c9ce8f15ac GIT binary patch literal 82 zcmV-Y0ImNYiwFqR9EVu|15i>=Lq$YYQbki=Nkc>~V{>)@Qws7AkN0#@(oqTyb@Fm` o4uNny99?{&!mfewexbfVmZ!6eWxTV$i>neB00?va$~FK10LAJc8UO$Q literal 0 HcmV?d00001 diff --git a/tests/testdata/v1_4/SERVICES.csv.gz b/tests/testdata/v1_4/SERVICES.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..b08671b646d9b72ba8e3d7978fa53f965517b2cb GIT binary patch literal 90 zcmV-g0HyyQiwFqR9EVu|15-s(R!KueQ!Zn3b^ucf@(+*qbWze#3J!Jha&-=Ya6BAc wd?BomAVo{g+9`aIBrYSxr{ABgn*N{W*(3v)DG_U5?6tcVfXH($dq3v2?JE8c1$~ hGXFjHYT1vIv3{Z;bUr6h<*g%Tvs{@BV&s_s006YiIynFU literal 0 HcmV?d00001