Permalink
...
Comparing changes
Open a pull request
- 10 commits
- 37 files changed
- 0 commit comments
- 3 contributors
Commits on Jul 11, 2017
|
|
tompollard |
2318d0c
|
Commits on Jul 20, 2017
|
|
alistairewj |
9e05ab2
|
Commits on Jul 25, 2017
|
|
alistairewj |
e887d57
|
|||
|
|
alistairewj |
69925c5
|
Commits on Jul 26, 2017
|
|
alistairewj |
b34133c
|
|||
|
|
alistairewj |
ddec254
|
|||
|
|
alistairewj |
9ae812f
|
Commits on Aug 03, 2017
|
|
ZhangAngus |
da694d7
|
Commits on Aug 06, 2017
|
|
alistairewj |
d4fa487
|
|||
|
|
alistairewj |
36ee9c3
|
Unified
Split
Showing
with
136 additions
and 91 deletions.
- +4 −0 README.md
- +8 −16 buildmimic/postgres/Makefile
- +41 −4 buildmimic/postgres/README.md
- +16 −8 buildmimic/postgres/create_mimic_user.sh
- +2 −0 concepts/durations/crrt-durations.sql
- +0 −2 concepts/durations/ventilation-durations.sql
- +0 −2 concepts/firstday/labs-first-day.sql
- +6 −0 concepts/make-concepts.sql
- +12 −12 tests/test_mysql_build.py
- +12 −12 tests/test_oracle_build.py
- +35 −35 tests/test_postgres_build.py
- BIN tests/testdata/v1_4/ADMISSIONS.csv.gz
- BIN tests/testdata/v1_4/CALLOUT.csv.gz
- BIN tests/testdata/v1_4/CAREGIVERS.csv.gz
- BIN tests/testdata/v1_4/CHARTEVENTS.csv.gz
- BIN tests/testdata/v1_4/CPTEVENTS.csv.gz
- BIN tests/testdata/v1_4/DATETIMEEVENTS.csv.gz
- BIN tests/testdata/v1_4/DIAGNOSES_ICD.csv.gz
- BIN tests/testdata/v1_4/DRGCODES.csv.gz
- BIN tests/testdata/v1_4/D_CPT.csv.gz
- BIN tests/testdata/v1_4/D_ICD_DIAGNOSES.csv.gz
- BIN tests/testdata/v1_4/D_ICD_PROCEDURES.csv.gz
- BIN tests/testdata/v1_4/D_ITEMS.csv.gz
- BIN tests/testdata/v1_4/D_LABITEMS.csv.gz
- BIN tests/testdata/v1_4/ICUSTAYS.csv.gz
- BIN tests/testdata/v1_4/INPUTEVENTS_CV.csv.gz
- BIN tests/testdata/v1_4/INPUTEVENTS_MV.csv.gz
- BIN tests/testdata/v1_4/LABEVENTS.csv.gz
- BIN tests/testdata/v1_4/MICROBIOLOGYEVENTS.csv.gz
- BIN tests/testdata/v1_4/NOTEEVENTS.csv.gz
- BIN tests/testdata/v1_4/OUTPUTEVENTS.csv.gz
- BIN tests/testdata/v1_4/PATIENTS.csv.gz
- BIN tests/testdata/v1_4/PRESCRIPTIONS.csv.gz
- BIN tests/testdata/v1_4/PROCEDUREEVENTS_MV.csv.gz
- BIN tests/testdata/v1_4/PROCEDURES_ICD.csv.gz
- BIN tests/testdata/v1_4/SERVICES.csv.gz
- BIN tests/testdata/v1_4/TRANSFERS.csv.gz
View
4
README.md
| @@ -16,6 +16,10 @@ Our team has worked hard to create and share the MIMIC dataset. We encourage you | ||
| We encourage users to share concepts they have extracted by writing code which generates a materialized view. These materialized views can then be used by researchers around the world to speed up data extraction. For example, ventilation durations can be acquired by creating the ventdurations view in [concepts/durations/ventilation-durations.sql](https://github.com/MIT-LCP/mimic-code/blob/master/concepts/durations/ventilation-durations.sql). | ||
| +## License | ||
| + | ||
| +By committing your code to the [MIMIC Code Repository](https://github.com/mit-lcp/mimic-code) you agree to release the code under the [MIT License attached to the repository](https://github.com/mit-lcp/mimic-code/blob/master/LICENSE). | ||
| + | ||
| ## Coding style | ||
| Please refer to the [style guide](https://github.com/MIT-LCP/mimic-code/blob/master/styleguide.md) for guidelines on formatting your code for the repository. | ||
View
24
buildmimic/postgres/Makefile
| @@ -1,26 +1,18 @@ | ||
| # Config | ||
| PHYSIONETURL=https://physionet.org/works/MIMICIIIClinicalDatabase/files/ | ||
| -# The following check whether values are passed via environment, set to defaults if not | ||
| -ifeq ($(DBNAME),) | ||
| +# Set the following parameters to defaults | ||
| +# These will be overwritten by settings passed to the makefile | ||
| DBNAME := mimic | ||
| -endif | ||
| - | ||
| -ifeq ($(DBUSER),) | ||
| DBUSER := postgres | ||
| -endif | ||
| - | ||
| -# Specify the password here | ||
| -# If you don't specify a password, then the role will not require one to login via password authentication | ||
| -#DBPASS= | ||
| - | ||
| -# Change "mimiciii" to specify a different schema | ||
| +DBPASS := postgres | ||
| DBSCHEMA := mimiciii | ||
| # NOTE: you do not need to specify localhost/port | ||
| -# in fact, this is detrimental if you want to use peer authentication, as "localhost" is not strictly local | ||
| -#DBHOST := localhost | ||
| -#DBPORT := 5432 | ||
| +# in fact, this is detrimental if you want to use peer authentication | ||
| +# "localhost" uses a loopback, so peer authentication doesn't work with it | ||
| +DBHOST := | ||
| +DBPORT := | ||
| # when connecting, we use a single variable: DBSTRING | ||
| # **do not modify this** | ||
| @@ -66,7 +58,7 @@ create-user: | ||
| @echo '------------------------' | ||
| @echo '' | ||
| @sleep 2 | ||
| - MIMIC_USER="$(DBUSER)" MIMIC_DB="$(DBNAME)" MIMIC_PASSWORD="$(DBPASS)" MIMIC_SCHEMA="$(DBSCHEMA)" ./create_mimic_user.sh | ||
| + MIMICUSER="$(DBUSER)" MIMIC_DB="$(DBNAME)" MIMIC_PASSWORD="$(DBPASS)" MIMIC_SCHEMA="$(DBSCHEMA)" ./create_mimic_user.sh | ||
| mimic-build-gz: | ||
| @echo '------------------------' | ||
View
45
buildmimic/postgres/README.md
| @@ -22,16 +22,27 @@ For example, to create MIMIC from a set of zipped CSV files in the "/path/to/dat | ||
| $ make mimic datadir="/path/to/data/" | ||
| ``` | ||
| -If default connection parameters are not correct, specify in Makefile header or in environment, e.g.: | ||
| +By default, the Makefile uses the following parameters: | ||
| + | ||
| +* Database name: `mimic` | ||
| +* User name: `postgres` | ||
| +* Password: `postgres` | ||
| +* Schema: `mimiciii` | ||
| +* Host: none (connects to the local server via a Unix-domain socket, or via TCP/IP to localhost on systems without Unix-domain sockets) | ||
| +* Port: none (defaults to 5432) | ||
| + | ||
| +If you would like to change any of these parameters, you can do so in the make call: | ||
| ``` bash | ||
| -$ DBNAME="my_db" DBPASS="my_pass" DBHOST="192.168.0.1" make mimic-build datadir="/path/to/data/" | ||
| +$ make mimic datadir="/path/to/data/" DBNAME="my_db" DBPASS="my_pass" DBHOST="192.168.0.1" | ||
| ``` | ||
| -When using the database be sure to switch to the mimic namespace, | ||
| +When using the database, be sure to change the default search path to the `mimiciii` schema: | ||
| ```bash | ||
| -$ psql mimic | ||
| +# connect to database mimic | ||
| +$ psql -d mimic | ||
| +# set default schema to mimiciii | ||
| mimic=# SET search_path TO mimiciii; | ||
| ``` | ||
| @@ -45,3 +56,29 @@ LINE 1: CREATE SCHEMA IF NOT EXISTS mimiciii; | ||
| ``` | ||
| The `IF NOT EXISTS` syntax was introduced in PostgreSQL 9.3. Make sure you have the latest PostgreSQL version. While one possible option is to modify the code here to function under earlier versions, we highly recommend upgrading, as most of the code written in this repository uses materialized views (which were also introduced in PostgreSQL version 9.3). | ||
| + | ||
| +## NOTICE | ||
| + | ||
| +```sql | ||
| +NOTICE: materialized view "XXXXXX" does not exist, skipping | ||
| +``` | ||
| + | ||
| +This is normal. By default, the scripts attempt to drop each materialized view before rebuilding it. If the view does not exist yet, PostgreSQL outputs a notice letting the user know. | ||
| + | ||
| +## Stuck on copy | ||
| + | ||
| +Many users report that the scripts get stuck at the following point: | ||
| + | ||
| +``` | ||
| +COPY 58976 | ||
| +COPY 34499 | ||
| +COPY 7567 | ||
| +``` | ||
| + | ||
| +This is expected. The 4th table is CHARTEVENTS, and this table can take many hours to load. Give it time, and ensure that the computer does not automatically hibernate during this time. | ||
| + | ||
| +Also note that the 4th line will eventually read `COPY 0`. This is expected; see https://github.com/MIT-LCP/mimic-code/issues/182 | ||
| + | ||
| +## Other | ||
| + | ||
| +Please see the issues page to discuss other issues you may be having: https://github.com/MIT-LCP/mimic-code/issues | ||
View
24
buildmimic/postgres/create_mimic_user.sh
| @@ -1,4 +1,5 @@ | ||
| #!/bin/bash | ||
| +set -e | ||
| if [ -z ${MIMIC_PASSWORD+x} ]; then | ||
| echo "MIMIC_PASSWORD is unset"; | ||
| @@ -16,9 +17,9 @@ fi | ||
| if [ -z ${MIMIC_USER+x} ]; then | ||
| MIMIC_USER=postgres | ||
| - echo "MIMIC_USER is unset, using default '$MIMIC_USER'"; | ||
| + echo "User is unset, using default '$MIMIC_USER'"; | ||
| else | ||
| - echo "MIMIC_USER is set to '$MIMIC_USER'"; | ||
| + echo "User is set to '$MIMIC_USER'"; | ||
| fi | ||
| # if hash gosu 2>/dev/null; then | ||
| @@ -27,9 +28,16 @@ fi | ||
| # SUDO='sudo -u postgres' | ||
| # fi | ||
| -$SUDO psql postgres > /dev/null <<- EOSQL | ||
| - CREATE USER $MIMIC_USER WITH PASSWORD '$MIMIC_PASSWORD'; | ||
| - DROP DATABASE IF EXISTS $MIMIC_DB; | ||
| - CREATE DATABASE $MIMIC_DB OWNER $MIMIC_USER; | ||
| - CREATE SCHEMA $MIMIC_SCHEMA AUTHORIZATION $MIMIC_USER; | ||
| -EOSQL | ||
| +if [ "$MIMIC_USER" != "postgres" ]; then | ||
| + # create user | ||
| + psql postgres -c "DROP USER IF EXISTS $MIMIC_USER;" | ||
| + psql postgres -c "CREATE USER $MIMIC_USER WITH PASSWORD '$MIMIC_PASSWORD';" | ||
| +fi | ||
| + | ||
| +# create database | ||
| +psql postgres -c "DROP DATABASE IF EXISTS $MIMIC_DB;" | ||
| +psql postgres -c "CREATE DATABASE $MIMIC_DB OWNER $MIMIC_USER;" | ||
| + | ||
| +# create schema on database | ||
| +export PGPASSWORD=$MIMIC_PASSWORD | ||
| +psql -U $MIMIC_USER -d ${MIMIC_DB} -c "CREATE SCHEMA $MIMIC_SCHEMA AUTHORIZATION $MIMIC_USER;" | ||
View
2
concepts/durations/crrt-durations.sql
| @@ -195,6 +195,8 @@ select icustay_id | ||
| , ROW_NUMBER() over (partition by icustay_id order by num) as num | ||
| , min(charttime) as starttime | ||
| , max(charttime) as endtime | ||
| + , extract(epoch from max(charttime)-min(charttime))/60/60 AS duration_hours | ||
| + -- add durations | ||
| from vd2 | ||
| group by icustay_id, num | ||
| having min(charttime) != max(charttime) | ||
View
2
concepts/durations/ventilation-durations.sql
| @@ -255,5 +255,3 @@ having min(charttime) != max(charttime) | ||
| -- in these cases, ventnum=0 and max(mechvent)=0, so they are ignored | ||
| and max(mechvent) = 1 | ||
| order by icustay_id, ventnum; | ||
| - | ||
| -DROP MATERIALIZED VIEW ventsettings; | ||
View
2
concepts/firstday/labs-first-day.sql
| @@ -153,5 +153,3 @@ FROM | ||
| ) pvt | ||
| GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id | ||
| ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id; | ||
| - | ||
| -commit; | ||
View
6
concepts/make-concepts.sql
| @@ -1,7 +1,13 @@ | ||
| -- This file makes all materialized views in this subfolder | ||
| -- Note that this may take a large amount of time and hard drive space | ||
| +\echo '' | ||
| +\echo '===' | ||
| \echo 'Beginning to create materialized views for MIMIC database.' | ||
| +\echo 'Any notices of the form "NOTICE: materialized view "XXXXXX" does not exist" can be ignored.' | ||
| +\echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.' | ||
| +\echo '===' | ||
| +\echo '' | ||
| \echo 'Top level files..' | ||
| \i code-status.sql | ||
View
24
tests/test_mysql_build.py
| @@ -8,7 +8,7 @@ | ||
| sqluser = 'root' | ||
| testdbname = 'mimic_test_db' | ||
| hostname = 'localhost' | ||
| -datadir = 'testdata/v1_3/' | ||
| +datadir = 'testdata/v1_4/' | ||
| schema = 'mimiciii' | ||
| # Set paths for scripts to be tested | ||
| @@ -22,23 +22,23 @@ | ||
| "ADMISSIONS": 58976, | ||
| "CALLOUT": 34499, | ||
| "CAREGIVERS": 7567, | ||
| -"CHARTEVENTS": 263201375, | ||
| +"CHARTEVENTS": 330712483, | ||
| "CPTEVENTS": 573146, | ||
| "D_CPT": 134, | ||
| "D_ICD_DIAGNOSES": 14567, | ||
| "D_ICD_PROCEDURES": 3882, | ||
| "D_ITEMS": 12478, | ||
| -"D_LABITEMS": 755, | ||
| -"DATETIMEEVENTS": 4486049, | ||
| +"D_LABITEMS": 753, | ||
| +"DATETIMEEVENTS": 4485937, | ||
| "DIAGNOSES_ICD": 651047, | ||
| "DRGCODES": 125557, | ||
| "ICUSTAYS": 61532, | ||
| -"INPUTEVENTS_CV": 17528894, | ||
| +"INPUTEVENTS_CV": 17527935, | ||
| "INPUTEVENTS_MV": 3618991, | ||
| -"LABEVENTS": 27872575, | ||
| -"MICROBIOLOGYEVENTS": 328446, | ||
| -"NOTEEVENTS": 2078705, | ||
| -"OUTPUTEVENTS": 4349339, | ||
| +"LABEVENTS": 27854055, | ||
| +"MICROBIOLOGYEVENTS": 631726, | ||
| +"NOTEEVENTS": 2083180, | ||
| +"OUTPUTEVENTS": 4349218, | ||
| "PATIENTS": 46520, | ||
| "PRESCRIPTIONS": 4156848, | ||
| "PROCEDUREEVENTS_MV": 258066, | ||
| @@ -50,10 +50,10 @@ def run_mysql_build_scripts(cur): | ||
| # Create tables and loads data | ||
| fn = curpath + '../buildmimic/mysql/1-define.sql' | ||
| cur.execute(open(fn, "r").read()) | ||
| - if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': | ||
| + if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': | ||
| # use full dataset | ||
| mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' | ||
| - else: | ||
| + else: | ||
| mimic_data_dir = curpath+datadir | ||
| call(['mysql','-f',fn,'-d',testdbname,'-U',sqluser,'-v','mimic_data_dir='+mimic_data_dir]) | ||
| # # Add constraints | ||
| @@ -74,7 +74,7 @@ def setUpClass(cls): | ||
| cls.con = MySQLdb.connect(host=hostname, user=sqluser) | ||
| cls.cur = cls.con.cursor() | ||
| # Create test database | ||
| - try: | ||
| + try: | ||
| cls.cur.execute('DROP DATABASE ' + testdbname) | ||
| except MySQLdb.OperationalError: | ||
| pass | ||
View
24
tests/test_oracle_build.py
| @@ -8,7 +8,7 @@ | ||
| sqluser = 'root' | ||
| testdbname = 'mimic_test_db' | ||
| hostname = 'localhost' | ||
| -datadir = 'testdata/v1_3/' | ||
| +datadir = 'testdata/v1_4/' | ||
| schema = 'mimiciii' | ||
| # Set paths for scripts to be tested | ||
| @@ -22,23 +22,23 @@ | ||
| "ADMISSIONS": 58976, | ||
| "CALLOUT": 34499, | ||
| "CAREGIVERS": 7567, | ||
| -"CHARTEVENTS": 263201375, | ||
| +"CHARTEVENTS": 330712483, | ||
| "CPTEVENTS": 573146, | ||
| "D_CPT": 134, | ||
| "D_ICD_DIAGNOSES": 14567, | ||
| "D_ICD_PROCEDURES": 3882, | ||
| "D_ITEMS": 12478, | ||
| -"D_LABITEMS": 755, | ||
| -"DATETIMEEVENTS": 4486049, | ||
| +"D_LABITEMS": 753, | ||
| +"DATETIMEEVENTS": 4485937, | ||
| "DIAGNOSES_ICD": 651047, | ||
| "DRGCODES": 125557, | ||
| "ICUSTAYS": 61532, | ||
| -"INPUTEVENTS_CV": 17528894, | ||
| +"INPUTEVENTS_CV": 17527935, | ||
| "INPUTEVENTS_MV": 3618991, | ||
| -"LABEVENTS": 27872575, | ||
| -"MICROBIOLOGYEVENTS": 328446, | ||
| -"NOTEEVENTS": 2078705, | ||
| -"OUTPUTEVENTS": 4349339, | ||
| +"LABEVENTS": 27854055, | ||
| +"MICROBIOLOGYEVENTS": 631726, | ||
| +"NOTEEVENTS": 2083180, | ||
| +"OUTPUTEVENTS": 4349218, | ||
| "PATIENTS": 46520, | ||
| "PRESCRIPTIONS": 4156848, | ||
| "PROCEDUREEVENTS_MV": 258066, | ||
| @@ -50,10 +50,10 @@ | ||
| # # Create tables and loads data | ||
| # fn = curpath + '../buildmimic/mysql/1-define.sql' | ||
| # cur.execute(open(fn, "r").read()) | ||
| -# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': | ||
| +# if os.environ.has_key('USER') and os.environ['USER'] == 'jenkins': | ||
| # # use full dataset | ||
| # mimic_data_dir = '/home/mimicadmin/data/mimiciii_1_3/' | ||
| -# else: | ||
| +# else: | ||
| # mimic_data_dir = curpath+datadir | ||
| # call(['mysql','-f',fn,'-d',testdbname,'-U',sqluser,'-v','mimic_data_dir='+mimic_data_dir]) | ||
| # # # Add constraints | ||
| @@ -74,7 +74,7 @@ | ||
| # cls.con = MySQLdb.connect(host=hostname, user=sqluser) | ||
| # cls.cur = cls.con.cursor() | ||
| # # Create test database | ||
| -# try: | ||
| +# try: | ||
| # cls.cur.execute('DROP DATABASE ' + testdbname) | ||
| # except MySQLdb.OperationalError: | ||
| # pass | ||
Oops, something went wrong.