# In [1]:
import json
import re
from file_utils import *
import ruamel.yaml
from shutil import copyfile

# In [3]:
def load_yaml(filepath):
    """Parse the YAML file at ``filepath`` with ruamel.yaml's round-trip loader.

    Args:
        filepath: Path of the YAML document to read.

    Returns:
        The parsed document as produced by the round-trip loader.
    """
    # ruamel.yaml.load() with RoundTripLoader is the legacy entry point
    # (deprecated, and removed in ruamel.yaml 0.18).  YAML(typ='rt') is the
    # supported round-trip equivalent.
    # The encoding is explicit so the result does not depend on the locale.
    with open(filepath, encoding='utf-8') as f:
        return ruamel.yaml.YAML(typ='rt').load(f)

# Patterns used by extract_information().  All are raw strings so that the
# regex escapes are not interpreted (and warned about) as string escapes.
_BLOCK_COMMENT_RE = re.compile(r'/\*.*?\*/', re.DOTALL)
# Anchored to the start of a line so the word "SET" inside a remark is never
# mistaken for a SET statement.
_SET_STATEMENT_RE = re.compile(r'^[ \t]*SET\b[^;\n]*;', re.MULTILINE)
_LINE_COMMENT_RE = re.compile(r'--.*')

# A SQL string literal: '...' where an embedded quote is written as ''.
_SQL_STRING = r"'((?:[^']|'')*)'"
# An optional schema prefix is accepted and ignored.
_TABLE_COMMENT_RE = re.compile(
    r"COMMENT\s+ON\s+TABLE\s+(?:\w+\.)?(\w+)\s+[Ii][Ss]\s+"
    + _SQL_STRING + r"\s*;")
_COLUMN_COMMENT_RE = re.compile(
    r"COMMENT\s+ON\s+COLUMN\s+(?:\w+\.)?(\w+)\.(\w+)\s+[Ii][Ss]\s+"
    + _SQL_STRING + r"\s*;")


def _clean_remark(literal_body):
    """Turn the body of a SQL string literal into a single-line remark."""
    return literal_body.replace("''", "'").replace('\n', ' ')


def extract_information(pgsql, liquibase):
    """Combine SQL ``COMMENT ON`` remarks with Liquibase column definitions.

    Args:
        pgsql: Path of the SQL file holding ``COMMENT ON TABLE`` and
            ``COMMENT ON COLUMN`` statements.
        liquibase: Path of the Liquibase YAML changelog; only changeSets
            whose first change is a ``createTable`` are considered.

    Returns:
        A dict with the keys ``'tables'``, ``'icu'``, ``'signals'`` and
        ``'services'`` (the last three are left empty here).
        ``result['tables'][table]`` holds ``'remarks'`` and ``'columns'``.
        Each column maps to a dict with ``'remarks'`` and, when the column
        is also present in the changelog, ``'type'`` and ``'constraints'``.
    """
    with open(pgsql, 'r', encoding='utf-8') as f:
        sql_text = f.read()

    # Drop everything that is not a statement of interest, so that
    # commented-out COMMENT statements are not picked up.
    # NOTE: the '--' rule also cuts a remark that itself contains '--'.
    sql_text = _BLOCK_COMMENT_RE.sub('', sql_text)
    sql_text = _SET_STATEMENT_RE.sub('', sql_text)
    sql_text = _LINE_COMMENT_RE.sub('', sql_text)

    result = {
        'tables': {},
        'icu': {},
        'signals': {},
        'services': {},
    }
    tables = result['tables']

    for table_name, remark in _TABLE_COMMENT_RE.findall(sql_text):
        tables[table_name] = {
            'remarks': _clean_remark(remark),
            'columns': {}
        }

    for table_name, column_name, remark in _COLUMN_COMMENT_RE.findall(sql_text):
        if table_name not in tables:
            # A column comment without a table comment used to raise
            # KeyError; keep the column and give the table an empty remark.
            print(table_name, 'has column comments but no table comment in sql!')
            tables[table_name] = {'remarks': '', 'columns': {}}
        column = tables[table_name]['columns'].setdefault(column_name, {})
        column['remarks'] = _clean_remark(remark)

    liqui = load_yaml(liquibase)
    for entry in liqui['databaseChangeLog']:
        if 'changeSet' not in entry:
            continue
        first_change = entry['changeSet']['changes'][0]
        if 'createTable' not in first_change:
            continue
        create_table = first_change['createTable']
        table_name = create_table['tableName']
        if table_name not in tables:
            # Same best-effort policy as for unknown columns below.
            print(table_name, 'not in sql!')
            continue
        columns = tables[table_name]['columns']
        for col in create_table['columns']:
            col = col['column']
            column_name = col['name']
            if column_name not in columns:
                print(table_name, column_name, 'not in sql!')
                continue
            columns[column_name]['type'] = col['type']
            # Copy into a plain dict; a missing 'constraints' key yields {}.
            columns[column_name]['constraints'] = dict(col.get('constraints') or {})

    return result

def extend_with_remarks(mimic3_dict, liquibase, outfile):
    """Write a copy of the Liquibase changelog with ``remarks`` filled in.

    Args:
        mimic3_dict: Dict whose ``['tables'][table]`` entries hold
            ``'remarks'`` and ``'columns'`` (each column with ``'remarks'``).
        liquibase: Path of the Liquibase YAML changelog to read.
        outfile: Path of the YAML file to create or overwrite.

    Tables and columns that have no entry in ``mimic3_dict`` are reported on
    stdout and written out unchanged instead of raising ``KeyError``.
    """
    liqui = load_yaml(liquibase)
    tables = mimic3_dict['tables']
    for entry in liqui['databaseChangeLog']:
        if 'changeSet' not in entry:
            continue
        first_change = entry['changeSet']['changes'][0]
        if 'createTable' not in first_change:
            continue
        create_table = first_change['createTable']
        table_name = create_table['tableName']
        table_info = tables.get(table_name)
        if table_info is None:
            print(table_name, 'not in sql!')
            continue
        create_table['remarks'] = table_info['remarks']
        for col in create_table['columns']:
            col = col['column']
            column_name = col['name']
            column_info = table_info['columns'].get(column_name)
            if column_info is None:
                # extract_information() skips columns without a SQL comment,
                # so a missing entry is expected here.
                print(table_name, column_name, 'not in sql!')
                continue
            col['remarks'] = column_info['remarks']

    # ruamel.yaml.dump() with RoundTripDumper is the legacy entry point
    # (deprecated, and removed in ruamel.yaml 0.18).  The YAML() instance is
    # round-trip by default and writes straight to the stream.
    with open(outfile, 'w', encoding='utf-8') as f:
        ruamel.yaml.YAML().dump(liqui, f)

# Input files (only read) and output files (created or overwritten).
pgsql = 'postgres_add_comments.sql' # read-only
liquibase = 'mimic-iii.yaml' # read-only
extended_liquibase_with_remarks = 'mimic-iii-with_comments.yaml' # to be created: w+
mimic3_json = 'dict.json' # to be created: w+


# Parse the SQL comments and the Liquibase changelog into one nested dict.
mimic3_dict = extract_information(pgsql, liquibase)

# Write a copy of the changelog with the parsed comments added as `remarks`.
extend_with_remarks(mimic3_dict, liquibase, extended_liquibase_with_remarks)


# add long description about each table and table column
##############
##ADMISSIONS##
##############

mimic3_dict['tables']['ADMISSIONS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['ADMISSIONS']['table_purpose'] = 'Define a patient’s hospital admission, HADM_ID.'
mimic3_dict['tables']['ADMISSIONS']['brief_summary'] = 'The ADMISSIONS table gives information regarding a patient’s admission to the hospital. Since each unique hospital visit for a patient is assigned a unique HADM_ID, the ADMISSIONS table can be considered as a definition table for HADM_ID. Information available includes timing information for admission and discharge, demographic information, the source of the admission, and so on.'
mimic3_dict['tables']['ADMISSIONS']['important_considerations'] = [
    'The data is sourced from the admission, discharge and transfer database from the hospital (often referred to as ‘ADT’ data).',
    'Organ donor accounts are sometimes created for patients who died in the hospital. These are distinct hospital admissions with very short, sometimes negative lengths of stay. Furthermore, their DEATHTIME is frequently the same as the earlier patient admission’s DEATHTIME.',
    'All text data, except for that in the INSURANCE column, is stored in upper case.'
]
# Column-level documentation for ADMISSIONS: a summary plus entries that
# pair a list of column names with the text describing those columns.
mimic3_dict['tables']['ADMISSIONS']['detailed_description'] = {
    'summary': 'The ADMISSIONS table defines all HADM_ID present in the database, covering an admission period between 1 June 2001 and 10 October 2012.',
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], "Each row of this table contains a unique `HADM_ID`, which represents a single patient's admission to the hospital. `HADM_ID` ranges from 1000000 - 1999999. It is possible for this table to have duplicate `SUBJECT_ID`, indicating that a single patient had multiple admissions to the hospital. The ADMISSIONS table can be linked to the PATIENTS table using `SUBJECT_ID`."),
        (['ADMITTIME', 'DISCHTIME', 'DEATHTIME'], "`ADMITTIME` provides the date and time the patient was admitted to the hospital, while `DISCHTIME` provides the date and time the patient was discharged from the hospital. If applicable, `DEATHTIME` provides the time of in-hospital death for the patient. Note that `DEATHTIME` is only present if the patient died in-hospital, and is almost always the same as the patient's `DISCHTIME`. However, there can be some discrepancies due to typographical errors."),
        (['ADMISSION_TYPE'], "`ADMISSION_TYPE` describes the type of the admission: 'ELECTIVE', 'URGENT', 'NEWBORN' or 'EMERGENCY'. Emergency/urgent indicate unplanned medical care, and are often collapsed into a single category in studies. Elective indicates a previously planned hospital admission. Newborn indicates that the `HADM_ID` pertains to the patient's birth."),
        (['ADMISSION_LOCATION'], "`ADMISSION_LOCATION` provides information about the previous location of the patient prior to arriving at the hospital. There are 9 possible values:\n\n* EMERGENCY ROOM ADMIT\n* TRANSFER FROM HOSP/EXTRAM\n* TRANSFER FROM OTHER HEALT\n* CLINIC REFERRAL/PREMATURE\n* ** INFO NOT AVAILABLE **\n* TRANSFER FROM SKILLED NUR\n* TRSF WITHIN THIS FACILITY\n* HMO REFERRAL/SICK\n* PHYS REFERRAL/NORMAL DELI\n\nThe truncated text occurs in the raw data."),
        (['INSURANCE', 'LANGUAGE', 'RELIGION', 'MARITAL_STATUS', 'ETHNICITY'], "The `INSURANCE`, `LANGUAGE`, `RELIGION`, `MARITAL_STATUS`, `ETHNICITY` columns describe patient demographics. These columns occur in the ADMISSIONS table as they are originally sourced from the admission, discharge, and transfers (ADT) data from the hospital database. The values occasionally change between hospital admissions (`HADM_ID`) for a single patient (`SUBJECT_ID`). This is reasonable for some fields (e.g. `MARITAL_STATUS`, `RELIGION`), but less reasonable for others (e.g. `ETHNICITY`)."),
        (['EDREGTIME', 'EDOUTTIME'], "Time that the patient was registered and discharged from the emergency department."),
        (['DIAGNOSIS'], "The `DIAGNOSIS` column provides a preliminary, free text diagnosis for the patient on hospital admission. The diagnosis is usually assigned by the admitting clinician and does not use a systematic ontology. As of MIMIC-III v1.0 there were 15,693 distinct diagnoses for 58,976 admissions. The diagnoses can be very informative (e.g. chronic kidney failure) or quite vague (e.g. weakness). Final diagnoses for hospital admissions are coded and can be found in the DIAGNOSES_ICD table.")
    ]
}

###########
##CALLOUT##
###########

mimic3_dict['tables']['CALLOUT']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['CALLOUT']['table_purpose'] = 'Provides information when a patient was READY for discharge from the ICU, and when the patient was actually discharged from the ICU.'
mimic3_dict['tables']['CALLOUT']['brief_summary'] = 'The CALLOUT table provides information about ICU discharge planning. When a patient is deemed ready to leave the ICU, they are "called out". This process involves: (i) a care provider registering that the patient is ready to leave the ICU and detailing any specialized precautions required, (ii) a coordinator acknowledging the patient requires a bed outside the ward, (iii) a variable period of time in order to coordinate the transfer, and finally (iv) an outcome: either the patient is called out (discharged) or the call out event is canceled. This table provides information for all of the above.'
mimic3_dict['tables']['CALLOUT']['important_considerations'] = [
    "Call out data is *not* available for all adult patients, as the data collection only began part way through the collection of the MIMIC database", 
    "Call out data is *never* available for neonates"
]
mimic3_dict['tables']['CALLOUT']['detailed_description'] = {
    'summary': 'The CALLOUT table provides information for "call out events". A call out event occurs as follows: first, a patient is ready for discharge from the ICU. A care provider creates a call out request (occurring at `CREATETIME`) stating the desired ward and any necessary precautions. The call out request is acknowledged, usually within a short period of time (`ACKNOWLEDGETIME`). When the patient is actually transferred out of the ICU, the `CALLOUT_OUTCOME` becomes "Discharged" and the `OUTCOMETIME` column contains the time at which the patient was discharged.',
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], "`SUBJECT_ID` and `HADM_ID` define the patient and hospital admission corresponding to the given call out event.\n? Unique"),
        (['SUBMIT_WARDID','SUBMIT_CAREUNIT'], "`SUBMIT_WARDID` identifies the ward from which the request was submitted. `SUBMIT_CAREUNIT` indicates whether the `SUBMIT_WARDID` corresponds to an ICU cost center, and if so, what type of ICU cost center."),
        (['CURR_WARDID', 'CURR_CAREUNIT'], "`CURR_WARDID` identifies the ward in which the patient resides when called out (i.e. prior to discharge/transfer). `CURR_CAREUNIT` indicates which ICU cost center the `CURR_WARDID` corresponds to (note: since all patients are being discharged from an ICU, all patients should reside in an ICU cost center)."),
        (['CALLOUT_WARDID', 'CALLOUT_SERVICE'], "`CALLOUT_WARDID` identifies the ward to which the patient should be discharged. Note that `CALLOUT_WARDID` = 0 represents 'Home' and `CALLOUT_WARDID` = 1 represents 'First available ward'. The remaining IDs correspond to distinct wards in the hospital. `CALLOUT_SERVICE` is the service under which the patient should be discharged."),
        (['REQUEST_TELE', 'REQUEST_RESP', 'REQUEST_CDIFF', 'REQUEST_MRSA', 'REQUEST_VRE'], "The request columns are binary indicators which request certain precautions for the patient in the subsequent ward where they will reside. For example, MRSA means that the patient is colonized or infected with MRSA, a drug resistant source of hospital acquired infections."),
        (['CALLOUT_STATUS', 'CALLOUT_OUTCOME'], "`CALLOUT_STATUS` indicates whether the call out is still active or not: if a call out is answered it should be flagged as inactive. `CALLOUT_OUTCOME` is either 'Discharged' or 'Cancelled', indicating whether the patient finally called out (i.e. discharged) or not."),
        (['DISCHARGE_WARDID'], "`DISCHARGE_WARDID` indicates the ward to which the patient was actually discharged. `DISCHARGE_WARDID` = 0 indicates home and other values correspond to distinct wards in the hospital."),
        (['ACKNOWLEDGE_STATUS'], "`ACKNOWLEDGE_STATUS` indicates the response to the callout event: 'Acknowledged', 'Revised', 'Unacknowledged' or 'Reactivated'."),
        (['CREATETIME', 'UPDATETIME', 'ACKNOWLEDGETIME', 'OUTCOMETIME', 'FIRSTRESERVATIONTIME', 'CURRENTRESERVATIONTIME'], "`CREATETIME` provides the time and date that the call out was initiated. `UPDATETIME` provides the last time at which the call out event was updated. `ACKNOWLEDGETIME` is the time at which the call out was first acknowledged.\n`OUTCOMETIME` is the time at which the `CALLOUT_OUTCOME` occurred. `FIRSTRESERVATIONTIME` and `CURRENTRESERVATIONTIME` provide information regarding ward reservations."),
    ]
}

##############
##CAREGIVERS##
##############

mimic3_dict['tables']['CAREGIVERS']['table_source'] = 'CareVue and Metavision ICU databases.'
mimic3_dict['tables']['CAREGIVERS']['table_purpose'] = 'Defines the role of caregivers.'
mimic3_dict['tables']['CAREGIVERS']['brief_summary'] = 'This table provides information regarding care givers. For example, it would define if a care giver is a research nurse (RN), medical doctor (MD), and so on.'
mimic3_dict['tables']['CAREGIVERS']['important_considerations'] = []
mimic3_dict['tables']['CAREGIVERS']['detailed_description'] = {
    'summary': 'The CAREGIVERS table provides information regarding the type of caregiver. Each caregiver is represented by a unique integer which maps to this table.',
    'entries': [
        (['CGID'], "`CGID` is a unique identifier for each distinct caregiver present in the database. `CGID` is sourced from two tables in the raw data: the CareVue and Metavision ICU databases. Due to imprecision in the storage of unique identifiers across the database, it is possible that two distinct caregivers with the same names (e.g. RN Sarah Jones and MD Sarah Jones) would be considered as the same caregiver. However, this is an unlikely occurrence."),
        (['LABEL'], "`LABEL` defines the type of caregiver: e.g. RN, MD, PharmD, etc. Note that `LABEL` is a free text field and as such contains many typographical errors and spelling variants of the same concept (e.g. MD, MDs, M.D.)."),
        (['DESCRIPTION'], "`DESCRIPTION` is less frequently present than `LABEL`, and provides additional information regarding the caregiver. This column is much more structured, and contains only 17 unique values as of MIMIC-III v1.0.")
    ]
}

###############
##CHARTEVENTS##
###############

mimic3_dict['tables']['CHARTEVENTS']['table_source'] = 'CareVue and Metavision ICU databases.'
mimic3_dict['tables']['CHARTEVENTS']['table_purpose'] = 'Contains all charted data for all patients.'
mimic3_dict['tables']['CHARTEVENTS']['brief_summary'] = "CHARTEVENTS contains all the charted data available for a patient. During their ICU stay, the primary repository of a patient's information is their electronic chart. The electronic chart displays patients' routine vital signs and any additional information relevant to their care: ventilator settings, laboratory values, code status, mental status, and so on. As a result, the bulk of information about a patient's stay is contained in CHARTEVENTS. Furthermore, even though laboratory values are captured elsewhere (LABEVENTS), they are frequently repeated within CHARTEVENTS. This occurs because it is desirable to display the laboratory values on the patient's electronic chart, and so the values are copied from the database storing laboratory values to the database storing the CHARTEVENTS."
mimic3_dict['tables']['CHARTEVENTS']['important_considerations'] = [
    'Some items are duplicated between the labevents and chartevents tables. In cases where there is disagreement between measurements, labevents should be taken as the ground truth.',
]
mimic3_dict['tables']['CHARTEVENTS']['detailed_description'] = {
    'summary': "",
    'entries': [
        (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
        (['ITEMID'], "Identifier for a single measurement type in the database. Each row associated with one `ITEMID` (e.g. 212) corresponds to an instantiation of the same measurement (e.g. heart rate)."),
        (['CHARTTIME', 'STORETIME'], "`CHARTTIME` records the time at which an observation was made, and is usually the closest proxy to the time the data was actually measured. `STORETIME` records the time at which an observation was manually input or manually validated by a member of the clinical staff."),
        (['CGID'], "`CGID` is the identifier for the caregiver who validated the given measurement."),
        (['VALUE', 'VALUENUM'], "`VALUE` contains the value measured for the concept identified by the `ITEMID`. If this value is numeric, then `VALUENUM` contains the same data in a numeric format. If this data is not numeric, `VALUENUM` is null. In some cases (e.g. scores like Glasgow Coma Scale, Richmond Sedation Agitation Scale and Code Status), `VALUENUM` contains the score and `VALUE` contains the score and text describing the meaning of the score."),
        (['VALUEUOM'], "`VALUEUOM` is the unit of measurement for the `VALUE`, if appropriate."),
        (['WARNING', 'ERROR'], "`WARNING` and `ERROR` are Metavision specific columns which specify if a warning for the value was raised and if an error occurred during the measurement."),
        (['RESULTSTATUS', 'STOPPED'], "`RESULTSTATUS` and `STOPPED` are CareVue specific columns which specify the type of measurement (`RESULTSTATUS` is 'Manual' or 'Automatic') and whether the measurement was stopped."),
    ]
}

#############
##CPTEVENTS##
#############

mimic3_dict['tables']['CPTEVENTS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['CPTEVENTS']['table_purpose'] = 'Contains current procedural terminology (CPT) codes, which facilitate billing for procedures performed on patients.'
mimic3_dict['tables']['CPTEVENTS']['brief_summary'] = 'The CPTEVENTS table contains a list of which current procedural terminology codes were billed for which patients. This can be useful for determining if certain procedures have been performed (e.g. ventilation).'
mimic3_dict['tables']['CPTEVENTS']['important_considerations'] = [
    "The respiratory cost center bills for ventilation *regardless* of duration. That means that 30 minutes of mechanical ventilation for a single day would result in same billing code as a full 24 hours of mechanical ventilation.",
    "Non-invasive ventilation and mechanical ventilation use the same CPT code. Differentiating these concepts requires selecting them using the `DESCRIPTION` column.",
]
mimic3_dict['tables']['CPTEVENTS']['detailed_description'] = {
    'summary': "`CPTEVENTS` contains current procedural terminology (CPT) codes for patients as billed through either the ICU cost center or the respiratory cost center. Each code represents a distinct procedure performed on the patient during their ICU stay.",
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], 'Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay.'),
        (['COSTCENTER'], "`COSTCENTER` is the cost center which billed for the corresponding CPT codes. There are two possible cost centers: 'ICU' and 'Resp'. 'Resp' codes correspond to mechanical or non-invasive ventilation and were billed by the respiratory therapist. 'ICU' codes correspond to the procedures billed for by the ICU."),
        (['CHARTDATE'], "The date at which the procedure occurred."),
        (['CPT_CD', 'CPT_NUMBER', 'CPT_SUFFIX'], "`CPT_CD` contains the original CPT code. `CPT_NUMBER` is a numeric version of the `CPT_CD` column, which allows for easier range comparisons in querying. However, note not all `CPT_CD` are fully numeric. The `CPT_SUFFIX` column contains the text suffix when the `CPT_CD` contains non-numeric characters."),
        (['TICKET_ID_SEQ'], "The order of the `CPT_CD`."),
        (['SECTIONHEADER', 'SUBSECTIONHEADER'], "The section headers provide a category for the given CPT code. These headers were assigned using the D_CPT table."),
        (['DESCRIPTION'], "In the case of a `CPT_CD` corresponding to the respiratory cost center, the description provides information about the meaning of the CPT code. Otherwise, the field is null."),
    ]
}

#########
##D_CPT##
#########

mimic3_dict['tables']['D_CPT']['table_source'] = 'Online definitions.'
mimic3_dict['tables']['D_CPT']['table_purpose'] = 'High-level definitions for current procedural terminology (CPT) codes.'
mimic3_dict['tables']['D_CPT']['brief_summary'] = 'This table gives some high level information regarding current procedural terminology (CPT) codes. Unfortunately, detailed information for individual codes is unavailable.'
mimic3_dict['tables']['D_CPT']['important_considerations'] = [
    "Unlike all other definition tables, `D_CPT` does *not* have a one to one mapping with the corresponding `CPT_CD` in `CPTEVENTS`, rather each row of `D_CPT` maps to a range of `CPT_CD`.",
]
mimic3_dict['tables']['D_CPT']['detailed_description'] = {
    'summary': "`D_CPT` provides information about CPT codes, specifically it provides the overall purpose of the procedure and in some cases the body system related to the procedure.",
    'entries': [
        (['CATEGORY'], "`CATEGORY` is a integer which identifies the category of the CPT code."),
        (['SECTIONRANGE', 'SECTIONHEADER'], "`SECTIONRANGE` defines the range of codes for the given section, and `SECTIONHEADER` provides the description of the given section. There are 8 possible sections:\n\n* Evaluation and management\n* Surgery\n* Radiology\n* Anesthesia\n* Emerging technology\n* Pathology and laboratory\n* Performance measurement\n* Medicine\n\nNote that the evaluation and management tends to represent administrative or generic costs."),
        (['SUBSECTIONRANGE', 'SUBSECTIONHEADER'], "Similarly as for the sections, `SUBSECTIONRANGE` defines the range of codes for the given subsection, and `SUBSECTIONHEADER` provides the description of the given subsection. The subsection provides extra detail which can be useful, for example, when the section header is 'Anesthesia' the subsection provides information on the general anatomical region for the anesthesia."),
        (['MINCODEINSUBSECTION', 'MAXCODEINSUBSECTION'], "These columns provide numeric representations of minimum and maximum value in the `SUBSECTIONRANGE` column. This facilitates joining the `CPTEVENTS` table to the `D_CPT` table on `CPT_CD`."),
    ]
}

###################
##D_ICD_DIAGNOSES##
###################

mimic3_dict['tables']['D_ICD_DIAGNOSES']['table_source'] = 'Online definitions.'
mimic3_dict['tables']['D_ICD_DIAGNOSES']['table_purpose'] = 'Definition table for ICD diagnoses.'
mimic3_dict['tables']['D_ICD_DIAGNOSES']['brief_summary'] = "This table defines International Classification of Diseases Version 9 (ICD-9) codes for **diagnoses**. These codes are assigned at the end of the patient's stay and are used by the hospital to bill for care provided."
mimic3_dict['tables']['D_ICD_DIAGNOSES']['important_considerations'] = []
mimic3_dict['tables']['D_ICD_DIAGNOSES']['detailed_description'] = {
    'summary': "",
    'entries': [
        (['ICD9_CODE'], "`ICD9_CODE` is the International Coding Definitions Version 9 (ICD-9) code. Each code corresponds to a single diagnostic concept."),
        (['SHORT_TITLE', 'LONG_TITLE'], "The title fields provide a brief definition for the given diagnosis code in `ICD9_CODE`."),
    ]
}

####################
##D_ICD_PROCEDURES##
####################

mimic3_dict['tables']['D_ICD_PROCEDURES']['table_source'] = 'Online sources.'
mimic3_dict['tables']['D_ICD_PROCEDURES']['table_purpose'] = 'Definition table for ICD procedures.'
mimic3_dict['tables']['D_ICD_PROCEDURES']['brief_summary'] = "This table defines International Classification of Diseases Version 9 (ICD-9) codes for **procedures**. These codes are assigned at the end of the patient's stay and are used by the hospital to bill for care provided. They can further be used to identify if certain procedures have been performed (e.g. surgery)."
mimic3_dict['tables']['D_ICD_PROCEDURES']['important_considerations'] = []
mimic3_dict['tables']['D_ICD_PROCEDURES']['detailed_description'] = {
    'summary': "",
    'entries': [
        (['ICD9_CODE'], "`ICD9_CODE` is the International Coding Definitions Version 9 (ICD-9) code. Each code corresponds to a single procedural concept."),
        (['SHORT_TITLE', 'LONG_TITLE'], "The title fields provide a brief definition for the given procedure code in `ICD9_CODE`."),
    ]
}

###########
##D_ITEMS##
###########

mimic3_dict['tables']['D_ITEMS']['table_source'] = 'CareVue and Metavision ICU databases.'
mimic3_dict['tables']['D_ITEMS']['table_purpose'] = "Definition table for all items in the ICU databases."
mimic3_dict['tables']['D_ITEMS']['brief_summary'] = ""
mimic3_dict['tables']['D_ITEMS']['important_considerations'] = [
    "D_ITEMS is sourced from two *distinct* ICU databases. The main consequence is that there are duplicate `ITEMID` for each concept. For example, heart rate is captured both as an `ITEMID` of 212 (CareVue) and as an `ITEMID` of 220045 (Metavision). As a result, it is necessary to search for multiple `ITEMID` to capture a single concept across the entire database. This can be tedious, and it is an active project to coalesce these `ITEMID` - one which welcomes any and all help provided by the community!",
    "Another source of duplicate `ITEMID` is due to the free text nature of data entry in CareVue - as a result there are additional `ITEMID` which correspond to misspellings or synonymous descriptions of a single concept. It is important to search for all possible abbreviations and descriptions of a concept to capture all associated `ITEMID`.",
    "If the `LINKSTO` column is null, then the data is currently unavailable, but planned for a future release."
]
# Column-level documentation for D_ITEMS.  The markdown-escaped underscore
# is written as "\\_" so the stored text still contains a backslash followed
# by an underscore, without relying on the invalid string escape "\_".
mimic3_dict['tables']['D_ITEMS']['detailed_description'] = {
    'summary': "The D_ITEMS table defines `ITEMID`, which represents measurements in the database. Measurements of the same type (e.g. heart rate) will have the same `ITEMID` (e.g. 211). The `ITEMID` column is an alternate primary key to this table: it is unique to each row.\n\nNote that the D_ITEMS table is sourced from two ICU databases: Metavision and CareVue. Each system had its own set of `ITEMID` to identify concepts. As a result, there are multiple `ITEMID` which correspond to the same concept. For CareVue data, `ITEMID` = 211 is used to identify heart rates, whereas for Metavision data, `ITEMID` = 220045 is used. All Metavision `ITEMID`s will have a value > 220000.\n\nNote that the D\\_ITEMS table does *not* link to the LABEVENTS table, as this data was acquired separately from the hospital database. The D\\_ITEMS table was acquired from the ICU databases.",
    'entries': [
        (['ITEMID'], "As an alternate primary key to the table, `ITEMID` is unique to each row."),
        (['LABEL', 'ABBREVIATION'], "The `LABEL` column describes the concept which is represented by the `ITEMID`. The `ABBREVIATION` column, only available in Metavision, lists a common abbreviation for the label."),
        (['DBSOURCE'], "The `DBSOURCE` column was generated to clarify which database the given `ITEMID` was sourced from: 'carevue' indicates the `ITEMID` was sourced from CareVue, while 'metavision' indicated the `ITEMID` was sourced from Metavision."),
        (['LINKSTO'], "`LINKSTO` provides the table name which the data links to. For example, a value of 'chartevents' indicates that the `ITEMID` of the given row is contained in CHARTEVENTS. A single `ITEMID` is only used in one event table, that is, if an `ITEMID` is contained in CHARTEVENTS it will *not* be contained in any other event table (e.g. IOEVENTS, CHARTEVENTS, etc)."),
        (['CATEGORY'], "`CATEGORY` provides some information of the type of data the `ITEMID` corresponds to. Examples include 'ABG', which indicates the measurement is sourced from an arterial blood gas, 'IV Medication', which indicates that the medication is administered through an intravenous line, and so on."),
        (['UNITNAME'], "`UNITNAME` specifies the unit of measurement used for the `ITEMID`. This column is not always available, and this may be because the unit of measurement varies, a unit of measurement does not make sense for the given data type, or the unit of measurement is simply missing. Note that there is sometimes additional information on the unit of measurement in the associated event table, e.g. the `VALUEUOM` column in CHARTEVENTS."),
        (['PARAM_TYPE'], "`PARAM_TYPE` describes the type of data which is recorded: a date, a number or a text field."),
    ]
}

##############
##D_LABITEMS##
##############

mimic3_dict['tables']['D_LABITEMS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['D_LABITEMS']['table_purpose'] = "Definition table for all laboratory measurements."
mimic3_dict['tables']['D_LABITEMS']['brief_summary'] = ""
mimic3_dict['tables']['D_LABITEMS']['important_considerations'] = [
    "The `ITEMID` from MIMIC-III v1.0 does *not* match the `ITEMID` from MIMIC-II v2.6. If a mapping between the two is necessary, please contact the guardians of the database.",
    "Many of the LOINC codes were assigned during a project to standardize the ontology of lab measurements in the MIMIC database. Consequently, the codes were assigned post-hoc, and may not be present for every lab measurement. We welcome improvements to the present codes or assignment of LOINC codes to unmapped data elements from the community."
]
# Column-level documentation for D_LABITEMS.  The markdown-escaped
# underscore is written as "\\_" so the stored text still contains a
# backslash followed by an underscore, without relying on the invalid string
# escape "\_".
mimic3_dict['tables']['D_LABITEMS']['detailed_description'] = {
    'summary': "`D_LABITEMS` contains definitions for all `ITEMID` associated with lab measurements in the MIMIC database. All data in LABEVENTS link to the D\\_LABITEMS table. Each unique `LABEL` in the hospital database was assigned an `ITEMID` in this table, and the use of this `ITEMID` facilitates efficient storage and querying of the data. Note that lab items are kept separate while most definitions are contained in the D_ITEMS table, and there were good reasons to keep the lab items separate.\n\nAs the laboratory data is acquired from the hospital database, the data is consistent across all years in the database. Consequently, there is usually only one `ITEMID` associated with each concept in the database. Furthermore, the data contains information collected in departments outside the ICU. This includes both wards within the hospital and clinics outside the hospital. Most concepts in this table have been mapped to LOINC codes, an openly available ontology which provides a rich amount of information about the laboratory measurement including reference ranges, common units of measurement and other further detail regarding the measurement.",
    'entries': [
        (['ITEMID'], "As a candidate key in the table, `ITEMID` is unique to each row."),
        (['LABEL'], "The `LABEL` column describes the concept which is represented by the `ITEMID`."),
        (['FLUID'], "`FLUID` describes the substance on which the measurement was made. For example, chemistry measurements are frequently performed on blood, which is listed in this column as 'BLOOD'. Many of these measurements are also acquirable on other fluids, such as urine, and this column differentiates these distinct concepts."),
        (['CATEGORY'], "`CATEGORY` provides higher level information as to the type of measurement. For example, a category of 'ABG' indicates that the measurement is an arterial blood gas."),
        (['LOINC_CODE'], "`LOINC_CODE` contains the LOINC code associated with the given `ITEMID`. LOINC is an ontology which originally specified laboratory measurements but has since expanded to cover a wide range of clinically relevant concepts. LOINC openly provide a table which contains a large amount of detail about each LOINC code. This table is freely available online or can be provided by the guardians of the database."),
    ]
}

##################
##DATETIMEEVENTS##
##################

# DATETIMEEVENTS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['DATETIMEEVENTS'].update(
    table_source='CareVue and Metavision ICU databases.',
    table_purpose="Contains all date formatted data.",
    brief_summary="",
    important_considerations=[],
    detailed_description=dict(
        summary="DATETIMEEVENTS contains all date measurements about a patient in the ICU. For example, the date of last dialysis would be in the DATETIMEEVENTS table, but the systolic blood pressure would not be in this table. As all dates in MIMIC-III are anonymized to protect patient confidentiality, all dates in this table have been shifted. Note that the chronology for an individual patient has been unaffected however, and quantities such as the difference between two dates remain true to reality.",
        # Each entry is (column names, description); an empty string means no
        # description has been supplied for that group of columns.
        entries=[
            (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
            (['ITEMID'], ""),
            (['CHARTTIME', 'STORETIME'], "`CHARTTIME` records the time at which an observation was charted, and is usually the closest proxy to the time the data was actually measured. `STORETIME` records the time at which an observation was manually input or manually validated by a member of the clinical staff."),
            (['CGID'], "`CGID` is the identifier for the caregiver who validated the given measurement."),
            (['VALUE'], ""),
            (['VALUEUOM'], ""),
            (['WARNING', 'ERROR'], ""),
            (['RESULTSTATUS', 'STOPPED'], ""),
        ],
    ),
)

#################
##DIAGNOSES_ICD##
#################

# DIAGNOSES_ICD: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['DIAGNOSES_ICD'].update(
    table_source='Hospital database.',
    table_purpose="Contains ICD diagnoses for patients, most notably ICD-9 diagnoses.",
    brief_summary="",
    important_considerations=[
        "All ICD codes in MIMIC-III are ICD-9 based. The Beth Israel Deaconess Medical Center will begin using ICD-10 codes in 2015.",
        "The code field for the ICD-9-CM Principal and Other Diagnosis Codes is six characters in length, with the decimal point implied between the third and fourth digit for all diagnosis codes other than the V codes. The decimal is implied for V codes between the second and third digit.",
    ],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description).
        entries=[
            (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
            (['SEQ_NUM'], "`SEQ_NUM` provides the order in which the ICD diagnoses relate to the patient. ICD diagnoses are ordered by priority - and the order does have an impact on the reimbursement for treatment."),
            (['ICD9_CODE'], "`ICD9_CODE` contains the actual code corresponding to the diagnosis assigned to the patient for the given row. Note that all codes, as of MIMIC-III v1.0, are ICD-9 codes."),
        ],
    ),
)

############
##DRGCODES##
############

# DRGCODES: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['DRGCODES'].update(
    table_source='Hospital database.',
    table_purpose="Contains diagnosis related groups (DRG) codes for patients.",
    brief_summary="",
    important_considerations=[
        "HCFA-DRG codes have multiple descriptions as they have changed over time. Sometimes these descriptions are similar, but sometimes they are *completely different diagnoses*. Users will need to select rows using both the code *and* the description.",
        "Since there are multiple versions of DRG codes, queries will need to incorporate both the type of DRG and the code when filtering for a certain diagnosis.",
        "All patients have an HCFA-DRG code, but not all patients have an APR-DRG code. Note that APR-DRG is believed to be an alternative, more specific, code which could be used in conjunction with the HCFA codes.",
    ],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description); the DESCRIPTION text embeds
        # a Markdown table of comorbidity acronyms.
        entries=[
            (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
            (['DRG_TYPE'], "`DRG_TYPE` provides the type of DRG code in the entry. There are two types of DRG codes in the database which have overlapping ranges but distinct definitions for the codes. The two types of DRG codes in the MIMIC-III database are 'HCFA' (Health Care Financing Administration) and 'APR' (All Payers Registry)."),
            (['DRG_CODE'], "`DRG_CODE` contains a code which represents the diagnosis billed for by the hospital."),
            (['DESCRIPTION'], "`DESCRIPTION` provides a human understandable summary of the meaning of the given DRG code.\nThe description field frequently has acronyms which represent comorbidity levels (comorbid conditions or 'CC'). The following table provides a definition for some of these acronyms:\n\n\nAcronym | Description\n---- | ----\nw CC/MCC | with CC or Major CC\nw MCC | with Major CC\nw CC | with CC and without Major CC\nw NonCC | with NonCC and without CC or Major CC\nw/o MCC | with CC or Non CC and without Major CC\nw/o CC/MCC | with nonCC and without CC or Major CC\n\nNote that there are three levels of comorbidities: none, with comorbid conditions, and with major comorbid conditions. These acronyms are primarily used in HCFA/MS DRG codes."),
            (['DRG_SEVERITY', 'DRG_MORTALITY'], "`DRG_SEVERITY` and `DRG_MORTALITY` provide additional granularity to DRG codes in the 'APR' DRG type. Severity and mortality allow for higher billing costs when a diagnosis is more severe, and vice versa."),
        ],
    ),
)

############
##ICUSTAYS##
############

# ICUSTAYS: documentation metadata for the table (source, purpose, caveats and
# per-column descriptions).
# The Markdown escape for an underscore is spelled "\\_" so that the stored text
# still contains a literal backslash without relying on Python's handling of
# the unrecognised escape sequence "\_" (which triggers a warning).
mimic3_dict['tables']['ICUSTAYS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['ICUSTAYS']['table_purpose'] = "Defines each ICUSTAY\\_ID in the database, i.e. defines a single ICU stay."
mimic3_dict['tables']['ICUSTAYS']['brief_summary'] = ""
mimic3_dict['tables']['ICUSTAYS']['important_considerations'] = [
    "`ICUSTAY_ID` is a *generated* identifier that is *not* based on any raw data identifier. The hospital and ICU databases are not intrinsically linked and so do not have any concept of an ICU encounter identifier.",
    "The ICUSTAYS table is derived from the TRANSFERS table. Specifically, it groups the TRANSFERS table based on ICUSTAY\\_ID, and excludes rows where no ICUSTAY\\_ID is present."
]
mimic3_dict['tables']['ICUSTAYS']['detailed_description'] = {
    'summary': "",
    # Each entry is (column names, description).
    'entries': [
        (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
        (['DBSOURCE'], "`DBSOURCE` contains the original ICU database the data was sourced from. Patients admitted between 2001 - 2008 had their data managed by the CareVue information system, represented in this column as 'carevue'. Patients admitted between 2008 - 2012 had their data managed by the Metavision system, represented in this column as 'metavision'. Knowing the database source is important as the data archiving for these two databases differs in some cases."),
        (['FIRST_CAREUNIT', 'LAST_CAREUNIT'], "`FIRST_CAREUNIT` and `LAST_CAREUNIT` contain, respectively, the first and last ICU type in which the patient was cared for. As an `ICUSTAY_ID` groups all ICU admissions within 24 hours of each other, it is possible for a patient to be transferred from one type of ICU to another and have the same `ICUSTAY_ID`.\n\nCare units are derived from the TRANSFERS table, and definition for the abbreviations can be found in the documentation for TRANSFERS."),
        (['FIRST_WARDID', 'LAST_WARDID'], '`FIRST_WARDID` and `LAST_WARDID` contain the first and last ICU unit in which the patient stayed. Note the grouping of physical locations in the hospital database is referred to as ward. Though in practice ICUs are not referred to as wards, the hospital database technically tracks ICUs as "wards with an ICU cost center". As a result, each ICU is associated with a `WARDID`.'),
        (['INTIME', 'OUTTIME'], "`INTIME` provides the date and time the patient was transferred into the ICU. `OUTTIME` provides the date and time the patient was transferred out of the ICU."),
        (['LOS'], "`LOS` is the length of stay for the patient for the given ICU stay, which may include one or more ICU units."),
    ]
}

##################
##INPUTEVENTS_CV##
##################

# INPUTEVENTS_CV: documentation metadata for the table (source, purpose,
# caveats and per-column descriptions).
mimic3_dict['tables']['INPUTEVENTS_CV']['table_source'] = 'CareVue ICU databases.'
mimic3_dict['tables']['INPUTEVENTS_CV']['table_purpose'] = "Input data for patients."
mimic3_dict['tables']['INPUTEVENTS_CV']['brief_summary'] = "A high level description of the data is available [here](/mimicdata/io)."
mimic3_dict['tables']['INPUTEVENTS_CV']['important_considerations'] = []
mimic3_dict['tables']['INPUTEVENTS_CV']['detailed_description'] = {
    'summary': "",
    # Each entry is (column names, description).
    # NOTE(review): the AMOUNT/RATE descriptions mention `STARTTIME`/`ENDTIME`,
    # but the entries for this table only list `CHARTTIME` - confirm whether
    # that wording was carried over from another table.
    'entries': [
        (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
        (['CHARTTIME'], "`CHARTTIME` represents the time at which the measurement was charted - that is - recorded on the clinical information system at the bedside.\nFor amounts received (usually volumes), the `CHARTTIME` represents the time at which that volume was received.\nThat is, it can be considered an 'end time', i.e. X millilitres of solution was administered to the patient by this `CHARTTIME`.\nFor rates, the `CHARTTIME` represents the time at which that rate was set.\nThat is, it can be considered a 'start time', i.e. the patient is now receiving X mcg/kg/min of a drug at this `CHARTTIME`.\n\nSee the [understanding IOEVENTS page](/mimicdata/ioevents) for a bit more detail about how this table was formed from the raw data."),
        (['ITEMID'], "Identifier for a single measurement type in the database. Each row associated with one `ITEMID` (e.g. 212) corresponds to an instantiation of the same measurement (e.g. heart rate).\nMetavision `ITEMID` values are all above 220000. A subset of commonly used medications in CareVue data have `ITEMID` values are between 30000-39999. The remaining input/output `ITEMID` values are between 40000-49999."),
        (['AMOUNT', 'AMOUNTUOM'], "`AMOUNT` and `AMOUNTUOM` list the amount of a drug or substance administered to the patient either between the `STARTTIME` and `ENDTIME` (if both are available) or at the `ENDTIME` (when the exact start time is unknown, but usually up to an hour before)."),
        (['RATE', 'RATEUOM'], "`RATE` and `RATEUOM` list the rate at which the drug or substance was administered to the patient either between the `STARTTIME` and `ENDTIME` (if both are available), or it lists the rate at which the drug is *currently* administered at the `ENDTIME`."),
        (['STORETIME'], "`STORETIME` records the time at which an observation was manually input or manually validated by a member of the clinical staff."),
        (['CGID'], "`CGID` is the identifier for the caregiver who validated the given measurement."),
        (['ORDERID', 'LINKORDERID'], "`ORDERID` links multiple items contained in the same solution together. For example, when a solution of noradrenaline and normal saline is administered both noradrenaline and normal saline occur on distinct rows but will have the same `ORDERID`.\n\n`LINKORDERID` links the same order across multiple instantiations: for example, if the rate of delivery for the solution with noradrenaline and normal saline is changed, two new rows which share the same new `ORDERID` will be generated, but the `LINKORDERID` will be the same."),
        (['STOPPED', 'NEWBOTTLE'], "`STOPPED` indicates whether the infusion has been disconnected or continued. `NEWBOTTLE` indicates if a new preparation of the solution was hung at the bedside."),
        (['ORIGINALAMOUNT', 'ORIGINALAMOUNTUOM', 'ORIGINALROUTE', 'ORIGINALRATE', 'ORIGINALRATEUOM', 'ORIGINALSITE'], "These columns provide information about the solution the medication was a part of when it was first entered into the information system.")
    ]
}

##################
##INPUTEVENTS_MV##
##################

# INPUTEVENTS_MV: documentation metadata for the table (source, purpose,
# caveats and per-column descriptions).
mimic3_dict['tables']['INPUTEVENTS_MV']['table_source'] = 'Metavision ICU databases.'
mimic3_dict['tables']['INPUTEVENTS_MV']['table_purpose'] = 'Input data for patients.'
mimic3_dict['tables']['INPUTEVENTS_MV']['brief_summary'] = "A high level description of the data is available [here](/mimicdata/io)."
mimic3_dict['tables']['INPUTEVENTS_MV']['important_considerations'] = [
    'A bolus will be listed as ending one minute after it started, i.e. `ENDTIME` = `STARTTIME` + 1 minute',
]
mimic3_dict['tables']['INPUTEVENTS_MV']['detailed_description'] = {
    'summary': "",
    # Each entry is (column names, description).
    'entries': [
        (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
        (['STARTTIME', 'ENDTIME'], "`STARTTIME` and `ENDTIME` record the start and end time of an input/output event."),
        (['ITEMID'], "Identifier for a single measurement type in the database. Each row associated with one `ITEMID` which corresponds to an instantiation of the same measurement (e.g. norepinephrine).\nMetaVision `ITEMID` values are all above 220000. Since this data only contains data from MetaVision, it only contains `ITEMID` above 220000 (see [here](/mimicdata/metavision/) for details about MetaVision)"),
        (['AMOUNT', 'AMOUNTUOM'], "`AMOUNT` and `AMOUNTUOM` list the amount of a drug or substance administered to the patient between the `STARTTIME` and `ENDTIME`."),
        (['RATE', 'RATEUOM'], "`RATE` and `RATEUOM` list the rate at which the drug or substance was administered to the patient between the `STARTTIME` and `ENDTIME`."),
        (['STORETIME'], "`STORETIME` records the time at which an observation was manually input or manually validated by a member of the clinical staff."),
        (['CGID'], "`CGID` is the identifier for the caregiver who validated the given measurement."),
        (['ORDERID', 'LINKORDERID'], "`ORDERID` links multiple items contained in the same solution together. For example, when a solution of noradrenaline and normal saline is administered both noradrenaline and normal saline occur on distinct rows but will have the same `ORDERID`.\n\n`LINKORDERID` links the same order across multiple instantiations: for example, if the rate of delivery for the solution with noradrenaline and normal saline is changed, two new rows which share the same new `ORDERID` will be generated, but the `LINKORDERID` will be the same."),
        (['ORDERCATEGORYNAME', 'SECONDARYORDERCATEGORYNAME', 'ORDERCOMPONENTTYPEDESCRIPTION', 'ORDERCATEGORYDESCRIPTION'], "These columns provide higher level information about the order the medication/solution is a part of. Categories represent the type of administration, while the `ORDERCOMPONENTTYPEDESCRIPTION` describes the role of the substance in the solution (i.e. main order parameter, additive, or mixed solution)"),
        (['PATIENTWEIGHT'], "The patient weight in kilograms."),
        (['TOTALAMOUNT', 'TOTALAMOUNTUOM'], "Intravenous administrations are usually given by hanging a bag of fluid at the bedside for continuous infusion over a certain period of time. These columns list the total amount of the fluid in the bag containing the solution."),
        (['STATUSDESCRIPTION'], "```STATUSDESCRIPTION``` states the ultimate status of the item. 'Stopped' indicates that the caregiver stopped the item or the programmed volume came to an end. 'Finished running' indicates that the programmed volume has come to an end. 'Rewritten' indicates that the caregiver rewrote the item, for example making an amendment to the starttime. 'Changed' indicates that the caregiver changed an item, for example setting a new rate or dose. 'Flushed' indicates that a line was flushed."),
        (['ISOPENBAG'], "Whether the order was from an open bag."),
        (['CONTINUEINNEXTDEPT'], "If the order ended on patient transfer, this field indicates if it continued into the next department (e.g. a floor)."),
        (['CANCELREASON'], "If the order was canceled, this column provides some explanation."),
        (['COMMENTS_STATUS', 'COMMENTS_TITLE', 'COMMENTS_DATE'], "Specifies if the order was edited or canceled, and if so, the date and job title of the care giver who canceled or edited it."),
        (['ORIGINALAMOUNT'], "Drugs are usually mixed within a solution and delivered continuously from the same bag. This column represents the amount of the drug contained in the bag at `STARTTIME`. For the first infusion of a new bag, `ORIGINALAMOUNT` = `TOTALAMOUNT`. Later on, if the rate is changed, then the amount of the drug in the bag will be lower (as some has been administered to the patient). As a result, `ORIGINALAMOUNT` < `TOTALAMOUNT`, and `ORIGINALAMOUNT` will be the amount of drug leftover in the bag at that `STARTTIME`."),
        (['ORIGINALRATE'], "This is the rate that was input by the care provider. Note that this may differ from `RATE` because of various reasons: `ORIGINALRATE` was the original planned rate, while the `RATE` column will be the true rate delivered. For example, if a bag is about to run out and the care giver decides to push the rest of the fluid, then `RATE` > `ORIGINALRATE`.\nHowever, these two columns are usually the same, but have minor non-clinically significant differences due to rounding error."),
    ]
}

#############
##LABEVENTS##
#############

# LABEVENTS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['LABEVENTS'].update(
    table_source='Hospital database.',
    table_purpose='Contains all laboratory measurements for a given patient, including out patient data.',
    brief_summary="",
    important_considerations=[
        "Note that the time associated with this result is the time of the fluid *acquisition*, not the time that the values were made available to the clinical staff.",
        'The labevents table contains both in-hospital laboratory measurements *and* out of hospital laboratory measurements from clinics which the patient has visited (since the patient is not "in" a hospital when visiting a clinic, these patients often referred to as "out patients" and the data is often called "out patient" data). Laboratory measurements for out patients **does not have a `HADM_ID`**.',
        "In MIMIC-III v1.0, there is a subset of patients for which the outpatient lab data is not available. They can be identified by checking for patients whose data *always* has an `HADM_ID`.",
        "In MIMIC-III v1.0, there is a subset of patients for which text laboratory data is missing. This primarily affects the blood gas type recorded with blood gases.",
        "Some items are duplicated between the labevents and chartevents tables. In cases where there is disagreement between measurements, labevents should be taken as the ground truth.",
    ],
    detailed_description=dict(
        summary="The `LABEVENTS` data contains information regarding laboratory based measurements. The process for acquiring a lab measurement is as follows: first, a member of the clinical staff acquires a fluid from a site in the patient's body (e.g. blood from an arterial line, urine from a catheter, etc). Next, the fluid is bar coded to associate it with the patient *and* timestamped to record the time of the fluid acquisition. The lab analyses the data and returns a result within 4-12 hours.",
        # Each entry is (column names, description).
        entries=[
            (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
            (['ITEMID'], "Identifier for a single measurement type in the database. Each row associated with one `ITEMID` (e.g. 212) corresponds to an instantiation of the same measurement (e.g. heart rate)."),
            (['CHARTTIME'], "`CHARTTIME` records the time at which an observation was charted, and is usually the closest proxy to the time the data was actually measured.\nNote that because the data is directly sourced from the laboratory database, it is *not* validated by ICU clinical staff, and as a result there is no associated `STORETIME`."),
            (['VALUE', 'VALUENUM'], "`VALUE` contains the value measured for the concept identified by the `ITEMID`. If this value is numeric, then `VALUENUM` contains the same data in a numeric format. If this data is not numeric, `VALUENUM` is null. In some cases (e.g. scores like Glasgow Coma Scale, Richmond Sedation Agitation Scale and Code Status), `VALUENUM` contains the score and `VALUE` contains the score and text describing the meaning of the score."),
            (['VALUEUOM'], "`VALUEUOM` is the unit of measurement for the `VALUE`, if appropriate."),
            (['FLAG'], "`FLAG` indicates whether the laboratory value is considered abnormal or not, using pre-defined thresholds."),
        ],
    ),
)

######################
##MICROBIOLOGYEVENTS##
######################

# MICROBIOLOGYEVENTS: documentation metadata for the table (source, purpose,
# caveats and per-column descriptions).
mimic3_dict['tables']['MICROBIOLOGYEVENTS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['MICROBIOLOGYEVENTS']['table_purpose'] = "Contains microbiology information, including tests performed and sensitivities."
mimic3_dict['tables']['MICROBIOLOGYEVENTS']['brief_summary'] = ""
mimic3_dict['tables']['MICROBIOLOGYEVENTS']['important_considerations'] = [
    "The MICROBIOLOGYEVENTS table does not contain cultures from samples taken outside the ICU",
    "If the specimen is null, then the culture had no growth reported."
]
mimic3_dict['tables']['MICROBIOLOGYEVENTS']['detailed_description'] = {
    'summary': "",
    # Every entry is a (column names, description) pair. Column groups without
    # a written description carry an empty string, so all entries keep the same
    # two-element shape used by the other tables in this file.
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
        (['CHARTDATE', 'CHARTTIME'], "`CHARTTIME` records the time at which an observation was charted, and is usually the closest proxy to the time the data was actually measured.\n`CHARTDATE` is the same as `CHARTTIME`, except there is no time available.\n\n`CHARTDATE` was included as time information is not always available for microbiology measurements: in order to be clear about when this occurs, `CHARTTIME` is null, and `CHARTDATE` contains the date of the measurement.\n\nIn the cases where both `CHARTTIME` and `CHARTDATE` exists, `CHARTDATE` is equal to a truncated version of `CHARTTIME` (i.e. `CHARTTIME` without the timing information). Not all observations have a `CHARTTIME`, but all observations have a `CHARTDATE`."),
        (['SPEC_ITEMID', 'SPEC_TYPE_CD', 'SPEC_TYPE_DESC'], "Details the itemid, code, and description for the specimen."),
        (['ORG_ITEMID', 'ORG_CD', 'ORG_NAME'], ""),
        (['ISOLATE_NUM'], ""),
        (['AB_ITEMID', 'AB_CD', 'AB_NAME'], ""),
        (['DILUTION_TEXT', 'DILUTION_COMPARISON', 'DILUTION_VALUE'], ""),
        (['INTERPRETATION'], '`INTERPRETATION` indicates the results of the test. "S" is sensitive, "R" is resistant, "I" is intermediate, and "P" is pending.'),
    ]
}

##############
##NOTEEVENTS##
##############

# NOTEEVENTS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['NOTEEVENTS'].update(
    table_source='Hospital database.',
    table_purpose="Contains all notes for patients.",
    brief_summary="",
    important_considerations=[
        "`TEXT` is often large and contains many newline characters: it may be easier to read if viewed in a distinct program rather than the one performing the queries.",
        "Echo reports, ECG reports, and radiology reports are available for both inpatient and outpatient stays. If a patient is an outpatient, there will not be an `HADM_ID` associated with the note. If the patient is an inpatient, but was not admitted to the ICU for that particular hospital admission, then there will *not* be an `HADM_ID` associated with the note.",
        'Echos are generated using templates and in some cases there may be discrepancies in severity.  For example one report may contain: "Mild PA systolic hypertension. Severe PA systolic hypertension." indicating that the caregiver may not have removed the appropriate item from the template.',
    ],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description).
        entries=[
            (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
            (['CHARTDATE'], "`CHARTDATE` records the date at which the note was charted."),
            (['CATEGORY', 'DESCRIPTION'], "`CATEGORY` and `DESCRIPTION` define the type of note recorded. For example, a `CATEGORY` of 'Discharge' indicates that the note is a discharge note, and a `DESCRIPTION` of 'Summary' in conjunction with this indicates that the note is a discharge summary."),
            (['CGID'], "`CGID` is the identifier for the caregiver who input the note."),
            (['ISERROR'], "A '1' in the `ISERROR` column indicates that a physician has identified this note as an error. "),
            (['TEXT'], "`TEXT` contains the note text."),
        ],
    ),
)

################
##OUTPUTEVENTS##
################

# OUTPUTEVENTS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['OUTPUTEVENTS'].update(
    table_source='CareVue and Metavision ICU databases.',
    table_purpose="Output data for patients.",
    brief_summary="",
    important_considerations=[],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description).
        entries=[
            (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
            (['CHARTTIME'], "`CHARTTIME` is the time of an output event."),
            (['ITEMID'], "Identifier for a single measurement type in the database. Each row associated with one `ITEMID` (e.g. 212) corresponds to an instantiation of the same measurement (e.g. heart rate).\n\nMetavision `ITEMID` values are all above 220000. A subset of commonly used medications in CareVue data have `ITEMID` values are between 30000-39999. The remaining input/output `ITEMID` values are between 40000-49999."),
            (['VALUE', 'VALUEUOM'], "`VALUE` and `VALUEUOM` list the amount of a substance at the `CHARTTIME` (when the exact start time is unknown, but usually up to an hour before)."),
            (['STORETIME'], "`STORETIME` records the time at which an observation was manually input or manually validated by a member of the clinical staff."),
            (['CGID'], "`CGID` is the identifier for the caregiver who validated the given measurement."),
            (["STOPPED", "NEWBOTTLE", "ISERROR"], "`STOPPED` indicates if the order was disconnected at the given `CHARTTIME`. `NEWBOTTLE` indicates that a new bag of solution was hung at the given `CHARTTIME`. `ISERROR` is a Metavision checkbox where a care giver can specify that an observation is an error. No other details are provided."),
        ],
    ),
)

############
##PATIENTS##
############

# PATIENTS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
# NOTE(review): table_purpose talks about "charted data", while the SUBJECT_ID
# description below says the table stores per-patient information - confirm
# the purpose text is the intended one.
mimic3_dict['tables']['PATIENTS'].update(
    table_source='CareVue and Metavision ICU databases.',
    table_purpose="Contains all charted data for all patients.",
    brief_summary="",
    important_considerations=[
        "`DOB` has been shifted for patients older than 89. The median age for the patients whose date of birth was shifted is 91.4.",
    ],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description).
        entries=[
            (['SUBJECT_ID'], "`SUBJECT_ID` is a unique identifier which specifies an individual patient. `SUBJECT_ID` is a candidate key for the table, so is unique for each row. Information that is consistent for the lifetime of a patient is stored in this table."),
            (['GENDER'], "`GENDER` is the genotypical sex of the patient."),
            (['DOB'], "`DOB` is the date of birth of the given patient. Patients who are older than 89 years old at any time in the database have had their date of birth shifted to obscure their age and comply with HIPAA. The shift process was as follows: the patient's age at their first admission was determined. The date of birth was then set to exactly 300 years before their first admission. <!-- As a result, all patients-->"),
            (['DOD', 'DOD_HOSP', 'DOD_SSN'], "`DOD` is the date of death for the given patient. `DOD_HOSP` is the date of death as recorded in the hospital database. `DOD_SSN` is the date of death from the social security database. Note that `DOD` merged together `DOD_HOSP` and `DOD_SSN`, giving priority to `DOD_HOSP` if both were recorded."),
            (['EXPIRE_FLAG'], "`EXPIRE_FLAG` is a binary flag which indicates whether the patient died, i.e. whether `DOD` is null or not. These deaths include both deaths within the hospital (`DOD_HOSP`) and deaths identified by matching the patient to the social security master death index (`DOD_SSN`)."),
        ],
    ),
)

#################
##PRESCRIPTIONS##
#################

# PRESCRIPTIONS: merge the hand-written documentation fields (source, purpose,
# caveats and per-column descriptions) into the table's existing entry.
mimic3_dict['tables']['PRESCRIPTIONS'].update(
    table_source='Hospital provider order entry database.',
    table_purpose="Contains medication related order entries, i.e. prescriptions.",
    brief_summary="",
    important_considerations=[
        "The table does *not* specify if an order was later cancelled as of MIMIC-III v1.0.",
    ],
    detailed_description=dict(
        summary="",
        # Each entry is (column names, description); an empty string means no
        # description has been supplied for that group of columns.
        entries=[
            (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
            (['STARTTIME', 'ENDTIME'], "`STARTTIME` and `ENDTIME` specify the time period for which the prescription was valid."),
            (['DRUG_TYPE'], '`DRUG_TYPE` provides the type of drug prescribed.'),
            (['DRUG', 'DRUG_NAME_POE', 'DRUG_NAME_GENERIC'], "These columns are various representations of the drug prescribed to the patient."),
            (['FORMULARY_DRUG_CD', 'GSN', 'NDC'], "These columns provide a representation of the drug in various coding systems. `GSN` is the Generic Sequence Number. `NDC` is the [National Drug Code](https://en.wikipedia.org/wiki/National_Drug_Code)"),
            (['PROD_STRENGTH'], ""),
            (['DOSE_VAL_RX', 'DOSE_UNIT_RX'], ""),
            (['FORM_VAL_DISP', 'FORM_UNIT_DISP'], ""),
            (['ROUTE'], "The route prescribed for the drug."),
        ],
    ),
)

######################
##PROCEDUREEVENTS_MV##
######################

# PROCEDUREEVENTS_MV: merge the hand-written documentation fields into the
# table's existing entry. No per-column descriptions are supplied here.
mimic3_dict['tables']['PROCEDUREEVENTS_MV'].update(
    table_source='Metavision ICU database.',
    table_purpose="Contains procedures for patients",
    brief_summary="",
    important_considerations=[],
    detailed_description=dict(
        summary="",
        entries=[],
    ),
)

##################
##PROCEDURES_ICD##
##################

# Documentation metadata for the PROCEDURES_ICD table.
mimic3_dict['tables']['PROCEDURES_ICD']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['PROCEDURES_ICD']['table_purpose'] = "Contains ICD procedures for patients, most notably ICD-9 procedures."
mimic3_dict['tables']['PROCEDURES_ICD']['brief_summary'] = ""
mimic3_dict['tables']['PROCEDURES_ICD']['important_considerations'] = [
    "In MIMIC-III v1.0, only ICD-9 codes are used for recording procedures.",
]
mimic3_dict['tables']['PROCEDURES_ICD']['detailed_description'] = {
    'summary': "",
    # Each entry pairs a group of column names with the text describing that group.
    # The description text refers to the columns by the same names used as keys
    # (previously it mentioned `PROC_SEQ_NUM` and `CODE`, which are not listed here).
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
        (['SEQ_NUM'], "`SEQ_NUM` provides the order in which the procedures were performed."),
        (['ICD9_CODE'], "`ICD9_CODE` provides the code for the given procedure."),
    ]
}

############
##SERVICES##
############

# Documentation metadata for the SERVICES table.
mimic3_dict['tables']['SERVICES']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['SERVICES']['table_purpose'] = "Lists services that a patient was admitted/transferred under."
# The summary is one string assembled from adjacent literals: two paragraphs
# followed by a markdown table (one literal per table row).
mimic3_dict['tables']['SERVICES']['brief_summary'] = (
    "The services table describes the service that a patient was admitted under. "
    "While a patient can be physically located at a given ICU type (say MICU), they are not necessarily being cared for by the team which staffs the MICU. "
    "This can happen due to a number of reasons, including bed shortage. "
    "The SERVICES table should be used if interested in identifying the type of service a patient is receiving in the hospital. "
    "For example, if interested in identifying surgical patients, the recommended method is searching for patients admitted under a surgical service.\n\n"
    "Each service is listed in the table as an abbreviation - this is exactly how the data is stored in the hospital database. "
    "For user convenience, we have provided a description of each service type.\n\n"
    "Service | Description\n"
    "--- | ---\n"
    "CMED | Cardiac Medical - for non-surgical cardiac related admissions\n"
    "CSURG | Cardiac Surgery - for surgical cardiac admissions\n"
    "DENT | Dental - for dental/jaw related admissions\n"
    "ENT | Ear, nose, and throat - conditions primarily affecting these areas\n"
    "GU | Genitourinary - reproductive organs/urinary system\n"
    "GYN | Gynecological - female reproductive systems and breasts\n"
    "MED | Medical - general service for internal medicine\n"
    "NB | Newborn - infants born at the hospital\n"
    "NBB | Newborn baby - infants born at the hospital\n"
    "NMED | Neurologic Medical - non-surgical, relating to the brain\n"
    "NSURG | Neurologic Surgical - surgical, relating to the brain\n"
    # OBS is described as Obstetrics, in agreement with mimic3_dict['services'].
    "OBS | Obstetrics - concerned with childbirth and the care of women giving birth\n"
    "ORTHO | Orthopaedic - surgical, relating to the musculoskeletal system\n"
    "OMED | Orthopaedic medicine - non-surgical, relating to musculoskeletal system\n"
    "PSURG | Plastic - restoration/reconstruction of the human body (including cosmetic or aesthetic)\n"
    "PSYCH | Psychiatric - mental disorders relating to mood, behaviour, cognition, or perceptions\n"
    "SURG | Surgical - general surgical service not classified elsewhere\n"
    "TRAUM | Trauma - injury or damage caused by physical harm from an external source\n"
    "TSURG | Thoracic Surgical - surgery on the thorax, located between the neck and the abdomen\n"
    "VSURG | Vascular Surgical - surgery relating to the circulatory system"
)
mimic3_dict['tables']['SERVICES']['important_considerations'] = []
mimic3_dict['tables']['SERVICES']['detailed_description'] = {
    'summary': "",
    # Each entry pairs a group of column names with the text describing that group.
    'entries': [
        (['SUBJECT_ID', 'HADM_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient and `HADM_ID` is unique to a patient hospital stay."),
        (['TRANSFERTIME'], '`TRANSFERTIME` is the time at which the patient moved from the `PREV_SERVICE` (if present) to the `CURR_SERVICE`.'),
        (['PREV_SERVICE', 'CURR_SERVICE'], "`PREV_SERVICE` and `CURR_SERVICE` are the previous and current service that the patient resides under."),
    ]
}

#############
##TRANSFERS##
#############

# Documentation metadata for the TRANSFERS table.
mimic3_dict['tables']['TRANSFERS']['table_source'] = 'Hospital database.'
mimic3_dict['tables']['TRANSFERS']['table_purpose'] = "Physical locations for patients throughout their hospital stay."
mimic3_dict['tables']['TRANSFERS']['brief_summary'] = ""
mimic3_dict['tables']['TRANSFERS']['important_considerations'] = [
    "The ICUSTAYS table is derived from this table.",
    "Care units are defined based off the `WARDID` being associated with an ICU cost center.",
    "ICUs in the Beth Israel have moved throughout the years, and consequently the same `WARDID` may be considered as an ICU for patient A but not an ICU for patient B."
]
mimic3_dict['tables']['TRANSFERS']['detailed_description'] = {
    'summary': "",
    # Each entry pairs a group of column names with the text describing that group.
    'entries': [
        (['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID'], "Identifiers which specify the patient: `SUBJECT_ID` is unique to a patient, `HADM_ID` is unique to a patient hospital stay and `ICUSTAY_ID` is unique to a patient ICU stay."),
        (['DBSOURCE'], "`DBSOURCE` contains the original ICU database the data was sourced from. Patients admitted between 2001 - 2008 had their data managed by the CareVue information system, represented in this column as 'carevue'. Patients admitted between 2008 - 2012 had their data managed by the Metavision system, represented in this column as 'metavision'. Knowing the database source is important as the data archiving for these two databases differs in some cases."),
        (['EVENTTYPE'], "`EVENTTYPE` describes what transfer event occurred: 'admit' for an admission, 'transfer' for an inter-hospital transfer and 'discharge' for a discharge from the hospital."),
        (
            ['PREV_CAREUNIT', 'CURR_CAREUNIT'],
            # One string assembled from adjacent literals: two paragraphs, then a
            # markdown table with one literal per row.
            "`PREV_CAREUNIT` contains the care unit in which the patient previously resided. `CURR_CAREUNIT` contains the care unit in which the patient currently resides. The care unit is defined based upon the ward: if the ward is an ICU cost center, then the care unit defines the type of ICU. If the ward is *not* an ICU then in most cases the care unit is null. There are one or two exceptions to this rule. For example, ```NWARD``` is a ward for newborns.\n\n"
            "The `INTIME` and `OUTTIME` of the transfer event correspond to the `CURR_CAREUNIT`. The `PREV_CAREUNIT` for each row is provided for convenience, and is identical to the `CURR_CAREUNIT` of the previous row (assuming the event is not an admission).\n\n"
            # Blank line before the table header, as in the SERVICES brief summary;
            # without it the header row is fused to the lead-in sentence.
            "Care units include the following:\n\n"
            "Care unit | Description\n"
            "--- | ---\n"
            "CCU | Coronary care unit\n"
            "CSRU | Cardiac surgery recovery unit\n"
            "MICU | Medical intensive care unit\n"
            "NICU | Neonatal intensive care unit\n"
            "NWARD | Neonatal ward\n"
            "SICU | Surgical intensive care unit\n"
            "TSICU | Trauma/surgical intensive care unit",
        ),
        (['PREV_WARDID', 'CURR_WARDID'], '`PREV_WARDID` and `CURR_WARDID` contain the previous and current ward in which the patient stayed. Note that the grouping of physical locations in the hospital database is referred to as a ward. Though in practice ICUs are not referred to as wards, the hospital database technically tracks ICUs as "wards with an ICU cost center". As a result, each ICU is associated with a `WARDID`, but not every `WARDID` is an ICU.'),
        (['INTIME', 'OUTTIME'], "`INTIME` provides the date and time the patient was transferred into the current care unit from the previous care unit. `OUTTIME` provides the date and time the patient was transferred out of the current care unit."),
        (['LOS'], "`LOS` is the length of stay for the patient for the given ward stay, which may be within or outside of the ICU."),
    ]
}


# Care unit abbreviation -> human-readable care unit name.
mimic3_dict['icu'] = dict(
    CCU='Coronary care unit',
    CSRU='Cardiac surgery recovery unit',
    MICU='Medical intensive care unit',
    NICU='Neonatal intensive care unit',
    NWARD='Neonatal ward',
    SICU='Surgical intensive care unit',
    TSICU='Trauma/surgical intensive care unit',
)

# Signal abbreviation -> human-readable signal name.
mimic3_dict['signals'] = {
    'HR': 'Heart Rate',
    '%': '% O2',
    'RESP': 'Respiration from thoracic impedance',
    'PULSE': 'Pulse',
    # Blood pressure families follow the pattern <name>, <name>Sys, <name>Dias, <name>Mean.
    'NBP': 'Noninvasive Blood Pressure',
    'NBPSys': 'NBP Systolic',
    'NBPDias': 'NBP Diastolic',
    'NBPMean': 'NBP Mean',
    'ABP': 'Arterial Blood Pressure',
    'ABPSys': 'ABP Systolic',
    'ABPDias': 'ABP Diastolic',
    'ABPMean': 'ABP Mean',
    'PAP': 'Pulmonary Arterial Pressure',
    'PAPSys': 'PAP Systolic',
    'PAPDias': 'PAP Diastolic',
    'PAPMean': 'PAP Mean',
    'CO': 'Cardiac output',
    'PVC': 'Premature Ventricular Contraction',
    'PAWP': 'Pulmonary Artery Wedge Pressure',
    'ST': 'ECG segment',
    'ST_I': 'ECG segment I',
    'ST_II': 'ECG segment II',
    # NOTE(review): LAP below reads 'Left Atrial Pressure'; confirm whether the
    # RAP labels should read 'Right Atrial Pressure' rather than 'Arterial'.
    'RAP': 'Right Arterial Pressure',
    'RAP_1/3': 'Right Arterial Pressure 1/3',
    'RAP_2/3': 'Right Arterial Pressure 2/3',
    'RAP_3/3': 'Right Arterial Pressure 3/3',
    'AOBP': 'Automated Office Blood Pressure',
    'ICP': 'Intracranial Pressure',
    'UAP': 'Uterine Arterial Pressure',
    'ART': 'Arterial Blood Pressure',
    'dSpO2': 'Oxygen Saturation',
    'CVP': 'Central Venous Pressure',
    'CVP_1/3': 'Central Venous Pressure 1/3',
    'CVP_2/3': 'Central Venous Pressure 2/3',
    'CVP_3/3': 'Central Venous Pressure 3/3',
    # 'P1' is listed once; a second identical 'P1' entry further down was redundant.
    'P1': 'P1',
    'UVP': 'Uterine Venous Pressure',
    'PLETH': 'Plethysmograph',
    'AVF': 'ECG AVF',
    'AVL': 'ECG AVL',
    'AVR': 'ECG AVR',
    'LAP': 'Left Atrial Pressure',
    'I': 'ECG I',
    'II': 'ECG II',
    'III': 'ECG III',
    'MCL': 'ECG MCL',
    'MCL1': 'ECG MCL1',
    'V': 'ECG V',
    'V1': 'ECG V1',
    'V2': 'ECG V2',
}

# Service abbreviation -> human-readable description of the service.
mimic3_dict['services'] = {
    'CMED': 'Cardiac Medical - for non-surgical cardiac related admissions',
    'CSURG': 'Cardiac Surgery - for surgical cardiac admissions',
    'DENT': 'Dental - for dental/jaw related admissions',
    'ENT': 'Ear, nose, and throat - conditions primarily affecting these areas',
    'GU': 'Genitourinary - reproductive organs/urinary system',
    'GYN': 'Gynecological - female reproductive systems and breasts',
    'MED': 'Medical - general service for internal medicine',
    'NB': 'Newborn - infants born at the hospital',
    'NBB': 'Newborn baby - infants born at the hospital',
    'NMED': 'Neurologic Medical - non-surgical, relating to the brain',
    'NSURG': 'Neurologic Surgical - surgical, relating to the brain',
    'OBS': 'Obstetrics - concerned with childbirth and the care of women giving birth',
    'ORTHO': 'Orthopaedic - surgical, relating to the musculoskeletal system',
    'OMED': 'Orthopaedic medicine - non-surgical, relating to musculoskeletal system',
    'PSURG': 'Plastic - restoration/reconstruction of the human body (including cosmetic or aesthetic)',
    'PSYCH': 'Psychiatric - mental disorders relating to mood, behaviour, cognition, or perceptions',
    'SURG': 'Surgical - general surgical service not classified elsewhere',
    'TRAUM': 'Trauma - injury or damage caused by physical harm from an external source',
    'TSURG': 'Thoracic Surgical - surgery on the thorax, located between the neck and the abdomen',
    'VSURG': 'Vascular Surgical - surgery relating to the circulatory system',
}


# Presumably writes the assembled mimic3_dict out as JSON via the write_json helper.
# NOTE(review): the output recorded for this cell is
# "TypeError: expected str, bytes or os.PathLike object, not dict", i.e. a dict
# reached a parameter that expects a file path. write_json is not defined in the
# visible part of this file (presumably it comes from file_utils) - confirm its
# parameter order and that mimic3_json is a path rather than a dict. TODO confirm.
write_json(mimic3_json, mimic3_dict)

TypeError: expected str, bytes or os.PathLike object, not dict