In [6]:
import psycopg2
import pandas as pd

# Builds one row per patient from a MIMIC-IV PostgreSQL database (age, gender,
# date of death, comorbidity counts, vasopressor input count, ventilation
# minutes and death flags), saves it to "tabela_completa.csv" and prints it.

# Database configuration
db_user = "desafio_mimic"
db_password = "desafio_mimic"
db_host = "localhost"
db_port = "5432"
db_name = "mimiciv"

# SQL executed against the database. CTE overview:
#   diag                  - splits diagnoses_icd.icd_code into ICD-9 / ICD-10 columns
#   com                   - 0/1 comorbidity flags per admission (hadm_id)
#   subject_comorbidities - per-patient sums of the per-admission flags
#   tabelona              - patient demographics joined with those sums
#   vasopressor           - number of inputevents rows for the listed drugs
#   ate                   - adds ICU stays; rn = 1 marks the latest outtime
#   filtro2               - icu_death flag (dod <= outtime of the rn = 1 stay)
#   ventilation           - total ventilation minutes per patient
query = """
WITH diag AS (
    SELECT
        hadm_id,
        subject_id,
        CASE WHEN icd_version = 9 THEN icd_code ELSE NULL END AS icd9_code,
        CASE WHEN icd_version = 10 THEN icd_code ELSE NULL END AS icd10_code
    FROM mimiciv_hosp.diagnoses_icd
),
com AS (
    SELECT
        ad.subject_id,
        ad.hadm_id,
        -- Myocardial infarction
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('410', '412')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('I21', 'I22')
            OR
            SUBSTR(icd10_code, 1, 4) = 'I252'
            THEN 1
            ELSE 0 END) AS myocardial_infarct,
        -- Congestive heart failure
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) = '428'
            OR
            SUBSTR(icd9_code, 1, 5) IN ('39891', '40201', '40211', '40291', '40401', '40403',
                '40411', '40413', '40491', '40493')
            OR
            SUBSTR(icd9_code, 1, 4) BETWEEN '4254' AND '4259'
            OR
            SUBSTR(icd10_code, 1, 3) IN ('I43', 'I50')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I099', 'I110', 'I130', 'I132', 'I255', 'I420',
                  'I425', 'I426', 'I427', 'I428', 'I429', 'P290')
            THEN 1
            ELSE 0 END) AS congestive_heart_failure,
        -- Hypertension
        -- Each literal is compared with a prefix of its own length.
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) IN ('4019', '5723')
            OR
            SUBSTR(icd10_code, 1, 3) = 'I10'
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I272', 'K766')
            OR
            SUBSTR(icd10_code, 1, 5) = 'I2720'
            THEN 1
            ELSE 0 END) AS hypertension,
        -- Hypoglycemia
        -- 'E11649' has six characters, so it needs a six-character prefix;
        -- a five-character prefix can never equal it.
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) = '2511'
            OR
            SUBSTR(icd10_code, 1, 4) = 'E162'
            OR
            SUBSTR(icd10_code, 1, 6) = 'E11649'
            THEN 1
            ELSE 0 END) AS hypoglycemia,
        -- Hyperglycemia
        MAX(CASE WHEN
            SUBSTR(icd10_code, 1, 5) IN ('E0865', 'E1165')
            THEN 1
            ELSE 0 END) AS hyperglycemia,
        -- Tobacco use
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) IN ('3051')
            OR
            SUBSTR(icd9_code, 1, 5) = 'V1582'
            OR
            SUBSTR(icd10_code, 1, 4) = 'Z720'
            THEN 1
            ELSE 0 END) AS tobacco_use,
        -- Remaining Charlson comorbidities
        -- Peripheral vascular disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('440', '441')
            OR
            SUBSTR(icd9_code, 1, 4) IN ('0930', '4373', '4471', '5571', '5579', 'V434')
            OR
            SUBSTR(icd9_code, 1, 4) BETWEEN '4431' AND '4439'
            OR
            SUBSTR(icd10_code, 1, 3) IN ('I70', 'I71')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I731', 'I738', 'I739', 'I771', 'I790', 'I792',
                                         'K551', 'K558', 'K559', 'Z958', 'Z959')
            THEN 1
            ELSE 0 END) AS peripheral_vascular_disease,

        -- Cerebrovascular disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) BETWEEN '430' AND '438'
            OR
            SUBSTR(icd9_code, 1, 5) = '36234'
            OR
            SUBSTR(icd10_code, 1, 3) IN ('G45', 'G46')
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'I60' AND 'I69'
            OR
            SUBSTR(icd10_code, 1, 4) = 'H340'
            THEN 1
            ELSE 0 END) AS cerebrovascular_disease,

        -- Dementia
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) = '290'
            OR
            SUBSTR(icd9_code, 1, 4) IN ('2941', '3312')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('F00', 'F01', 'F02', 'F03', 'G30')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('F051', 'G311')
            THEN 1
            ELSE 0 END) AS dementia,

        -- Chronic pulmonary disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) BETWEEN '490' AND '505'
            OR
            SUBSTR(icd9_code, 1, 4) IN ('4168', '4169', '5064', '5081', '5088')
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'J40' AND 'J47'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'J60' AND 'J67'
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I278', 'I279', 'J684', 'J701', 'J703')
            THEN 1
            ELSE 0 END) AS chronic_pulmonary_disease,

        -- Rheumatic disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) = '725'
            OR
            SUBSTR(icd9_code, 1, 4) IN ('4465', '7100', '7101', '7102', '7103',
                                        '7104', '7140', '7141', '7142', '7148')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('M05', 'M06', 'M32', 'M33', 'M34')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('M315', 'M351', 'M353', 'M360')
            THEN 1
            ELSE 0 END) AS rheumatic_disease,

        -- Peptic ulcer disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('531', '532', '533', '534')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('K25', 'K26', 'K27', 'K28')
            THEN 1
            ELSE 0 END) AS peptic_ulcer_disease,

        -- Mild liver disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('570', '571')
            OR
            SUBSTR(icd9_code, 1, 4) IN ('0706', '0709', '5733', '5734', '5738', '5739', 'V427')
            OR
            SUBSTR(icd9_code, 1, 5) IN ('07022', '07023', '07032', '07033', '07044', '07054')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('B18', 'K73', 'K74')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('K700', 'K701', 'K702', 'K703', 'K709', 'K713',
                  'K714', 'K715', 'K717', 'K760', 'K762',
                  'K763', 'K764', 'K768', 'K769', 'Z944')
            THEN 1
            ELSE 0 END) AS mild_liver_disease,

        -- Diabetes without chronic complication
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) IN ('2500', '2501', '2502', '2503', '2508', '2509')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('E100', 'E101', 'E106', 'E108', 'E109', 'E110', 'E111',
                  'E116', 'E118', 'E119', 'E120', 'E121',
                  'E126', 'E128', 'E129', 'E130', 'E131',
                  'E136', 'E138', 'E139', 'E140', 'E141', 'E146', 'E148', 'E149')
            THEN 1
            ELSE 0 END) AS diabetes_without_cc,

        -- Diabetes with chronic complication
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) IN ('2504', '2505', '2506', '2507')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('E102', 'E103', 'E104', 'E105', 'E107', 'E112', 'E113',
                  'E114', 'E115', 'E117', 'E122', 'E123',
                  'E124', 'E125', 'E127', 'E132', 'E133',
                  'E134', 'E135', 'E137', 'E142', 'E143', 'E144', 'E145', 'E147')
            THEN 1
            ELSE 0 END) AS diabetes_with_cc,

        -- Hemiplegia or paraplegia
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('342', '343')
            OR
            SUBSTR(icd9_code, 1, 4) IN ('3341', '3440', '3441', '3442',
                                        '3443', '3444', '3445', '3446', '3449')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('G81', 'G82')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('G041', 'G114', 'G801', 'G802', 'G830',
                                         'G831', 'G832', 'G833', 'G834', 'G839')
            THEN 1
            ELSE 0 END) AS paraplegia,

        -- Renal disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('582', '585', '586', 'V56')
            OR
            SUBSTR(icd9_code, 1, 4) IN ('5880', 'V420', 'V451')
            OR
            SUBSTR(icd9_code, 1, 4) BETWEEN '5830' AND '5837'
            OR
            SUBSTR(icd9_code, 1, 5) IN (
                '40301', '40311', '40391', '40402', '40403',
                '40412', '40413', '40492', '40493')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('N18', 'N19')
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I120', 'I131', 'N032', 'N033', 'N034',
                                         'N035', 'N036', 'N037', 'N052', 'N053',
                                         'N054', 'N055', 'N056', 'N057', 'N250',
                                         'Z490', 'Z491', 'Z492', 'Z940', 'Z992')
            THEN 1
            ELSE 0 END) AS renal_disease,

        -- Malignant cancer
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) BETWEEN '140' AND '172'
            OR
            SUBSTR(icd9_code, 1, 4) BETWEEN '1740' AND '1958'
            OR
            SUBSTR(icd9_code, 1, 3) BETWEEN '200' AND '208'
            OR
            SUBSTR(icd9_code, 1, 4) = '2386'
            OR
            SUBSTR(icd10_code, 1, 3) IN ('C43', 'C88')
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C00' AND 'C26'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C30' AND 'C34'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C37' AND 'C41'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C45' AND 'C58'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C60' AND 'C76'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C81' AND 'C85'
            OR
            SUBSTR(icd10_code, 1, 3) BETWEEN 'C90' AND 'C97'
            THEN 1
            ELSE 0 END) AS malignant_cancer,

        -- Severe liver disease
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 4) IN ('4560', '4561', '4562')
            OR
            SUBSTR(icd9_code, 1, 4) BETWEEN '5722' AND '5728'
            OR
            SUBSTR(icd10_code, 1, 4) IN ('I850', 'I859', 'I864', 'I982', 'K704', 'K711',
                  'K721', 'K729', 'K765', 'K766', 'K767')
            THEN 1
            ELSE 0 END) AS severe_liver_disease,

        -- Metastatic solid tumor
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('196', '197', '198', '199')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('C77', 'C78', 'C79', 'C80')
            THEN 1
            ELSE 0 END) AS metastatic_solid_tumor,

        -- AIDS / HIV
        MAX(CASE WHEN
            SUBSTR(icd9_code, 1, 3) IN ('042', '043', '044')
            OR
            SUBSTR(icd10_code, 1, 3) IN ('B20', 'B21', 'B22', 'B24')
            THEN 1
            ELSE 0 END) AS aids
    FROM mimiciv_hosp.admissions ad
    LEFT JOIN diag
        ON ad.hadm_id = diag.hadm_id
    GROUP BY ad.subject_id, ad.hadm_id
),

-- Per-patient totals. The flags are summed over admissions, so a condition
-- recorded in several admissions is counted once per admission.
subject_comorbidities AS (
    SELECT
        subject_id,
        SUM(myocardial_infarct) AS myocardial_infarct,
        SUM(congestive_heart_failure + hypertension +  hypoglycemia + hyperglycemia
        + tobacco_use + peripheral_vascular_disease + cerebrovascular_disease
        + dementia + chronic_pulmonary_disease + rheumatic_disease + peptic_ulcer_disease
        + mild_liver_disease+ diabetes_without_cc + diabetes_with_cc + paraplegia + renal_disease
        + malignant_cancer + severe_liver_disease + metastatic_solid_tumor + aids + myocardial_infarct) as total_comorbidities
    FROM com
    GROUP BY subject_id
),
-- Patients without any admission keep NULL comorbidity columns (LEFT JOIN).
tabelona AS (
    SELECT
        p.subject_id,
        p.anchor_age,
        p.gender,
        p.dod,
        sc.myocardial_infarct,
        sc.total_comorbidities
    FROM mimiciv_hosp.patients p
    LEFT JOIN subject_comorbidities sc ON p.subject_id = sc.subject_id
),

vasopressor AS (
    SELECT 
        e.subject_id,
        SUM(CASE WHEN i.label IN ('Vasopressin', 'Dopamine', 'Epinephrine', 'Norepinephrine', 
                                'Phenylephrine', 'Milrinone', 'Dobutamine') 
                THEN 1 ELSE 0 END) AS total_vasopressor_input_count
    FROM mimiciv_icu.d_items i
    JOIN mimiciv_icu.inputevents e ON i.itemid = e.itemid
    WHERE i.label IN ('Vasopressin', 'Dopamine', 'Epinephrine', 'Norepinephrine', 
                     'Phenylephrine', 'Milrinone', 'Dobutamine')
    GROUP BY e.subject_id
),

ate AS (
    SELECT 
        t.subject_id,
        t.anchor_age,
        t.gender,
        t.dod,
        t.myocardial_infarct,
        t.total_comorbidities,
        COALESCE(v.total_vasopressor_input_count, 0) AS total_vasopressor_input_count,
        i.outtime,
        -- NULLS LAST: PostgreSQL puts NULLs first in DESC order, which would
        -- rank a stay with an unknown outtime ahead of the latest known one.
        ROW_NUMBER() OVER (PARTITION BY t.subject_id ORDER BY i.outtime DESC NULLS LAST) AS rn
    FROM tabelona t
    LEFT JOIN vasopressor v ON t.subject_id = v.subject_id
    LEFT JOIN mimiciv_icu.icustays i ON i.subject_id = t.subject_id
),

filtro2 AS (
    SELECT
        subject_id,
        MAX(CASE WHEN dod <= outtime AND rn = 1 THEN 1 ELSE 0 END) AS icu_death
    FROM ate
    GROUP BY subject_id
),

ventilation AS (
    SELECT 
        i.subject_id,
        SUM(EXTRACT(EPOCH FROM (v.endtime - v.starttime))/60) AS total_ventilation_minutes
    FROM mimiciv_derived.ventilation v
    JOIN mimiciv_icu.icustays i ON i.stay_id = v.stay_id
    WHERE v.endtime IS NOT NULL AND v.starttime IS NOT NULL
    GROUP BY i.subject_id
)
SELECT
    a.subject_id,
    a.anchor_age,
    a.gender,
    a.dod,
    a.myocardial_infarct,
    a.total_comorbidities,
    a.total_vasopressor_input_count,
    COALESCE(v.total_ventilation_minutes, 0) AS total_ventilation_minutes,
    CASE WHEN f.icu_death = 1 THEN 'True' ELSE 'False' END AS icu_death,
    CASE WHEN a.dod IS NOT NULL THEN 'True' ELSE 'False' END AS death
FROM ate a
LEFT JOIN ventilation v ON a.subject_id = v.subject_id
LEFT JOIN filtro2 f ON a.subject_id = f.subject_id
WHERE a.rn = 1  -- One row per patient; patients without ICU stays are kept
ORDER BY a.subject_id
"""

# Open the connection
conn = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)

# Run the query and load the result into pandas. The try/finally guarantees
# the connection is closed even when the query fails.
try:
    df = pd.read_sql(query, conn)
finally:
    # Close the connection
    conn.close()

# Persist the result only after the connection has been released.
df.to_csv("tabela_completa.csv", index=False)

print(df)

  df = pd.read_sql(query, conn)


        subject_id  anchor_age gender         dod  myocardial_infarct  \
0         10000032          52      F  2180-09-09                 0.0   
1         10000048          23      F        None                 NaN   
2         10000058          33      F        None                 NaN   
3         10000068          19      F        None                 0.0   
4         10000084          72      M  2161-02-13                 0.0   
...            ...         ...    ...         ...                 ...   
364622    19999828          46      F        None                 0.0   
364623    19999829          28      F        None                 NaN   
364624    19999840          58      M  2164-09-17                 0.0   
364625    19999914          49      F        None                 NaN   
364626    19999987          57      F        None                 0.0   

        total_comorbidities  total_vasopressor_input_count  \
0                      14.0                              0   