In [1]:
from connection_mimiciv import *

In [2]:
# Cohort selection: ICU stays whose first and last care unit are both the MICU.
# The query has no placeholders, so a plain (non-f) string is sufficient.
sql = """
    SELECT subject_id, hadm_id, stay_id, los
    FROM mimic4.mimiciv_icu.icustays
    WHERE first_careunit='Medical Intensive Care Unit (MICU)' AND last_careunit='Medical Intensive Care Unit (MICU)';
"""

# Run the query through the project's connection helper; the result is indexed
# by column name in the following cell (pats['stay_id']).
pats = Connection().data_from_mimic_connection(sql)

In [3]:
# Distinct stay_ids of the cohort, converted to plain Python ints.
# `.unique()` yields NumPy scalars; a tuple of those is rendered with each
# element's repr (e.g. `np.int64(...)` under NumPy >= 2) when it is interpolated
# into SQL text, so convert with `.tolist()` first.
stay_ids_tuple = tuple(pats['stay_id'].unique().tolist())

In [4]:
# Render the IN-list explicitly instead of interpolating the Python tuple:
# a tuple's string form uses each element's repr (e.g. `np.int64(...)` under
# NumPy >= 2) and a one-element tuple renders as `(x,)` -- neither is valid SQL.
stay_ids_sql = ", ".join(str(int(stay_id)) for stay_id in stay_ids_tuple)
if not stay_ids_sql:
    # `IN ()` is a SQL syntax error; fail here with a clearer message instead.
    raise ValueError("stay_ids_tuple is empty; there are no ICU stays to query.")

# Pull stay, patient and admission fields for the selected ICU stays.
#   age             = anchor_age shifted by the years between anchor_year and ICU admission
#   icu_expire_flag = 1 when the recorded deathtime falls inside [intime, outtime], else 0
sql = f"""
    SELECT
        icu.stay_id,
        icu.subject_id,
        icu.hadm_id,
        icu.los,
        pat.gender,
        pat.anchor_age + EXTRACT(YEAR FROM icu.intime) - pat.anchor_year AS age,
        CASE
            WHEN adm.deathtime BETWEEN icu.intime AND icu.outtime THEN 1
            ELSE 0
        END AS icu_expire_flag,
        adm.hospital_expire_flag,
        adm.discharge_location,
        adm.deathtime,
        icu.intime,
        icu.outtime
    FROM mimiciv_icu.icustays icu
    JOIN mimiciv_hosp.patients pat ON icu.subject_id = pat.subject_id
    JOIN mimiciv_hosp.admissions adm ON icu.hadm_id = adm.hadm_id
    WHERE icu.stay_id IN ({stay_ids_sql});
"""
demographics = Connection().data_from_mimic_connection(sql)

In [5]:
# Stays flagged as ending in an in-ICU death get a dedicated discharge label.
died_in_icu = demographics['icu_expire_flag'].eq(1)
demographics.loc[died_in_icu, 'discharge_location'] = 'DIED IN ICU'

# Keep only the columns that are exported below.
# NOTE(review): this drops 'icu_expire_flag', so the cell cannot be re-run
# without first re-running the query cell that builds `demographics`.
export_columns = ['gender', 'age', 'los', 'discharge_location']
demographics = demographics[export_columns]

In [6]:
# Write the table to demographics.csv in the working directory, without the index column.
demographics.to_csv(path_or_buf='demographics.csv', index=False)

In [7]:
# Display the exported table; the notebook shows only the first and last rows of a long frame.
demographics

Unnamed: 0,gender,age,los,discharge_location
0,F,63.0,1.677789,DIED
1,F,36.0,0.955255,HOME
2,F,69.0,0.980243,HOSPICE
3,M,77.0,1.990949,SKILLED NURSING FACILITY
4,M,44.0,0.839572,HOME
...,...,...,...,...
15122,F,44.0,0.545775,HOME HEALTH CARE
15123,M,76.0,1.358472,HOME
15124,M,60.0,0.970625,HOSPICE
15125,F,69.0,0.904653,HOME


#### Esta parte la realicé en Google Colab para un mejor manejo de la librería SDV

In [None]:
pip install sdv

In [None]:
import pandas as pd
from google.colab import drive
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer

# Mount Google Drive in the Colab runtime under /content/drive.
drive.mount('/content/drive')


In [None]:
# Load the demographics table from demographics.csv.
# NOTE(review): the relative path is resolved against the runtime's working
# directory, not the mounted Drive -- confirm the file is available there.
df = pd.read_csv(filepath_or_buffer='demographics.csv')

In [None]:
# Let SDV detect the single-table metadata directly from the loaded dataframe.
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(df)

# Fit a Gaussian-copula synthesizer on the loaded table.
synthesizer = GaussianCopulaSynthesizer(metadata)
synthesizer.fit(df)

# Draw the synthetic rows from the fitted synthesizer.
# NOTE(review): 15119 is hard-coded -- confirm it is the intended row count
# (for example, whether it should equal len(df)).
synthetic_data = synthesizer.sample(num_rows=15119)

In [None]:
# Save the synthetic rows to synthetic_data.json as a JSON array with one object per row.
synthetic_data.to_json(path_or_buf='synthetic_data.json', orient='records')