In [None]:
import os
import sqlite3

import pandas as pd

In [None]:
##############################################################################
####################### CONNECT TO MIMIC #####################################
# Location of the MIMIC SQLite database file; replace the placeholder with a real path.
MIMIC_DB_PATH = "path to mimic"

# sqlite3.connect() creates a new, empty database file when the path does not
# exist, which would only surface later as "no such table" errors. Fail early.
if not os.path.isfile(MIMIC_DB_PATH):
    raise FileNotFoundError(f"MIMIC SQLite database not found: {MIMIC_DB_PATH!r}")

connection = sqlite3.connect(MIMIC_DB_PATH)

# Quick check that the connection object works: total_changes is the number of
# rows modified through this connection since it was opened.
print(connection.total_changes)
# The connection is reused by every query in this notebook; call
# connection.close() only once all queries have run.

# Smoke-test query: only a handful of rows are needed to confirm that the
# admissions table is readable, so the result is limited in SQL.
test_query = """
SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis
FROM admissions
LIMIT 5
"""

# Run the query and keep the preview rows
test = pd.read_sql_query(test_query, connection)

print("TEST:", test.head())
##############################################################################
##################### QUERYING, MORTALITY COHORT #############################


### positive cohort
# ICU stays whose hospital admission ended in death (hospital_expire_flag = 1),
# restricted to patients aged >= 18 at ICU admission and ICU stays of >= 1 day.
# One row is returned per ICU stay.

query = """

WITH icu_patients AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, pat.DOB, pat.gender, adm.admittime 
-- ICU length of stay in days
, (JulianDay(icu.OUTTIME) - JulianDay(icu.INTIME)) as icu_length_of_stay
-- age in years at ICU admission: elapsed days divided by the mean year length (365.242 days)
, (JulianDay(icu.INTIME) - JulianDay(pat.DOB))/ 365.242 as age
, adm.hospital_expire_flag as mortality 


FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN admissions adm
    ON adm.subject_id = icu.subject_id
    AND adm.hadm_id = icu.hadm_id
)

SELECT * 
FROM icu_patients icu_cohort
WHERE icu_cohort.mortality == 1 AND icu_cohort.age >= 18 AND icu_cohort.icu_length_of_stay>=1

"""


positive_cohort = pd.read_sql_query(query, connection)

In [None]:

# The query result contains duplicate rows; keep the first occurrence of each.
# drop_duplicates() returns a new frame, so the result has to be assigned
# (a bare call would be discarded).
positive_cohort = positive_cohort.drop_duplicates(keep='first')

In [None]:
positive_cohort

In [None]:
positive_cohort.hadm_id.value_counts()

In [None]:
positive_cohort[positive_cohort["hadm_id"] == 175448]


In [None]:
positive_cohort

## Negative Cohort

In [None]:

### negative cohort
# ICU stays (age >= 18 at ICU admission, ICU stay >= 1 day) of patients who never
# appear in the positive (in-hospital death) cohort. One row per ICU stay, so a
# patient can still contribute several admissions at this point.

query = """

WITH icu_patients AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, pat.DOB, pat.gender, adm.admittime 
-- ICU length of stay in days
, (JulianDay(icu.OUTTIME) - JulianDay(icu.INTIME)) as icu_length_of_stay
-- age in years at ICU admission: elapsed days divided by the mean year length (365.242 days)
, (JulianDay(icu.INTIME) - JulianDay(pat.DOB))/ 365.242 as age
, adm.hospital_expire_flag 


FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN admissions adm
    ON adm.subject_id = icu.subject_id
    AND adm.hadm_id = icu.hadm_id
)

SELECT icu_cohort.subject_id, icu_cohort.hadm_id, icu_cohort.icustay_id, icu_cohort.DOB, icu_cohort.gender, icu_cohort.admittime,  icu_cohort.icu_length_of_stay, icu_cohort.age, icu_cohort.hospital_expire_flag as mortality
FROM icu_patients icu_cohort
-- drop every patient that qualifies for the positive cohort
WHERE icu_cohort.subject_id NOT IN(
    SELECT icu_cohort.subject_id
    FROM icu_patients icu_cohort
    WHERE icu_cohort.hospital_expire_flag == 1 AND icu_cohort.age >= 18 AND icu_cohort.icu_length_of_stay>=1
)
AND icu_cohort.age >= 18 AND icu_cohort.icu_length_of_stay>=1
ORDER BY icu_cohort.subject_id
"""


negative_cohort = pd.read_sql_query(query, connection)

In [None]:
negative_cohort

### Preprocessing negative (non-mortality) ICU patients (keep only the earliest admission per patient)

In [None]:
# Work on a de-duplicated frame so `negative_cohort` keeps the raw query result.
# (Patients younger than 18 are already excluded by the SQL query.)
# drop_duplicates() returns a new frame; a bare call without assignment is discarded.
patients = negative_cohort.drop_duplicates(keep='first')


In [None]:
patients[patients["subject_id"] == 18108]

In [None]:
# Rows per patient, sorted ascending. sort_values must be *called*; without the
# parentheses the cell only displays the bound method object.
patients.subject_id.value_counts().sort_values()

In [None]:
#check if we actually dropped duplicates
patients[patients["subject_id"]==11861]

In [None]:
# The extraction returns every hospital admission (hadm_id) of a patient. Only the
# first admission per patient should be kept, together with all of its ICU stays
# (icustay_id), which are needed later for labs etc.

# number of distinct hospital admissions per patient
counts = patients.groupby("subject_id")["hadm_id"].nunique()

# subject_ids having more than one hadm_id ...
idx_pat_multi_hadmid = counts[counts > 1].index
# ... and the cohort rows that belong to those patients
has_multiple_admissions = patients["subject_id"].isin(idx_pat_multi_hadmid)
multi_hadmid_patients = patients[has_multiple_admissions]

In [None]:
# Patients with exactly one hadm_id; set aside to be concatenated back into the
# negative cohort later.
in_multi_admission_group = patients["subject_id"].isin(idx_pat_multi_hadmid)
uni_hadmid_patients = patients[~in_multi_admission_group]

In [None]:
uni_hadmid_patients.head()

In [None]:
multi_hadmid_patients.head()

In [None]:
#just checking
multi_hadmid_patients[multi_hadmid_patients['subject_id'] == 11861]

In [None]:
# Convert admittime to datetime so admissions can be compared chronologically.
# multi_hadmid_patients is a filtered slice of `patients`; assigning a column on
# it in place triggers SettingWithCopyWarning, so build a new frame instead.
multi_hadmid_patients = multi_hadmid_patients.assign(
    admittime=pd.to_datetime(multi_hadmid_patients["admittime"])
)

In [None]:
multi_hadmid_patients

In [None]:
# For patients with several hadm_ids: row label of the earliest admittime per subject_id
admittime_by_subject = multi_hadmid_patients.groupby("subject_id")["admittime"]
indices = admittime_by_subject.idxmin()

In [None]:
indices

In [None]:
# `indices` holds ONE row label per patient (the row with the earliest admittime).
# Selecting only those rows would drop the other ICU stays of that same first
# admission, although all icustay_ids of the first hadm_id are meant to be kept
# (and are kept for the single-admission patients). So look up the first hadm_id
# per patient and keep every row belonging to it.
# NOTE(review): assumes a hadm_id belongs to exactly one subject_id — confirm.
first_hadm_ids = multi_hadmid_patients.loc[indices, "hadm_id"]
sub_cohort_multi = multi_hadmid_patients[multi_hadmid_patients["hadm_id"].isin(first_hadm_ids)]

In [None]:
#checking
sub_cohort_multi[sub_cohort_multi["subject_id"] == 11861]

In [None]:
sub_cohort_multi

In [None]:
# Rebuild the negative cohort: first-admission rows of the multi-admission
# patients stacked on top of the single-admission patients.
negative_parts = [sub_cohort_multi, uni_hadmid_patients]
negative_cohort = pd.concat(negative_parts)

In [None]:
negative_cohort.head()

In [None]:
negative_cohort[negative_cohort['hadm_id'] == 163281]

### Concat final cohort with negative and positive

In [None]:
negative_cohort

In [None]:
negative_cohort.hadm_id.value_counts()

In [None]:
positive_cohort

In [None]:
# Final cohort: negative rows first, then positive rows (original row labels are kept)
cohort_parts = [negative_cohort, positive_cohort]
patient_cohort = pd.concat(cohort_parts)

In [None]:
patient_cohort[patient_cohort["subject_id"] == 27427]

In [None]:
patient_cohort[patient_cohort["hadm_id"] == 178137]

In [None]:
patient_cohort

## Extracting vitals

In [None]:
# Vital signs per hospital admission and chart time.
# The CTE maps each chartevents row to the vital it measures (NULL for every other
# column, and NULL when the value is outside the plausibility bounds); the outer
# query averages the values recorded at the same charttime within an admission.
# The itemid filter in the WHERE clause must list every itemid used in the CASE
# expressions, otherwise that vital is always NULL (this was the case for Glucose).
query = """
WITH ce as
(
    select ce.icustay_id, ce.charttime
        , (case when itemid in (211,220045) and valuenum > 0 and valuenum < 300 then valuenum else null end) as HeartRate
        , (case when itemid in (51,442,455,6701,220179,220050) and valuenum > 0 and valuenum < 400 then valuenum else null end) as SysBP
        , (case when itemid in (8368,8440,8441,8555,220180,220051) and valuenum > 0 and valuenum < 300 then valuenum else null end) as DiasBP
        , (case when itemid in (456,52,6702,443,220052,220181,225312) and valuenum > 0 and valuenum < 300 then valuenum else null end) as MeanBP
        , (case when itemid in (615,618,220210,224690) and valuenum > 0 and valuenum < 70 then valuenum else null end) as RespRate
        , (case when itemid in (223761,678) and valuenum > 70 and valuenum < 120 then (valuenum-32)/1.8 -- converted to degC in valuenum call
               when itemid in (223762,676) and valuenum > 10 and valuenum < 50  then valuenum else null end) as TempC
        , (case when itemid in (646,220277) and valuenum > 0 and valuenum <= 100 then valuenum else null end) as SpO2
        , (case when itemid in (807,811,1529,3745,3744,225664,220621,226537) and valuenum > 0 then valuenum else null end) as Glucose
    from chartevents ce
    
    WHERE ce.itemid in
    (
    -- HEART RATE
    211, --"Heart Rate"
    220045, --"Heart Rate"

    -- Systolic/diastolic

    51, -- Arterial BP [Systolic]
    442, -- Manual BP [Systolic]
    455, -- NBP [Systolic]
    6701, -- Arterial BP #2 [Systolic]
    220179, -- Non Invasive Blood Pressure systolic
    220050, -- Arterial Blood Pressure systolic

    8368, -- Arterial BP [Diastolic]
    8440, -- Manual BP [Diastolic]
    8441, -- NBP [Diastolic]
    8555, -- Arterial BP #2 [Diastolic]
    220180, -- Non Invasive Blood Pressure diastolic
    220051, -- Arterial Blood Pressure diastolic


    -- MEAN ARTERIAL PRESSURE
    456, --"NBP Mean"
    52, --"Arterial BP Mean"
    6702, -- Arterial BP Mean #2
    443, -- Manual BP Mean(calc)
    220052, --"Arterial Blood Pressure mean"
    220181, --"Non Invasive Blood Pressure mean"
    225312, --"ART BP mean"

    -- RESPIRATORY RATE
    618,-- Respiratory Rate
    615,-- Resp Rate (Total)
    220210,-- Respiratory Rate
    224690, -- Respiratory Rate (Total)


    -- SPO2, peripheral
    646, 220277,

    -- GLUCOSE (same itemids as the Glucose CASE expression above)
    807, 811, 1529, 3745, 3744, 225664, 220621, 226537,

    -- TEMPERATURE
    223762, -- "Temperature Celsius"
    676, -- "Temperature C"
    223761, -- "Temperature Fahrenheit"
    678 -- "Temperature F"

    )
    )
select
    icustays.hadm_id
  , ce.charttime
  , avg(HeartRate) as HeartRate
  , avg(SysBP) as SysBP
  , avg(DiasBP) as DiasBP
  , avg(MeanBP) as MeanBP
  , avg(RespRate) as RespRate
  , avg(TempC) as TempC
  , avg(SpO2) as SpO2
  , avg(Glucose) as Glucose
from icustays
left join ce
on ce.icustay_id = icustays.icustay_id
group by icustays.hadm_id, ce.charttime
order by icustays.hadm_id, ce.charttime;
"""

In [None]:
# Run the vitals query (scans chartevents) and load the result into a DataFrame
vital = pd.read_sql_query(sql=query, con=connection)

In [None]:
vital

## Prescriptions

In [None]:
# All prescriptions joined to their hospital admission (for admittime).
# The ORDER BY belongs to the outermost SELECT: the row order of a CTE/subquery
# is not guaranteed to carry over to the query that reads from it.
query = """
WITH filter_prescriptions AS
(
SELECT d.subject_id, adm.hadm_id, adm.admittime, d.icustay_id, d.drug as drug_name, CAST(d.NDC AS varchar) as NDC, d.startdate as drug_startdate, d.enddate as drug_enddate, d.prod_strength as drug_strength, d.dose_val_rx as drug_dosage, d.dose_unit_rx as drug_unit
FROM prescriptions d
INNER JOIN admissions adm
    ON adm.subject_id = d.subject_id
    AND adm.hadm_id = d.hadm_id
)
SELECT *
FROM filter_prescriptions 
ORDER BY subject_id ASC
"""

drugs = pd.read_sql_query(query, connection)

In [None]:
drugs

In [None]:
#del drugs


## Diagnoses

In [None]:
# ICD-9 diagnoses with their title and the admit time of the admission they
# belong to, restricted to patients that have at least one ICU stay.
# Each diagnosis is joined to ITS OWN admission (subject_id + hadm_id). Joining
# icustays/admissions on subject_id alone repeats every diagnosis once per ICU
# stay of the patient and pairs it with the admittime of unrelated admissions.
query = """
WITH filter_diagnoses AS 
(
    SELECT icd.subject_id, icd.hadm_id, adm.admittime, icd.icd9_code, d_icd.short_title as icd9_title
    FROM diagnoses_icd icd
    INNER JOIN admissions adm
        ON adm.subject_id = icd.subject_id
        AND adm.hadm_id = icd.hadm_id
    INNER JOIN d_icd_diagnoses d_icd
        ON icd.icd9_code = d_icd.icd9_code
    WHERE icd.subject_id IN (SELECT icustays.subject_id FROM icustays)
)
SELECT *
FROM filter_diagnoses
ORDER BY subject_id
"""

diagnoses = pd.read_sql_query(query, connection)

In [None]:
diagnoses

## Vital cohort

In [None]:
# Keep only the vitals rows whose admission is part of the patient cohort
cohort_hadm_ids = list(patient_cohort.hadm_id)
vital_cohort = vital[vital["HADM_ID"].isin(cohort_hadm_ids)]

In [None]:
# Independent working copies of the cohort (p) and its vitals (v) for the merge
p, v = patient_cohort.copy(), vital_cohort.copy()


In [None]:
# Align the vitals key column name with the cohort frame before merging
v = v.rename(columns={"HADM_ID": "hadm_id"})

In [None]:
v

In [None]:
p

In [None]:
# Attach vitals to every cohort row; cohort rows without vitals are kept (left join)
f = pd.merge(p, v, on='hadm_id', how='left')


In [None]:
f

In [None]:
# Persist the merged cohort + vitals table. index=False: the row index after the
# merge carries no information, and writing it would come back as a spurious
# "Unnamed: 0" column when the file is read again with pd.read_csv.
f.to_csv("C:\\Users\\Maria\\Desktop\\data\\patients_vitals.csv", index=False)

In [None]:
# Reload the saved cohort + vitals table from disk
data = pd.read_csv(filepath_or_buffer="C:\\Users\\Maria\\Desktop\\data\\patients_vitals.csv")

In [None]:
data 

In [None]:
drugs

In [None]:
# Prescriptions restricted to the patients present in the cohort table
cohort_subject_ids = list(data.subject_id)
drugs_cohort = drugs[drugs["subject_id"].isin(cohort_subject_ids)]

In [None]:
drugs_cohort.subject_id.sort_values()

In [None]:
# Diagnoses restricted to the patients present in the cohort table
is_cohort_patient = diagnoses["subject_id"].isin(data.subject_id)
diagnoses_cohort = diagnoses[is_cohort_patient]

In [None]:
# Inspect the subject_ids of the filtered cohort frame (not the unfiltered
# `diagnoses` table), mirroring the check done for drugs_cohort.
diagnoses_cohort.subject_id.sort_values()

In [None]:
# Export the cohort prescriptions to the working directory
drugs_cohort.to_csv(path_or_buf="drugs.csv")

In [None]:
# Export the cohort diagnoses to the working directory
diagnoses_cohort.to_csv(path_or_buf="diagnoses.csv")