In [None]:
import os
import sqlite3

import pandas as pd



In [None]:
##############################################################################
####################### CONNECT TO MIMIC #####################################
# Location of the MIMIC-III SQLite database. Override it by setting the
# MIMIC_DB_PATH environment variable; the default is the original local path.
DB_PATH = os.environ.get(
    "MIMIC_DB_PATH",
    "C:\\Users\\Maria\\Desktop\\Projects Data Scripts\\MIMIC\\data\\mimic3.db",
)

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a confusing "no such table"
# error. Fail early with a clear message instead.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(f"MIMIC database not found: {DB_PATH}")

connection = sqlite3.connect(DB_PATH)

# Quick check that the connection object is usable: total_changes reports the
# number of rows modified since the connection was opened.
print(connection.total_changes)
# The connection is deliberately left open because the queries below reuse it;
# call connection.close() once the analysis is finished.

# Smoke-test query: read a few admission columns to confirm the database is
# readable through pandas.
test_query = """
SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis
FROM admissions
"""

# Run the query and keep the result as a DataFrame
test = pd.read_sql_query(test_query, connection)

print("TEST:", test.head())
##############################################################################
##################### QUERYING, MORTALITY COHORT #############################
# Build one row per ICU stay (icustays joined to patients and admissions) with:
#   - icu_length_of_stay: ICU out-time minus in-time, in days
#   - age / patient_age:  age at ICU admission, in years
#   - icustay_id_order:   rank of the stay within the patient, by in-time
#   - mortality:          the admission's hospital_expire_flag
#   - exclusion_los:      1 when the ICU stay is shorter than 2 days
#   - exclusion_age:      1 when the patient is younger than 18
query = """

WITH icu_patients AS
(
SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, pat.DOB, pat.gender, icu.intime
-- JulianDay differences are expressed in days
, (JulianDay(icu.OUTTIME) - JulianDay(icu.INTIME)) as icu_length_of_stay
-- days -> years: a year is 365.242 days (the previous divisor 364.242 inflated every age)
, (JulianDay(icu.INTIME) - JulianDay(pat.DOB)) / 365.242 as age
, adm.hospital_expire_flag , icu.outtime
, RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order


FROM icustays icu
INNER JOIN patients pat
  ON icu.subject_id = pat.subject_id
INNER JOIN admissions adm
    ON adm.subject_id = icu.subject_id
    AND adm.hadm_id = icu.hadm_id
)


SELECT
    fa.subject_id, fa.hadm_id, fa.icustay_id, fa.icustay_id_order, fa.intime as icu_intime, fa.outtime as icu_outtime, fa.DOB, fa.GENDER, fa.age as patient_age, fa.hospital_expire_flag as mortality, fa.icu_length_of_stay
  , CASE
        WHEN fa.icu_length_of_stay < 2 then 1
    ELSE 0 END
        as exclusion_los
  , CASE
        WHEN fa.age < 18 then 1
    ELSE 0 END
        as exclusion_age
FROM icu_patients fa

"""

# Execute the cohort query on the open connection; the result is one DataFrame
# row per ICU stay.
icu = pd.read_sql_query(query, connection)

In [None]:
# Display the cohort DataFrame returned by the query above.
icu

### Preprocessing ICU patients

In [None]:
# Build the adult cohort without touching the original `icu` query result:
#   1. keep only rows with exclusion_age == 0 (age at ICU admission not below 18)
#   2. drop fully identical rows, keeping the first occurrence
# The boolean filter already returns a new DataFrame, so `icu` is left intact.
# (A previous bare `patients.drop_duplicates()` call discarded its result and
# therefore did nothing; the de-duplication now happens exactly once, here.)
patients = (
    icu[icu["exclusion_age"] == 0]
    .drop_duplicates(keep="first")
)


In [None]:
# Display the filtered, de-duplicated cohort.
patients

In [None]:
# Number of ICU stays per patient: count the non-null icustay_id_order
# entries within each subject_id group.
counts = patients.groupby("subject_id")["icustay_id_order"].count()

In [None]:
# Show the per-patient ICU stay counts in ascending order.
counts.sort_values()

In [None]:
# Spot-check a single patient (subject_id 109) to confirm that no duplicated
# rows remain after de-duplication.
patients.loc[patients["subject_id"] == 109]

In [None]:
# subject_ids of patients with at most one ICU stay in `counts`
idx_pat_univisit = counts.index[counts.values <= 1]

# Keep only the rows of `patients` that belong to those single-visit patients
is_single_visit = patients["subject_id"].isin(idx_pat_univisit)
uni_visit_patients = patients[is_single_visit]

In [None]:
# Distribution of the mortality flag among single-visit patients.
uni_visit_patients["mortality"].value_counts()