# Generate Dataset for diseases

In [None]:
import mimic_pipeline.utils as utils
import pandas as pd

In [None]:
# Collect database credentials interactively. The password is read with
# getpass so that it is not echoed back and does not end up in the saved
# notebook output.
from getpass import getpass

user = input("Enter your username: ")
password = getpass("Enter your password: ")
# Open the 'eicu' database / 'eicu' schema through the project's loader helper.
loader = utils.DataBaseLoader(user=user, password=password, dbname='eicu', schema='eicu')

In [None]:
# Read data/eICU-union.csv into a DataFrame, report its (rows, columns),
# and preview the first five rows as the cell output.
eicu_df = pd.read_csv(filepath_or_buffer="data/eICU-union.csv")
print(eicu_df.shape)
eicu_df.head(n=5)

In [None]:
# Look up 'disease_flag' on the loader; the result is used below as a
# DataFrame (presumably the disease_flag table of the eicu schema — confirm
# against utils.DataBaseLoader).
disease_df = loader['disease_flag']

In [None]:
# Show (rows, columns) of the disease flag table.
disease_df.shape

In [None]:
# Preview the first rows of the disease flag table.
disease_df.head()

Get disease-specific flags

In [None]:
# Attach the four disease indicator columns to every row of the feature
# table. The left join keeps all eicu_df rows; rows without a match in
# disease_df receive NaN flags.
# NOTE(review): if disease_df ever holds several rows per patientunitstayid,
# this join duplicates feature rows — consider validate="many_to_one".
flag_columns = ['patientunitstayid', 'ami', 'heart_failure', 'akf', 'sepsis']
whole_flag_df = pd.merge(
    eicu_df,
    disease_df[flag_columns],
    on='patientunitstayid',
    how='left',
)
whole_flag_df.shape

In [None]:
# Number of distinct patientunitstayid values after the merge; compare with
# the merged row count to spot rows duplicated by the join.
len(whole_flag_df['patientunitstayid'].unique())

In [None]:
# Print the distinct values each disease flag takes in the raw flag table.
for label in ('ami', 'sepsis', 'akf', 'heart_failure'):
    distinct_values = disease_df[label].unique()
    print(distinct_values)

In [None]:
# For each disease flag in the merged table, print its distinct values and
# how many rows are missing a value (rows unmatched by the left join).
for label in ('ami', 'sepsis', 'akf', 'heart_failure'):
    merged_flag = whole_flag_df[label]
    print(merged_flag.unique())
    print(merged_flag.isna().sum())

## Acute Myocardial Infarction (AMI)

In [None]:
# Select the rows flagged ami == 1 and summarise the cohort: its shape, its
# share of all merged rows, and its mortality rate.
ami_df = whole_flag_df.loc[whole_flag_df['ami'] == 1]
print(ami_df.shape)

ami_share = len(ami_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {ami_share:.1f}%")

# An expire flag of -1 is recoded to 0 before averaging.
ami_expire_flags = ami_df['hospital_expire_flag'].replace({-1: 0})
ami_mortality = ami_expire_flags.mean() * 100
print(f"Mortality Rate: {ami_mortality:.1f}%")

In [None]:
# Sanity check: the AMI cohort must be non-empty and every row must carry
# ami == 1. Reducing with .all() yields a single boolean; asserting directly
# on the array returned by unique() is ambiguous unless it has exactly one
# element.
ami_flags = ami_df['ami']
assert not ami_flags.empty and (ami_flags == 1).all(), "ami_df must contain only rows with ami == 1"
# The disease indicator columns are dropped from the cohort table.
ami_df = ami_df.drop(columns=['heart_failure', 'akf', 'sepsis', 'ami'])
ami_df.head()

In [None]:
# Write the AMI cohort to disk. to_csv does not create missing parent
# directories, so make sure the output folder exists first.
import os

os.makedirs("data/eicu-disease", exist_ok=True)
ami_df.to_csv("data/eicu-disease/ami-union-features-id.csv", index=False)
ami_df.head()

## Sepsis/Septicemia

In [None]:
# Select the rows flagged sepsis == 1 and summarise the cohort: its shape,
# its share of all merged rows, and its mortality rate.
sepsis_df = whole_flag_df.loc[whole_flag_df['sepsis'] == 1]
print(sepsis_df.shape)

sepsis_share = len(sepsis_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {sepsis_share:.1f}%")

# An expire flag of -1 is recoded to 0 before averaging.
sepsis_expire_flags = sepsis_df['hospital_expire_flag'].replace({-1: 0})
sepsis_mortality = sepsis_expire_flags.mean() * 100
print(f"Mortality Rate: {sepsis_mortality:.1f}%")

In [None]:
# Sanity check: the sepsis cohort must be non-empty and every row must carry
# sepsis == 1. Reducing with .all() yields a single boolean; asserting
# directly on the array returned by unique() is ambiguous unless it has
# exactly one element.
sepsis_flags = sepsis_df['sepsis']
assert not sepsis_flags.empty and (sepsis_flags == 1).all(), "sepsis_df must contain only rows with sepsis == 1"
# The disease indicator columns are dropped from the cohort table.
sepsis_df = sepsis_df.drop(columns=['heart_failure', 'akf', 'ami', 'sepsis'])
sepsis_df.head()

In [None]:
# Write the sepsis cohort to disk. to_csv does not create missing parent
# directories, so make sure the output folder exists first.
import os

os.makedirs("data/eicu-disease", exist_ok=True)
sepsis_df.to_csv("data/eicu-disease/sepsis-union-features-id.csv", index=False)

## Heart Failure

In [None]:
# Select the rows flagged heart_failure == 1 and summarise the cohort: its
# shape, its share of all merged rows, and its mortality rate.
heart_failure_df = whole_flag_df.loc[whole_flag_df['heart_failure'] == 1]
print(heart_failure_df.shape)

heart_failure_share = len(heart_failure_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {heart_failure_share:.1f}%")

# An expire flag of -1 is recoded to 0 before averaging.
heart_failure_expire_flags = heart_failure_df['hospital_expire_flag'].replace({-1: 0})
heart_failure_mortality = heart_failure_expire_flags.mean() * 100
print(f"Mortality Rate: {heart_failure_mortality:.1f}%")

In [None]:
# Sanity check: the heart-failure cohort must be non-empty and every row must
# carry heart_failure == 1. Reducing with .all() yields a single boolean;
# asserting directly on the array returned by unique() is ambiguous unless it
# has exactly one element.
heart_failure_flags = heart_failure_df['heart_failure']
assert not heart_failure_flags.empty and (heart_failure_flags == 1).all(), "heart_failure_df must contain only rows with heart_failure == 1"
# The disease indicator columns are dropped from the cohort table.
heart_failure_df = heart_failure_df.drop(columns=['sepsis', 'akf', 'ami', 'heart_failure'])
heart_failure_df.head()

In [None]:
# Write the heart-failure cohort to disk. to_csv does not create missing
# parent directories, so make sure the output folder exists first.
import os

os.makedirs("data/eicu-disease", exist_ok=True)
heart_failure_df.to_csv("data/eicu-disease/heart_failure-union-features-id.csv", index=False)

## Acute Kidney Failure

In [None]:
# Select the rows flagged akf == 1 and summarise the cohort: its shape, its
# share of all merged rows, and its mortality rate.
akf_df = whole_flag_df.loc[whole_flag_df['akf'] == 1]
print(akf_df.shape)

akf_share = len(akf_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {akf_share:.1f}%")

# An expire flag of -1 is recoded to 0 before averaging.
akf_expire_flags = akf_df['hospital_expire_flag'].replace({-1: 0})
akf_mortality = akf_expire_flags.mean() * 100
print(f"Mortality Rate: {akf_mortality:.1f}%")

In [None]:
# Sanity check: the AKF cohort must be non-empty and every row must carry
# akf == 1. Reducing with .all() yields a single boolean; asserting directly
# on the array returned by unique() is ambiguous unless it has exactly one
# element.
akf_flags = akf_df['akf']
assert not akf_flags.empty and (akf_flags == 1).all(), "akf_df must contain only rows with akf == 1"
# The disease indicator columns are dropped from the cohort table.
akf_df = akf_df.drop(columns=['sepsis', 'heart_failure', 'ami', 'akf'])
akf_df.head()

In [None]:
# Write the AKF cohort to disk. to_csv does not create missing parent
# directories, so make sure the output folder exists first.
import os

os.makedirs("data/eicu-disease", exist_ok=True)
akf_df.to_csv("data/eicu-disease/akf-union-features-id.csv", index=False)