# Datasets for disease-specific patients

In [None]:
import pandas as pd
import mimic_pipeline.utils as utils
import numpy as np

In [None]:
# Prompt for the database credentials and create the loader used by the
# cells below. getpass is imported here so the cell stays self-contained.
from getpass import getpass

user = input("Enter your username: ")
# Read the password without echoing it, so the typed value is not shown
# (and saved) in the notebook output the way input() would leave it.
password = getpass("Enter your password: ")
loader = utils.DataBaseLoader(user=user, password=password)

In [None]:
# Read the train and test feature tables and stack them row-wise into one
# frame; the original row indices of each file are kept as-is.
train, test = map(
    pd.read_csv,
    ("data/TRAIN-union-features-id.csv", "data/TEST-union-features-id.csv"),
)
whole = pd.concat((train, test), axis="index")
whole.head()

In [None]:
# Display the (rows, columns) size of the combined train + test frame.
whole.shape

In [None]:
# Look up the 'disease_flag' entry through the loader. The merge cell below
# reads its hadm_id, ami, heart_failure, akf and sepsis columns.
# NOTE(review): presumably this pulls a database table/view -- confirm in utils.
disease_df = loader['disease_flag']

Get disease-specific flags

In [None]:
# Attach the four disease indicators to every row of `whole`, matching on
# hadm_id. The left join keeps all rows of `whole`; rows without a match in
# disease_df end up with NaN in the flag columns.
flag_columns = ['hadm_id', 'ami', 'heart_failure', 'akf', 'sepsis']
whole_flag_df = pd.merge(whole, disease_df[flag_columns], how='left', on='hadm_id')
print(whole_flag_df.shape)
whole_flag_df.head()

Check that stays are unique (compare the number of distinct `hadm_id` values with the row count above)

In [None]:
# Count the distinct hadm_id values; dropna=False keeps a missing ID (if any)
# in the count, exactly as taking the length of .unique() would.
whole_flag_df['hadm_id'].nunique(dropna=False)

In [None]:
# For each disease flag, show the distinct values it takes and how many rows
# have no value for it.
for flag in ('ami', 'sepsis', 'akf', 'heart_failure'):
    flag_values = whole_flag_df[flag]
    print(flag_values.unique())
    print(flag_values.isna().sum())

## Acute Myocardial Infarction (AMI)

In [None]:
# Keep only the rows flagged ami == 1 and report the cohort's shape, its
# share of all rows, and its mortality rate.
ami_df = whole_flag_df.loc[whole_flag_df['ami'] == 1]
print(ami_df.shape)
ami_share = len(ami_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {ami_share:.1f}%")
# Values of -1 in hospital_expire_flag are mapped to 0 before averaging.
ami_mortality = ami_df['hospital_expire_flag'].replace({-1: 0}).mean() * 100
print(f"Mortality Rate: {ami_mortality:.1f}%")

In [None]:
# Sanity check: the cohort must be non-empty and hold only ami == 1 rows.
# .unique() returns an array, and comparing an array to a list yields another
# array whose truth value is ambiguous (empty or multi-valued results warn or
# raise instead of failing the assert cleanly). Converting to a list first
# makes this an ordinary boolean comparison.
assert ami_df['ami'].unique().tolist() == [1]
# The disease flags were only needed for filtering; drop them from the table.
ami_df = ami_df.drop(['heart_failure', 'akf', 'sepsis', 'ami'], axis=1)
ami_df.head()

In [None]:
# Write the AMI cohort with its identifier columns, then remove the
# identifiers from the in-memory frame for the ID-free export that follows.
ami_df.to_csv("data/mimic-disease/ami-union-features-id.csv", index=False)
ami_df = ami_df.drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
ami_df.head()

In [None]:
# Write the AMI cohort without identifier columns (they were dropped in the
# previous cell); index=False keeps the row index out of the file.
ami_df.to_csv("data/mimic-disease/ami-union-features.csv", index=False)

## Sepsis/Septicemia

In [None]:
# Keep only the rows flagged sepsis == 1 and report the cohort's shape, its
# share of all rows, and its mortality rate.
sepsis_df = whole_flag_df.loc[whole_flag_df['sepsis'] == 1]
print(sepsis_df.shape)
sepsis_share = len(sepsis_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {sepsis_share:.1f}%")
# Values of -1 in hospital_expire_flag are mapped to 0 before averaging.
sepsis_mortality = sepsis_df['hospital_expire_flag'].replace({-1: 0}).mean() * 100
print(f"Mortality Rate: {sepsis_mortality:.1f}%")

In [None]:
# Sanity check: the cohort must be non-empty and hold only sepsis == 1 rows.
# .unique() returns an array, and comparing an array to a list yields another
# array whose truth value is ambiguous (empty or multi-valued results warn or
# raise instead of failing the assert cleanly). Converting to a list first
# makes this an ordinary boolean comparison.
assert sepsis_df['sepsis'].unique().tolist() == [1]
# The disease flags were only needed for filtering; drop them from the table.
sepsis_df = sepsis_df.drop(['heart_failure', 'akf', 'ami', 'sepsis'], axis=1)
sepsis_df.head()

In [None]:
# Write the sepsis cohort with its identifier columns, then remove the
# identifiers from the in-memory frame for the ID-free export that follows.
sepsis_df.to_csv("data/mimic-disease/sepsis-union-features-id.csv", index=False)
sepsis_df = sepsis_df.drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
sepsis_df.head()

In [None]:
# Write the sepsis cohort without identifier columns (they were dropped in
# the previous cell); index=False keeps the row index out of the file.
sepsis_df.to_csv("data/mimic-disease/sepsis-union-features.csv", index=False)

## Heart Failure

In [None]:
# Keep only the rows flagged heart_failure == 1 and report the cohort's
# shape, its share of all rows, and its mortality rate.
heart_failure_df = whole_flag_df.loc[whole_flag_df['heart_failure'] == 1]
print(heart_failure_df.shape)
heart_failure_share = len(heart_failure_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {heart_failure_share:.1f}%")
# Values of -1 in hospital_expire_flag are mapped to 0 before averaging.
heart_failure_mortality = heart_failure_df['hospital_expire_flag'].replace({-1: 0}).mean() * 100
print(f"Mortality Rate: {heart_failure_mortality:.1f}%")

In [None]:
# Sanity check: the cohort must be non-empty and hold only heart_failure == 1
# rows. .unique() returns an array, and comparing an array to a list yields
# another array whose truth value is ambiguous (empty or multi-valued results
# warn or raise instead of failing the assert cleanly). Converting to a list
# first makes this an ordinary boolean comparison.
assert heart_failure_df['heart_failure'].unique().tolist() == [1]
# The disease flags were only needed for filtering; drop them from the table.
heart_failure_df = heart_failure_df.drop(['sepsis', 'akf', 'ami', 'heart_failure'], axis=1)
heart_failure_df.head()

In [None]:
# Write the heart-failure cohort with its identifier columns, then remove the
# identifiers from the in-memory frame for the ID-free export that follows.
heart_failure_df.to_csv("data/mimic-disease/heart_failure-union-features-id.csv", index=False)
heart_failure_df = heart_failure_df.drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
heart_failure_df.head()

In [None]:
# Write the heart-failure cohort without identifier columns (they were
# dropped in the previous cell); index=False keeps the row index out of the file.
heart_failure_df.to_csv("data/mimic-disease/heart_failure-union-features.csv", index=False)

## Acute Kidney Failure

In [None]:
# Keep only the rows flagged akf == 1 and report the cohort's shape, its
# share of all rows, and its mortality rate.
akf_df = whole_flag_df.loc[whole_flag_df['akf'] == 1]
print(akf_df.shape)
akf_share = len(akf_df) / len(whole_flag_df) * 100
print(f"Percentage of patients: {akf_share:.1f}%")
# Values of -1 in hospital_expire_flag are mapped to 0 before averaging.
akf_mortality = akf_df['hospital_expire_flag'].replace({-1: 0}).mean() * 100
print(f"Mortality Rate: {akf_mortality:.1f}%")

In [None]:
# Sanity check: the cohort must be non-empty and hold only akf == 1 rows.
# .unique() returns an array, and comparing an array to a list yields another
# array whose truth value is ambiguous (empty or multi-valued results warn or
# raise instead of failing the assert cleanly). Converting to a list first
# makes this an ordinary boolean comparison.
assert akf_df['akf'].unique().tolist() == [1]
# The disease flags were only needed for filtering; drop them from the table.
akf_df = akf_df.drop(['sepsis', 'heart_failure', 'ami', 'akf'], axis=1)
akf_df.head()

In [None]:
# Write the AKF cohort with its identifier columns, then remove the
# identifiers from the in-memory frame for the ID-free export that follows.
akf_df.to_csv("data/mimic-disease/akf-union-features-id.csv", index=False)
akf_df = akf_df.drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
akf_df.head()

In [None]:
# Write the AKF cohort without identifier columns (they were dropped in the
# previous cell); index=False keeps the row index out of the file.
akf_df.to_csv("data/mimic-disease/akf-union-features.csv", index=False)