# Test Traditional Score Baseline for APACHE IV/IVa, SAPS II, OASIS

In [None]:
import mimic_pipeline as mmp
import mimic_pipeline.utils as utils

from getpass import getpass

# Prompt for the database credentials interactively so they are never stored
# in the notebook source.
user = input("Enter your username: ")
# getpass (unlike input) does not echo the typed password into the cell output.
password = getpass("Enter your password: ")
# Loader bound to the 'eicu' database and schema; used by every query below.
loader = utils.DataBaseLoader(user=user, password=password, dbname='eicu', schema='eicu')

In [None]:
# One template for both APACHE versions so the two result sets are built by
# exactly the same query apart from the version filter.
# - The WHERE clause on apa.apacheversion discards patients without an APACHE
#   row, so a plain JOIN returns the same rows as the previous LEFT JOIN.
# - ORDER BY makes the row order deterministic; without it SQL gives no
#   ordering guarantee and the row-by-row comparison of the two frames in the
#   next cell could fail spuriously.
#   NOTE(review): assumes one row per patientunitstayid per version - confirm.
apache_query_template = '''
    SELECT pa.uniquepid, apa.*
    FROM patient pa
    JOIN apachepatientresult apa
        ON apa.patientunitstayid = pa.patientunitstayid
    WHERE apa.apacheversion = '{version}'
    ORDER BY apa.patientunitstayid
'''
# The version values are hard-coded literals, never user input.
apacheiv = loader.query(apache_query_template.format(version='IV'))
apacheiva = loader.query(apache_query_template.format(version='IVa'))

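Check that the APACHE IV and APACHE IVa result sets cover the same patients and unit stays, with the same recorded hospital mortality.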

In [None]:
import pandas as pd
# Both APACHE versions must describe the same cohort: identical patient/stay
# identifiers and identical recorded hospital mortality, row for row.
id_columns = ['uniquepid', 'patientunitstayid']
pd.testing.assert_frame_equal(apacheiv[id_columns], apacheiva[id_columns])

outcome_column = 'actualhospitalmortality'
pd.testing.assert_series_equal(apacheiv[outcome_column], apacheiva[outcome_column])

In [None]:
# Number of missing unit-stay identifiers in the APACHE IV result (displayed as the cell output).
missing_stay_ids = apacheiv['patientunitstayid'].isna()
missing_stay_ids.sum().sum()

In [None]:
# Number of missing patient identifiers in the APACHE IV result (displayed as the cell output).
missing_patient_ids = apacheiv['uniquepid'].isna()
missing_patient_ids.sum().sum()

## Check Baselines

In [None]:
# Fetch the 'baselines' table through the loader and preview its first rows.
baselines_df = loader['baselines']
baselines_df.head(n=5)

Check that the score probability columns and the identifier columns contain no null values.

In [None]:
# Every score probability and identifier must be present for every row:
# the total number of missing cells across these columns has to be zero.
required_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'uniquepid',
    'patientunitstayid',
]
assert baselines_df[required_columns].isna().sum().sum() == 0

Check that each patient and each unit stay appears only once, and that every row is the patient's first stay.

In [None]:
# Neither identifier may repeat: one row per patient and one row per unit stay.
for id_column in ('uniquepid', 'patientunitstayid'):
    assert baselines_df[id_column].is_unique

In [None]:
# Select one stay per patient: rows are ranked within each uniquepid by
# hospitaladmitoffset in descending order and only the top-ranked row is kept.
# (Pandas equivalent that was sketched here before: sort the patient table by
# hospitaladmitoffset and take the last row of each uniquepid group.)
first_stay_sql = '''
                            with tmp as(
                            SELECT patientunitstayid, uniquepid
                            , ROW_NUMBER() OVER (PARTITION BY uniquepid ORDER BY hospitaladmitoffset DESC) as rn
                            FROM patient
                            )
                            select patientunitstayid, uniquepid
                            from tmp
                            where rn = 1
                             '''
first_stay_df = loader.query(first_stay_sql)

# Every baseline patient and every baseline stay must be among the selected stays.
for id_column in ('uniquepid', 'patientunitstayid'):
    assert baselines_df[id_column].isin(first_stay_df[id_column]).all()

## Visualize Baselines

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's 'white' style for all figures in this notebook.
sns.set_style(style='white')

In [None]:
from sklearn.metrics import roc_curve, auc, precision_recall_curve
import numpy as np
# Observed hospital mortality labels; also reused by the later plotting cells.
y_true = baselines_df['actualhospitalmortality']

# One ROC curve per baseline score, all drawn on the same axes.
for score in ['apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob']:
    fpr, tpr, _ = roc_curve(y_true, baselines_df[score])
    # estimator=None and sort=False make seaborn draw the points exactly as
    # roc_curve returned them. By default lineplot averages y over repeated x
    # values and re-sorts by x, which would distort the vertical segments of
    # the curve so that it no longer matches the AUROC shown in the legend.
    ax = sns.lineplot(
        x=fpr,
        y=tpr,
        estimator=None,
        sort=False,
        label=f"{score} AUROC: {auc(fpr, tpr):.3f}",
    )

# Diagonal reference line for a random classifier.
ax = sns.lineplot(x=np.linspace(0,1), y=np.linspace(0,1), label='Random', linestyle='--', color='black')
# All curves share one figure, so it only needs to be resized once.
ax.figure.set_size_inches(8, 8)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

In [None]:
# One precision-recall curve per baseline score, all drawn on the same axes.
for score in ['apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob']:
    precision, recall, _ = precision_recall_curve(y_true, baselines_df[score])
    # estimator=None and sort=False make seaborn draw the points exactly as
    # precision_recall_curve returned them. By default lineplot averages the
    # precision over repeated recall values and re-sorts by recall, so the
    # plotted line would not be the curve whose area is shown in the legend.
    ax = sns.lineplot(
        x=recall,
        y=precision,
        estimator=None,
        sort=False,
        label=f"{score} AUPRC: {auc(recall, precision):.3f}",
    )

# All curves share one figure, so it only needs to be resized once.
ax.figure.set_size_inches(8, 8)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()

In [None]:
# Re-check that no score probability or identifier is missing:
# the total number of missing cells across these columns has to be zero.
checked_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'uniquepid',
    'patientunitstayid',
]
assert baselines_df[checked_columns].isna().sum().sum() == 0

In [None]:
# Show the distribution of each baseline score in its own figure.
probability_columns = ('apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob')
for probability_column in probability_columns:
    score_values = baselines_df[probability_column]
    sns.histplot(score_values)
    plt.show()

In [None]:
# One calibration curve per baseline score (10 bins), all on the same axes.
# The legend reports the statistic and p-value returned by get_calibration_curve.
probability_columns = ('apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob')
for probability_column in probability_columns:
    prob_true, prob_pred, stat, p = mmp.metric.get_calibration_curve(
        y_true, baselines_df[probability_column], n_bins=10
    )
    legend_text = f"{probability_column} H stat: {stat:.3f}, p: {p:.3f}"
    sns.lineplot(x=prob_pred, y=prob_true, label=legend_text, marker='s')

# Diagonal reference line for perfect calibration.
diagonal = np.linspace(0,1)
ax = sns.lineplot(x=diagonal, y=diagonal, label='Perfect', linestyle='--', color='black')
# All curves share one figure, so a single resize is enough.
ax.figure.set_size_inches(8, 8)
plt.xlabel('Predicted Probability')
plt.ylabel('True Probability')
plt.legend()
plt.show()