In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Global plotting style: dark grid background, 'paper' context with enlarged fonts.
sns.set_theme(context='paper', style="darkgrid", font_scale=1.75)

# Data preparation

In [None]:
# Lab measurements: the first two CSV columns form the row MultiIndex.
labs = pd.read_csv(
    'data/mimic/labs_first_day.csv',
    index_col=[0, 1],
)
# Outcomes table: the first CSV column is the row index.
outcomes = pd.read_csv(
    'data/mimic/outcomes_first_day.csv',
    index_col=0,
)

In [None]:
# For paper's tables
# Re-load `labs` and `outcomes` from the sub-selected CSVs, replacing the tables loaded from the
# full first-day files. The two paths below are the ones written by the
# "Save subsets patients and labs" cell of this notebook.
# NOTE(review): on a fresh checkout these files presumably do not exist until that later cell has
# been run once — confirm the intended execution order.
labs = pd.read_csv('data/mimic/labs_first_day_subselection.csv', index_col = [0, 1])
outcomes = pd.read_csv('data/mimic/outcomes_first_day_subselection.csv', index_col = 0)

### Analysis

In [None]:
# Average number of recorded values per patient for every lab, then the 20 largest as a bar chart.
mean_count_per_lab = labs.groupby('Patient').count().mean()
most_frequent_labs = mean_count_per_lab.sort_values().tail(20)
most_frequent_labs.plot.barh(figsize=(5, 7))

### How many lab events per patient in this period?

In [None]:
# A row counts as an event when at least one lab value is present in it.
has_measurement = labs.notna().any(axis=1)
# Number of such rows for each patient.
number_events = has_measurement.groupby('Patient').sum()
number_events.describe()

### In-hospital mortality

In [None]:
# Share of patients with a recorded `Death` value, cross-checked against the mean of `Event`.
death_recorded_rate = outcomes.Death.notna().mean()
event_rate = outcomes.Event.mean()
print("In hospital mortality: {:.2f} % (verification event label: {:.2f} %)".format(100 * death_recorded_rate, 100 * event_rate))

# Analysis per admission day

In [None]:
# Label each patient by admission day: `Day <= 4` is 'Weekdays', anything else 'Weekends'.
# NOTE(review): presumably `Day` is a 0-based day of week with Monday = 0 — confirm.
is_weekday_admission = outcomes.Day <= 4
admission = is_weekday_admission.map({True: 'Weekdays', False: 'Weekends'})

In [None]:
# Non-missing entries per column in each admission group.
outcomes_by_admission = outcomes.groupby(admission)
outcomes_by_admission.count()

In [None]:
# Merge the three listed payers into a single 'Public' category.
public_payers = ['Medicaid', 'Government', 'Medicare']
outcomes['INSURANCE'] = outcomes['INSURANCE'].replace(dict.fromkeys(public_payers, 'Public'))

In [None]:
# To modify to study another group
# Split outcomes and labs into the 'Weekends' group and its complement.
is_weekend = admission == 'Weekends'
weekend_patients = admission[is_weekend].index
weekday_patients = admission[~is_weekend].index
lab_patient_ids = labs.index.get_level_values(0)

outcome_we = outcomes.loc[is_weekend]
labs_we = labs.loc[lab_patient_ids.isin(weekend_patients)]

outcome_wd = outcomes[~is_weekend]
labs_wd = labs[lab_patient_ids.isin(weekday_patients)]

In [None]:
# Preprocess ethnicity: collapse the raw labels into five coarse groups.
ethnicity = outcomes.ETHNICITY.copy()

# Applied in this order; a label matching a keyword is replaced by the group name.
keyword_to_group = [("ASIAN", 'Asian'), ("HISPANIC", 'Hispanic'), ("BLACK", 'Black'), ("WHITE", 'White')]
for keyword, group in keyword_to_group:
    ethnicity[ethnicity.str.contains(keyword)] = group

# Everything that did not match any keyword becomes 'Other'.
known_groups = [group for _, group in keyword_to_group]
ethnicity[~ethnicity.isin(known_groups)] = 'Other'

outcomes.ETHNICITY = ethnicity

In [None]:
# Summary statistics for the whole population and for the weekday / weekend admission groups.
# Each table has the columns 'Population', 'Weekday', 'Weekend'.

# --- `Time`: mean and standard deviation (table is named `los`) ---
los_mean_we = outcome_we['Time'].mean()
los_std_we = outcome_we['Time'].std()
los_mean_wd = outcome_wd['Time'].mean()
los_std_wd = outcome_wd['Time'].std()
los = pd.DataFrame({'Mean': [outcomes['Time'].mean(), los_mean_wd, los_mean_we],
                    'Std': [outcomes['Time'].std(), los_std_wd, los_std_we]}, index = ['Population', 'Weekday', 'Weekend']).T

# --- `Event` rate, in percent ---
death_we = outcome_we['Event'].mean()
death_wd = outcome_wd['Event'].mean()
death = pd.DataFrame({'':[outcomes['Event'].mean(), death_wd, death_we]}, index = ['Population', 'Weekday', 'Weekend']).T*100

# --- Insurance distribution, in percent ---
insurance_we = outcome_we['INSURANCE'].value_counts() / len(outcome_we)
insurance_wd = outcome_wd['INSURANCE'].value_counts() / len(outcome_wd)
insurance = pd.concat({"Population": outcomes['INSURANCE'].value_counts() / len(outcomes), "Weekday": insurance_wd, "Weekend": insurance_we}, axis = 1)*100

# --- Ethnicity distribution, in percent ---
# Read the grouped labels from `outcomes['ETHNICITY']` (set by the ethnicity preprocessing cell)
# instead of from the `ethnicity` variable: that name is rebound to the summary table below, so
# reading from it would make this cell fail when it is executed a second time.
ethnicity_labels = outcomes['ETHNICITY']
ethnicity_labels_we = ethnicity_labels.loc[admission == 'Weekends']
ethnicity_labels_wd = ethnicity_labels.loc[admission == 'Weekdays']
ethnicity_we = ethnicity_labels_we.value_counts() / len(ethnicity_labels_we)
ethnicity_wd = ethnicity_labels_wd.value_counts() / len(ethnicity_labels_wd)
ethnicity = pd.concat({"Population": ethnicity_labels.value_counts() / len(ethnicity_labels), "Weekday": ethnicity_wd, "Weekend": ethnicity_we}, axis = 1)*100

# --- Gender distribution, in percent ---
gender_names = {'M': 'Male', 'F': 'Female'}
gender_we = outcome_we['GENDER'].replace(gender_names).value_counts() / len(outcome_we)
gender_wd = outcome_wd['GENDER'].replace(gender_names).value_counts() / len(outcome_wd)
gender = pd.concat({"Population": outcomes['GENDER'].replace(gender_names).value_counts() / len(outcomes), "Weekday": gender_wd, "Weekend": gender_we}, axis = 1)*100

In [None]:
# For each horizon: share of patients with `Event` set and `Remaining` below the horizon,
# per admission group and overall.
for horizon in (7, 30):
    within_horizon = (outcomes.Remaining < horizon) & outcomes.Event
    rate_by_admission = within_horizon.groupby(admission).mean()
    overall_rate = within_horizon.mean()
    print(horizon, rate_by_admission, overall_rate)

In [None]:
# Stack the individual summary tables under one outer index level per section.
sections = {
    'LOS': los,
    'Death': death,
    'Gender': gender,
    'Ethnicity': ethnicity,
    'Insurance': insurance,
}
analysis = pd.concat(sections)

# LaTeX export with two decimals, followed by the rich notebook display.
latex_source = analysis.to_latex(float_format="{:0.2f}".format)
print(latex_source)
analysis

In [None]:
import scipy.stats


def _mean_std_by_admission(per_patient):
    """Return one row per admission group with each column formatted as "mean (std)"."""
    def _format_group(group):
        cells = ["{:.2f} ({:.2f})".format(mean, std) for mean, std in zip(group.mean(), group.std())]
        return pd.Series(cells, index = group.columns)

    return per_patient.groupby(admission).apply(_format_group)


def _weekend_vs_weekday_pvalues(how):
    """Return the formatted t-test p-value, per lab, comparing weekend and weekday patients.

    `how` is the per-patient aggregation applied to each lab before testing ('mean' or 'count').
    """
    pvalues = []
    for lab in labs_wd.columns:
        weekend = labs_we[lab].groupby('Patient').agg(how).dropna()
        weekday = labs_wd[lab].groupby('Patient').agg(how).dropna()
        pvalues.append('{:.2f}'.format(scipy.stats.ttest_ind(weekend, weekday)[1]))
    return pvalues


# Per-patient average lab value, summarised per admission group, plus a 'Test' row of p-values.
value = _mean_std_by_admission(labs.groupby('Patient').mean())
value.loc['Test'] = _weekend_vs_weekday_pvalues('mean')

# Same layout for the per-patient number of recorded values.
count = _mean_std_by_admission(labs.groupby('Patient').count())
count.loc['Test'] = _weekend_vs_weekday_pvalues('count')

# Labs as rows; value columns first, then count columns.
table = pd.concat([value, count]).T
print(table.to_latex())
table

In [None]:
# Total number of recorded lab values per patient (summed over all labs).
tests_per_patient = labs.groupby('Patient').count().sum(axis=1).to_frame(name = 'Total tests ordered')

# Patient-level covariates, renamed and recoded for plotting.
covariates = [
    (outcomes.Day <= 4).rename("Admission").replace({False: "Weekend", True: "Weekday"}),
    outcomes.INSURANCE.rename("Insurance").replace({"Medicare": "Public", "Medicaid": "Public", "Government": "Public", "Self Pay": "Private"}),
    outcomes.Death.isna().rename("Outcome").replace({True: "Discharge", False: "Death"}),
    outcomes.GENDER.rename("Sex"),
    outcomes.ETHNICITY.rename("Ethnicity").replace({"Other": "Non-White", "Black": "Non-White", "Hispanic": "Non-White", "Asian": "Non-White"}),
]

# Join the covariates one after the other onto the per-patient totals.
ordered_test = tests_per_patient
for covariate in covariates:
    ordered_test = ordered_test.join(covariate)

In [None]:
# Split violin plot of the total number of tests per patient, by outcome and admission group.
# The figure size is set on the figure itself: `violinplot` has no `fig_size` parameter, so the
# keyword was either silently ignored or rejected by matplotlib depending on the seaborn version.
fig, ax = plt.subplots(figsize = (10, 5), dpi = 1000)
sns.violinplot(x = "Outcome", y = "Total tests ordered", hue = 'Admission', data = ordered_test, split = True, inner = 'quartile', palette = "Blues", cut = 0, ax = ax)
# Legend placed outside the axes, to the right.
ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5), title = 'Admission')
ax.set_ylim(20, 70)

In [None]:
# Density of the observation times (second index level) for weekend, then weekday, admissions.
weekend_times = labs_we.index.get_level_values(1).to_series()
weekday_times = labs_wd.index.get_level_values(1).to_series()

weekend_times.plot.density(xlim = (0, 1))
weekday_times.plot.density()

In [None]:
# Cumulative histograms of observation times; each row is weighted by 1 / (number of patients
# in the group) so the curves read as cumulative events per patient.
weekend_times = labs_we.index.get_level_values(1).to_series()
n_weekend_patients = len(labs_we.index.get_level_values(0).unique())
weekend_weights = np.full(len(labs_we), 1 / n_weekend_patients)

weekday_times = labs_wd.index.get_level_values(1).to_series()
n_weekday_patients = len(labs_wd.index.get_level_values(0).unique())
weekday_weights = np.full(len(labs_wd), 1 / n_weekday_patients)

weekend_times.plot.hist(cumulative = True, weights = weekend_weights)
weekday_times.plot.hist(cumulative = True, alpha = 0.5, weights = weekday_weights)

# Analyze available labs

Keep the labs that are observed at least once for more than 2/3 of the patients.

In [None]:
# 1 when a patient has at least one recorded value for a lab, 0 otherwise.
measurements_per_patient = labs.groupby('Patient').count()
missing_data = (measurements_per_patient > 0).astype('int64')

In [None]:
# Fraction of patients with at least one value, per lab; keep the labs above 2/3.
observed_fraction = missing_data.mean()
selection = observed_fraction[observed_fraction > 2/3].index

In [None]:
# Restrict to the selected labs and drop rows in which none of them is observed.
labs_observed = labs[selection].dropna(how = 'all')

# Keep patients with more than one remaining row.
rows_per_patient = labs_observed.groupby('Patient').size()
patients = rows_per_patient[rows_per_patient > 1].index
is_kept_row = labs_observed.index.get_level_values('Patient').isin(patients)
labs_subselection = labs_observed[is_kept_row]

# Keep only the outcomes of patients still present in the lab table.
kept_patients = labs_subselection.index.get_level_values('Patient')
outcomes_subselection = outcomes[outcomes.index.get_level_values('Patient').isin(kept_patients)]

In [None]:
# Update last observation time (if you remove the last observation time)
# For every lab row, `Time` of its patient minus the row's observation time; the last row per
# patient gives the updated `Remaining` value.
time_after_observation = outcomes_subselection.Time.loc[labs_subselection.index.get_level_values(0)] - labs_subselection.index.get_level_values(1)
remaining = time_after_observation.groupby('Patient').last()

# `outcomes_subselection` is a filtered slice of `outcomes`; `.assign` returns a new frame
# instead of writing a column into that slice, which avoids pandas' chained-assignment warning.
outcomes_subselection = outcomes_subselection.assign(Remaining = remaining)

In [None]:
# Save subsets patients and labs
exports = [
    (labs_subselection, 'data/mimic/labs_first_day_subselection.csv'),
    (outcomes_subselection, 'data/mimic/outcomes_first_day_subselection.csv'),
]
for frame, destination in exports:
    frame.to_csv(destination)

# Display observation process

Display the observation process for a random selection of patients from each outcome group.

In [None]:
# Seed NumPy's global random generator so that later random draws in this notebook
# (e.g. the `np.random.choice` over lab columns) are repeatable.
np.random.seed(42)

In [None]:
n = 30


def _sample_by_outcome(outcomes_time, n):
    """Sample up to `n` patients from each outcome group of `outcomes_time`.

    Groups are: deaths with `Time < 7`, deaths with `Time >= 7`, and patients without the
    event (discharged). Returns a dict mapping the group label to the sampled rows.
    """
    # Cast to bool so that `~` is a logical negation whatever dtype `Event` was loaded with.
    died = outcomes_time.Event.astype(bool)
    groups = {
        'Early Death\n(before end of week 1)': outcomes_time[died & (outcomes_time.Time < 7)],
        # `>= 7` so that a death at exactly Time == 7 is not left out of both death groups.
        'Later Death': outcomes_time[died & (outcomes_time.Time >= 7)],
        # Discharged patients are those WITHOUT the event (previously this sampled deaths).
        'Discharged': outcomes_time[~died],
    }
    # Cap the sample size at the group size so that a small group does not raise.
    return {label: group.sample(n = min(n, len(group))) for label, group in groups.items()}


# One sample per outcome group, separately for weekday and weekend admissions.
random_selection = {
    time: _sample_by_outcome(outcomes_time, n)
    for time, outcomes_time in zip(['Weekday Admission', 'Weekend Admission'], [outcome_wd, outcome_we])
}



In [None]:
# One figure per randomly chosen lab: rows are outcome groups, columns are admission groups,
# and each horizontal line is one sampled patient with a cross at every observation time.
for l in np.random.choice(labs.columns, size = 5):
    labs_display = labs[l].dropna()

    fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 8), sharex = True, sharey = True)
    fig.suptitle(l)

    for j, time in enumerate(random_selection.keys()):
        ax[0, j].set_title(time)
        for i, cat in enumerate(random_selection[time].keys()):
            ax[i, j].axes.yaxis.set_ticks([])
            ax[i, j].set_xlim([0, 24])
            ax[i, 0].set_ylabel(cat)
            for k, patient in enumerate(random_selection[time][cat].index):
                # Dashed guide line for this patient across the 24 hours.
                ax[i, j].plot([0, 24], [k, k], ls = '--', alpha = 0.5, c = 'w')
                try:
                    data_patient = labs_display.loc[patient].index
                except KeyError:
                    # This patient has no recorded value for the lab: nothing to draw.
                    continue
                # Index values are multiplied by 24 to be shown on the hour axis.
                ax[i, j].scatter(24 * data_patient, [k] * len(data_patient), alpha = 0.8, marker = 'x')
    ax[-1, 1].set_xlabel('Time (in hours)')
    ax[-1, 0].set_xlabel('Time (in hours)')
    plt.tight_layout()
    plt.show()

In [None]:
# Single figure overlaying the observation times of ALL labs for the sampled patients:
# rows are outcome groups, columns are admission groups, one horizontal line per patient.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 8), sharex = True, sharey = True)
# Fixed title: the figure covers every lab, so it must not reuse a loop variable left over
# from another cell.
fig.suptitle('All labs')

# Axis decoration and patient guide lines are independent of the lab: draw them once.
for j, time in enumerate(random_selection.keys()):
    ax[0, j].set_title(time)
    for i, cat in enumerate(random_selection[time].keys()):
        ax[i, j].axes.yaxis.set_ticks([])
        ax[i, j].set_xlim([0, 24])
        ax[i, 0].set_ylabel(cat)
        for k, patient in enumerate(random_selection[time][cat].index):
            ax[i, j].plot([0, 24], [k, k], ls = '--', alpha = 0.5, c = 'w')

# Observation times, lab by lab (same lab -> panel -> patient order as before).
for l in labs.columns:
    labs_display = labs[l].dropna()

    for j, time in enumerate(random_selection.keys()):
        for i, cat in enumerate(random_selection[time].keys()):
            for k, patient in enumerate(random_selection[time][cat].index):
                try:
                    data_patient = labs_display.loc[patient].index
                except KeyError:
                    # This patient has no recorded value for the lab: nothing to draw.
                    continue
                # Index values are multiplied by 24 to be shown on the hour axis.
                ax[i, j].scatter(24 * data_patient, [k] * len(data_patient), alpha = 0.8, marker = 'x')
ax[-1, 1].set_xlabel('Time (in hours)')
ax[-1, 0].set_xlabel('Time (in hours)')
plt.tight_layout()
plt.show()