In [None]:
# Load libraries and functions
%load_ext autoreload
%autoreload 2
%matplotlib inline
RANDOM_STATE = 42  # Seed constant for pseudo-random operations (not referenced in the cells visible here)

# Star import: the cells below use glob, os, pickle, pd, np, plt, sns, NelsonAalenFitter
# and add_at_risk_counts without importing them in this notebook, so they are presumably
# re-exported by the local `utils` module -- TODO confirm against utils.py.
from utils import *
sns.set_palette("tab10") # Use the "tab10" colour palette for subsequent plots (sets the colour cycle only)

# Standard library
import os

# Extra libraries for this notebook
import cmprsk
from cmprsk import utils
from cmprsk.cmprsk import cuminc

In [None]:
# Load the newest "Updated*.pkl" snapshot found in the working directory
fn_vae_data = glob.glob('./Updated*.pkl')
if not fn_vae_data:
    # Without this guard max() fails with "max() arg is an empty sequence",
    # which says nothing about the missing data file.
    raise FileNotFoundError(
        "No dataset matching './Updated*.pkl' found in " + os.getcwd()
    )
# NOTE: os.path.getctime is the creation time on Windows but the time of the last
# metadata change on Unix, so "latest" depends on the platform.
latest_fn_vae_data = max(fn_vae_data, key=os.path.getctime)

print("Loading... ",latest_fn_vae_data)
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load snapshots produced by a trusted pipeline.
with open(latest_fn_vae_data, "rb") as f:
    vae_data_main = pickle.load(f)
print("Done")

# Risk of HARTI depending on ICU days

In [None]:
#### VA- and NVA-HARTI

# Define data: collapse the table to one row per ID_subid by taking the column-wise
# maximum, i.e. the largest day_in_icu_bid value and whether each event flag was ever set.
T = vae_data_main[['ID_subid', 'day_in_icu_bid']].groupby('ID_subid').max()
E1 = vae_data_main[['ID_subid', 'vap']].groupby('ID_subid').max()
E2 = vae_data_main[['ID_subid', 'non_vap_resp_hai']].groupby('ID_subid').max()

fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(15,5))
BANDWIDTH = 5  # kernel bandwidth passed to plot_hazard
LIM = 61       # fitter timeline is range(0, LIM); also the right x-limit of the left panel

# --- Right panel: kernel-smoothed hazard function ---
naf1 = NelsonAalenFitter()
naf1.fit(T, E1, timeline=range(0, LIM, 1), label='VA-HARTI')
naf1.plot_hazard(ax=ax2, bandwidth=BANDWIDTH)
add_at_risk_counts(naf1, ax=ax2)

naf2 = NelsonAalenFitter()
naf2.fit(T, E2, timeline=range(0, LIM, 1), label='NVA-HARTI')
naf2.plot_hazard(ax=ax2, bandwidth=BANDWIDTH)

ax2.text(2, 0.0001, 'bandwidth = '+ "%.0f" %BANDWIDTH, fontsize=12)
ax2.grid(linestyle='dotted', which='both', axis='both')
ax2.minorticks_on()
ax2.legend(loc='upper center', ncol=2)
ax2.set_xlabel('Days in the ICU')
ax2.set_title('Kernel-smoothed function of instantaneous hazard\nfor VA- and NVA-HARTI depending on the number of days in the ICU')
ax2.yaxis.set_tick_params(which='both', labelleft=True)
ax2.set_ylabel('Instantaneous hazard rate')


# --- Left panel: cumulative incidence with competing events ---
# Recode the event indicators to 1 = event of interest, 2 = everything else, so that
# no observation keeps the value 0.
# NOTE(review): presumably 0 is cuminc's censoring code and 2 is then treated as a
# competing event -- confirm against the cmprsk documentation.
E1 = E1.replace({0: 2})
E2 = E2.replace({True: 1})
E2 = E2.replace({False: 2})

cuminc_res1 = cuminc(T.values.reshape(-1), E1.values.reshape(-1))
cuminc_res2 = cuminc(T.values.reshape(-1), E2.values.reshape(-1))

# Label each curve where it is drawn. Passing a bare list of labels to legend() matches
# labels to artists by draw order, and the fill_between bands sit between the two lines,
# so the second label could end up on the first confidence band.
for cuminc_result, curve_label in [(cuminc_res1, 'VA-HARTI'), (cuminc_res2, 'NVA-HARTI')]:
    # Only the first group of each result is drawn
    # (presumably the curve for event code 1 -- confirm the group ordering).
    for name, group in list(cuminc_result.groups.items())[:1]:
        ax1.plot(group.time, group.est, label=curve_label)
        ax1.fill_between(group.time, group.low_ci, group.high_ci, alpha=0.25)

# set_xlim takes (left, right); a third positional argument is not a step size -- it was
# interpreted as `emit`, which is keyword-only in current Matplotlib and raises TypeError.
ax1.set_xlim(0, LIM)

ax1.grid(linestyle='dotted', which='both', axis='both')
ax1.minorticks_on()
ax1.legend(loc='lower center', ncol=2)
ax1.set_ylabel('Cumulative probability of HARTI')
ax1.set_xlabel('Days in the ICU')
ax1.set_title('Cumulative probability of VA- and NVA-HARTI depending on the number\nof days in the ICU while accounting for competing events')
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists on a fresh checkout
os.makedirs('./pictures', exist_ok=True)
plt.savefig('./pictures/cuminc_VA_NVA_days.pdf', dpi=600)


In [None]:
# Show estimate table from cuminc VA-HARTI
df = []
for name, group in cuminc_res1.groups.items():
    df.append(group.time)
    df.append(group.est)
    df.append(name)

# Only the first group's (time, estimate) pair is tabulated: column 0 = time, column 1 = estimate
harti = pd.DataFrame(zip(pd.Series(df[0]), pd.Series(df[1])))
# NOTE: this changes the global pandas display limit for all later cells as well
pd.set_option('display.max_rows', len(harti))
print(harti.tail(10))

# Average increase of the estimate per time point over rows 1-13 of the table
# (the original note called this "first 10 days" -- TODO confirm the row-to-day mapping).
# Group by the time column first, as the NVA-HARTI and ventilation cells do: the table
# appears to repeat time points (step-function output), and without the grouping diff()
# would average over raw rows and also report the mean of the time column itself.
h10 = harti.iloc[1:14, :].groupby(0).max()
print('\nEach day adds probability: ', h10.diff().mean().values)

In [None]:
# Doubling time for the early (exponential) phase of VA-HARTI.
# `harti` here is the table built from cuminc_res1; its first 15 rows are used
# (original note: 15 = first 7 days).
v = harti.head(15).groupby(0).max()
step_growth = v.pct_change()[1]       # relative change of the estimate between time points
rate = step_growth.tail(6).mean()     # average over the last six steps only
doubling_days = np.log(2) / np.log(1+rate)
print("Doubling time, days: ", doubling_days)
print("Doubling time, hours: ", doubling_days * 24)

In [None]:
# Estimate table from the cuminc fit for NVA-HARTI
df = []
for name, group in cuminc_res2.groups.items():
    df.extend([group.time, group.est, name])

# Only the first group's (time, estimate) pair is tabulated: column 0 = time, column 1 = estimate
times, estimates = pd.Series(df[0]), pd.Series(df[1])
harti = pd.DataFrame(zip(times, estimates))
# NOTE: this changes the global pandas display limit for all later cells as well
pd.set_option('display.max_rows', len(harti))
print(harti.tail(10))

# Average increase of the estimate per time point over rows 1-13 of the table
# (the original note called this "first 10 days" -- TODO confirm the row-to-day mapping)
early_rows = harti.iloc[1:14, :]
h10 = early_rows.groupby(0).max()
mean_step = h10.diff().mean()
print('\nEach day adds probability: ', mean_step.values)

In [None]:
# Doubling time for the early (exponential) phase of NVA-HARTI.
# Uses the first 15 rows of `harti` (original note: 15 = first 7 days).
v = harti.head(15).groupby(0).max()
step_growth = v.pct_change()[1]       # relative change of the estimate between time points
rate = step_growth.tail(6).mean()     # average over the last six steps only
doubling_days = np.log(2) / np.log(1+rate)
print("Doubling time, days: ", doubling_days)
print("Doubling time, hours: ", doubling_days * 24)

# Risk of VA-HARTI depending on ventilation-days

In [None]:
# Define data: one row per ID_subid with the largest mech_vent_bid value and
# whether the vap flag was ever set.
T = vae_data_main[['ID_subid', 'mech_vent_bid']].groupby('ID_subid').max()
E = vae_data_main[['ID_subid', 'vap']].groupby('ID_subid').max()

fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(15,5))

# --- Right panel: kernel-smoothed hazard function ---
BANDWIDTH = 5  # kernel bandwidth passed to plot_hazard
LIM = 61       # fitter timeline is range(0, LIM); also the right x-limit of the left panel
naf = NelsonAalenFitter()
naf.fit(T, E, timeline=range(0, LIM, 1), label='VA-HARTI')
naf.plot_hazard(ax=ax2, bandwidth=BANDWIDTH)
add_at_risk_counts(naf, ax=ax2)
ax2.text(1, 0.0005, 'bandwidth = '+ "%.0f" %BANDWIDTH, fontsize=12)
ax2.grid(linestyle='dotted', which='both', axis='both')
ax2.minorticks_on()
ax2.set_ylabel('Instantaneous hazard rate of VA-HARTI')
ax2.set_xlabel('Days on mechanical ventilation')
ax2.set_title('Kernel-smoothed function of instantaneous hazard of VA-HARTI\ndepending on the number of ventilator-days')

# --- Left panel: cumulative incidence with a competing event ---
# Recode 0 as 2 so that no observation keeps the value 0.
# NOTE(review): presumably 0 is cuminc's censoring code and 2 is then treated as the
# competing event (extubation, per the plot title) -- confirm against the cmprsk docs.
E = E.replace(0, 2)
cuminc_res = cuminc(T.values.reshape(-1), E.values.reshape(-1))

# Only the first group is drawn (presumably the curve for event code 1 -- confirm ordering)
for name, group in list(cuminc_res.groups.items())[:1]:
    ax1.plot(group.time, group.est, label=name)
    ax1.fill_between(group.time, group.low_ci, group.high_ci, alpha=0.25)

# set_xlim takes (left, right); a third positional argument is not a step size -- it was
# interpreted as `emit`, which is keyword-only in current Matplotlib and raises TypeError.
ax1.set_xlim(0, LIM)
ax1.set_ylim(0, 0.21)
ax1.grid(linestyle='dotted', which='both', axis='both')
ax1.minorticks_on()
# An empty label sequence yields a legend without entries
ax1.legend('')
ax1.set_ylabel('Cumulative probability of VA-HARTI')
ax1.set_xlabel('Days on mechanical ventilation')
ax1.set_title('Cumulative probability of VA-HARTI depending on the number of ventilator-days\nwhile accounting for extubation as a competing event')
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists on a fresh checkout
os.makedirs('./pictures', exist_ok=True)
plt.savefig('./pictures/cuminc_VA_mechvent.pdf', dpi=600)

In [None]:
# Estimate table from the cuminc fit on ventilator-days
df = []
for name, group in cuminc_res.groups.items():
    df.extend([group.time, group.est, name])

# Only the first group's (time, estimate) pair is tabulated: column 0 = time, column 1 = estimate
times, estimates = pd.Series(df[0]), pd.Series(df[1])
harti = pd.DataFrame(zip(times, estimates))
print(harti.tail(10))

# Average increase of the estimate per time point over rows 1-17 of the table
# (the original note called this "first 10 days" -- TODO confirm the row-to-day mapping)
early_rows = harti.iloc[1:18, :]
h8 = early_rows.groupby(0).max()
mean_step = h8.diff().mean()
print('\nEach day adds probability: ', mean_step.values)

In [None]:
# Calculate doubling time for exponential phase VA-HARTI (21 = first 9 days)
v = harti.head(21).groupby(0).max()
growth = v.pct_change()[1]  # relative change of the estimate between time points
# A step up from an estimate of exactly 0 has an infinite relative change, and a single
# such value makes the mean infinite and the doubling time 0. Unlike the ICU-day cells,
# no .tail() trims the start of the curve here, so keep only the finite steps.
# When every step is already finite the result is unchanged (NaN was skipped by mean()).
rate = growth[np.isfinite(growth)].mean()
doubling_days = np.log(2) / np.log(1+rate)
print("Doubling time, days: ", doubling_days)
print("Doubling time, hours: ", doubling_days * 24)

_______