**Posterior Predictive Checks**

- Simulate survival times from the posterior predictive distribution and summarise them as survival curves

- Compare to observed KM curves

- Assess model fit

- Figures:

    - Overlay of posterior predictive survival curves and empirical KM curves

In [None]:
# ============================================================
# Section 8: Posterior Predictive Checks
# ============================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import arviz as az
from lifelines import KaplanMeierFitter

# Apply the "seaborn-v0_8-whitegrid" style sheet to all figures created below.
# NOTE(review): the "seaborn-v0_8-*" style names are only available in
# matplotlib >= 3.6 -- confirm against the pinned version.
plt.style.use("seaborn-v0_8-whitegrid")

In [None]:
# ------------------------------------------------------------
# Load cleaned baseline data
# ------------------------------------------------------------

df = pd.read_csv("data/processed/pbc_clean.csv")

# Collapse the long-format data to one row per subject, ordered by visit
# time ("year") within each "id".
# NOTE(review): GroupBy.first() takes the first *non-null* value of each
# column, so a missing baseline measurement is silently filled from a later
# visit. Left unchanged here; confirm it matches how the model was fitted.
df_baseline = (
    df.sort_values(["id", "year"])
      .groupby("id", as_index=False)
      .first()
)

# Survival quantities
T_obs = df_baseline["years"].values
E_obs = df_baseline["status2"].values  # 1=event, 0=censored

# Separate continuous and categorical covariates
continuous_vars = ["age", "serBilir", "albumin"]
categorical_vars = ["sex", "drug", "edema"]

# Standardise continuous covariates to zero mean and unit variance.
# The previous version called StandardScaler(), which is not imported in the
# visible notebook cells and therefore raised NameError. The NumPy form below
# follows the same convention (population std, ddof=0; NaNs ignored when
# computing the statistics; constant columns divided by 1 instead of 0).
cont_values = df_baseline[continuous_vars].to_numpy(dtype=float)
cont_mean = np.nanmean(cont_values, axis=0)
cont_std = np.nanstd(cont_values, axis=0)
cont_std[cont_std == 0.0] = 1.0
X_cont = (cont_values - cont_mean) / cont_std

# Keep categorical covariates as-is
# NOTE(review): presumably already numerically encoded in pbc_clean.csv,
# since they are multiplied by regression coefficients later -- verify.
X_cat = df_baseline[categorical_vars].values

# Combine design matrix: continuous columns first, then categorical ones.
X = np.column_stack([X_cont, X_cat])

# ------------------------------------------------------------
# Load posterior samples
# ------------------------------------------------------------

trace = az.from_netcdf("results/models/model1_trace.nc")

posterior = trace.posterior


In [None]:
# ------------------------------------------------------------
# Sample posterior predictive survival times
# ------------------------------------------------------------

# Fixed seed so the subsample of draws, and hence the saved figures, are
# reproducible from run to run.
PPC_SEED = 42
rng = np.random.default_rng(PPC_SEED)

n_obs = len(T_obs)

# Flatten posterior samples: one scalar intercept and one coefficient row
# (length X.shape[1]) per posterior sample.
alpha_samples = posterior["alpha"].values.ravel()
beta_samples = posterior["beta"].values.reshape(-1, X.shape[1])

# Subsample posterior draws. Sampling without replacement raises ValueError
# when more draws are requested than exist, so cap the request at the number
# of available samples.
n_draws = min(500, alpha_samples.size)  # 500 is sufficient for smooth PPC
idx = rng.choice(alpha_samples.size, size=n_draws, replace=False)
alpha_sub = alpha_samples[idx]
beta_sub = beta_samples[idx]

# Generate posterior predictive survival times under the exponential model
# with log-rate alpha + x @ beta. Broadcasting yields an (n_draws, n_obs)
# matrix of log-rates in one step; exp(-log_lambda) is the mean 1 / lambda.
log_lambda = alpha_sub[:, None] + beta_sub @ X.T
T_tilde = rng.exponential(scale=np.exp(-log_lambda))

In [None]:
# ------------------------------------------------------------
# Figure 12: Posterior predictive survival curves
# ------------------------------------------------------------

kmf = KaplanMeierFitter()

# Observed follow-up window. Simulated times are uncensored and can extend
# far beyond it, so the grid end points are used to bound the x-axis.
time_grid = np.linspace(0, T_obs.max(), 100)

# Never index past the number of simulated datasets actually available.
n_curves = min(50, T_tilde.shape[0])

# Every simulated time is an observed event (no censoring in the simulation).
all_events = np.ones(n_obs)

plt.figure(figsize=(7, 5))

# Plot posterior predictive curves
for sim_times in T_tilde[:n_curves]:
    kmf.fit(sim_times, event_observed=all_events)
    # where="post": a KM estimate holds its value from each event time until
    # the next one; the default "pre" would draw every drop one step early.
    plt.step(kmf.survival_function_.index,
             kmf.survival_function_["KM_estimate"],
             where="post", color="gray", alpha=0.2)

plt.xlim(time_grid[0], time_grid[-1])
plt.xlabel("Time (years)")
plt.ylabel("Survival Probability")
plt.title("Posterior Predictive Survival Curves")
plt.tight_layout()
plt.savefig("results/figures/ppc_survival_curves.png", dpi=300)
plt.close()

In [None]:
# ------------------------------------------------------------
# Figure 13: PPC vs empirical KM
# ------------------------------------------------------------

plt.figure(figsize=(7, 5))

# Never index past the number of simulated datasets actually available.
n_curves = min(50, T_tilde.shape[0])

# Every simulated time is an observed event (no censoring in the simulation).
all_events = np.ones(n_obs)

# Posterior predictive curves
for k, sim_times in enumerate(T_tilde[:n_curves]):
    kmf.fit(sim_times, event_observed=all_events)
    # where="post": a KM estimate holds its value from each event time until
    # the next one; the default "pre" would draw every drop one step early.
    plt.step(
        kmf.survival_function_.index,
        kmf.survival_function_["KM_estimate"],
        where="post",
        color="gray",
        alpha=0.15,
        # Label only the first curve so the legend gets a single entry.
        label="Posterior predictive draws" if k == 0 else None,
    )

# Empirical KM
kmf.fit(T_obs, event_observed=E_obs)
plt.step(
    kmf.survival_function_.index,
    kmf.survival_function_["KM_estimate"],
    where="post",
    color="black",
    linewidth=2,
    label="Empirical KM"
)

# Restrict the comparison to the observed follow-up; the uncensored simulated
# times otherwise stretch the axis well past the end of the empirical curve.
plt.xlim(0, T_obs.max())
plt.xlabel("Time (years)")
plt.ylabel("Survival Probability")
plt.title("Posterior Predictive Check: Model vs Data")
plt.legend()
plt.tight_layout()
plt.savefig("results/figures/ppc_overlay_km.png", dpi=300)
plt.close()