In [4]:
from pathlib import Path
import pandas as pd

# Artifact directory, resolved from the parent of the current working
# directory (presumably the notebook runs from a subfolder of the repo — verify).
REPO_ROOT = Path.cwd().parent
OUT = REPO_ROOT.joinpath("outputs", "rtm")

# Read the three v0 parquet tables: exposure, pluvial hazard, damage outcome.
exp = pd.read_parquet(OUT.joinpath("water_exposure_Ehat_v0.parquet"))
haz = pd.read_parquet(OUT.joinpath("hazard_pluvial_v0.parquet"))
out = pd.read_parquet(OUT.joinpath("outcome_damage_v0.parquet"))

# Join the tables on `bldg_id` using pandas' default inner join, so only
# ids present in all three tables are kept.
exp_haz = pd.merge(exp, haz, on="bldg_id")
df = pd.merge(exp_haz, out, on="bldg_id")
len(df)


221324

In [None]:
import pymc as pm
import arviz as az
import numpy as np

# Tunable settings for this quick variational fit, gathered in one place.
SEED = 42            # shared seed for subsampling, ADVI and posterior draws
N_SUBSAMPLE = 3_000  # maximum number of rows used for the fit
ADVI_STEPS = 8_000   # number of ADVI optimisation iterations
N_DRAWS = 500        # draws taken from the fitted approximation

rng = np.random.default_rng(SEED)
n = len(df)
# Cap the subsample at n: `rng.choice(..., replace=False)` raises ValueError
# when asked for more rows than exist.
idx = rng.choice(n, size=min(N_SUBSAMPLE, n), replace=False)

# Subsampled predictors (E, H) and observed outcome (Y) as NumPy arrays.
# NOTE(review): Y is fed to a Bernoulli likelihood, so it is assumed to be
# coded 0/1 — confirm against the upstream outcome table.
E = df["E_hat"].to_numpy()[idx]
H = df["H_pluvial_v0"].to_numpy()[idx]
Y = df["Y_damage_v0"].to_numpy()[idx]

print("Using n =", len(Y))

# Logistic regression: logit P(Y = 1) = alpha + beta_E * E + beta_H * H.
with pm.Model() as model:
    # Normal priors on the intercept and the two slopes.
    alpha = pm.Normal("alpha", mu=0, sigma=2)
    beta_E = pm.Normal("beta_E", mu=0, sigma=1)
    beta_H = pm.Normal("beta_H", mu=0, sigma=1)

    logit_p = alpha + beta_E * E + beta_H * H

    y_obs = pm.Bernoulli(
        "y_obs",
        logit_p=logit_p,
        observed=Y,
    )

    # Fit with ADVI. If the run is interrupted, `approx` only reflects the
    # iterations completed so far, so downstream results should not be trusted.
    approx = pm.fit(
        n=ADVI_STEPS,
        method="advi",
        random_seed=SEED,
        progressbar=True,
    )

    # Seed the draws as well; otherwise `trace` (and everything derived
    # from it) changes on every run even though the fit itself is seeded.
    trace = approx.sample(N_DRAWS, random_seed=SEED)


Using n = 5000


Interrupted at 534 [3%]: Average Loss = 3,824.9


In [None]:
# Summary table of the draws in `trace` for the intercept and both slopes.
az.summary(
    trace,
    var_names=["alpha", "beta_E", "beta_H"],
)


In [None]:
# Simulate replicated outcomes for `y_obs` from the draws in `trace`.
with model:
    ppc = pm.sample_posterior_predictive(
        trace,
        var_names=["y_obs"],
        random_seed=42,
    )

# `pm.sample_posterior_predictive` already returns an ArviZ InferenceData
# (with `posterior_predictive` and `observed_data` groups), so it is passed
# to `plot_ppc` directly; ArviZ has no `from_pymc` converter to wrap it with.
# The trailing semicolon suppresses the Axes repr in the cell output.
az.plot_ppc(ppc);
