# Approximation to Friedman's five-dimensional function

In [None]:
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt
import arviz as az

In [None]:
az.style.use('arviz-white')
%config Completer.use_jedi = False
np.random.seed(1846)

## Data generation

In [None]:
# Design matrix: 100 observations of 1000 covariables, each drawn
# independently from a uniform distribution on [0, 1).
n_obs, n_covariables = 100, 1000
X = np.random.uniform(0, 1.0, (n_obs, n_covariables))


In [None]:
# Friedman's function: only the first five columns of X contribute to the
# mean; every remaining column is pure noise with respect to the response.
x0, x1, x2, x3, x4 = X[:, :5].T
f_x = 10*np.sin(np.pi*x0*x1) + 20*(x2 - 0.5)**2 + 10*x3 + 5*x4
# Observed response: the noiseless mean plus unit-variance Gaussian noise.
Y = np.random.normal(loc=f_x, scale=1)

## Different number of variables  

Each model uses BART with `m=200` trees and `alpha=0.25`, and is sampled with 4 chains.

In [None]:
# Fit one BART model per covariable count. Each model sees only the first
# `n_used` columns of X, so the five informative columns are always included
# and the rest are distractors.
num_covariables = ['5', '10', '100', '1000']
idatas = {}  # inference data keyed by the covariable count (as a string)
VIs = []     # normalised variable-inclusion vectors, in the same order
for num_covariable in num_covariables:
    n_used = int(num_covariable)
    with pm.Model(rng_seeder=678) as model:
        μ = pm.BART('μ', X[:, :n_used], Y, m=200, alpha=0.25)
        σ = pm.HalfNormal('σ', 1)
        y = pm.Normal('y', mu=μ, sigma=σ, observed=Y)
        idata = pm.sample(1000, tune=1000, chains=4, random_seed=678)
    idatas[num_covariable] = idata
    # Average the inclusion statistic over all chains and draws, then rescale
    # so that each model's entries sum to one.
    inclusion = idata.sample_stats["variable_inclusion"].mean(("chain", "draw")).values
    VIs.append(inclusion / inclusion.sum())

In [None]:
# One trace plot (a separate figure) per fitted model.
for key in num_covariables:
    trace_data = idatas[key]
    az.plot_trace(trace_data)
# Saving is disabled; uncomment to write the current figure to disk.
#plt.savefig("friedman_traces.png")

In [None]:
# Convergence check: print, for every model, the R-hat of each variable
# averaged over that variable's entries.
for key in num_covariables:
    mean_rhat = az.rhat(idatas[key]).mean()
    print(mean_rhat.values())

In [None]:
# In-sample fit: one panel per model, plotting the posterior mean of μ (with a
# 90% HDI as error bars) against the noiseless Friedman function f_x.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)

for name, ax in zip(num_covariables, axes.ravel()):
    # Collapse chains and draws into a single "samples" dimension.
    μs = idatas[name].posterior["μ"].stack(samples=["chain",  "draw"])
    ax.set_title(f"covariables={name}")
    # Identity line: points on it are predicted exactly.
    ax.axline([0, 0], [1, 1], color="0.5")
    mean = μs.mean("samples")
    # Transposed so that samples are on the first axis of the array passed to
    # az.hdi; the result has one (lower, upper) row per observation.
    hdi = az.hdi(μs.T.values, hdi_prob=0.9)
    # errorbar expects distances from the mean, not absolute bounds.
    yerr = np.vstack([mean - hdi[:,0], hdi[:,1] - mean])
    ax.errorbar(f_x, mean, yerr,
                linestyle='None', marker='.', alpha=0.5)
    # Root-mean-square error of the posterior mean against f_x.
    print((np.mean((f_x - mean)**2)**0.5).values)

# Figure-level title and shared axis labels. These are drawn once, after the
# loop, so the same text is not stacked on the figure once per panel.
fig.text(0.42, 1.05, "in-sample", fontsize=16)
fig.text(0.42, -0.05, "observed (f_x)", fontsize=15)
fig.text(-0.05, 0.42, "predicted (f_x)", fontsize=15, rotation=90)
plt.savefig("friedman_covar_insample.png")

In [None]:
# Relative variable inclusion per model: a line over the first five
# covariables, plus a single marker at x=5 showing the average inclusion of
# all remaining covariables.
for idx, (VI, num_covariable) in enumerate(zip(VIs, num_covariables)):
    color = f"C{idx}"
    plt.plot(VI[:5], label=num_covariable, color=color)
    extra = VI[5:]
    # The 5-covariable model has no remaining covariables; skip the marker
    # rather than take the mean of an empty slice (NaN plus a RuntimeWarning).
    if len(extra) > 0:
        # Colour is set explicitly so the marker matches its line even when
        # an earlier marker was skipped.
        plt.plot(5, np.mean(extra), ".", color=color)
plt.legend();
plt.savefig("friedman_VI.png")

In [None]:
# Partial-dependence plots for the first model only. Slice the list with
# `[:1]` rather than indexing with `[0]`: indexing yields the string '5',
# and looping over a string visits its characters, which would silently
# break for any multi-digit entry.
for num_covariable in num_covariables[:1]:
    # Show at most the first ten covariables, laid out on a 2x5 grid.
    axes = pm.bart.plot_dependence(idatas[num_covariable], X, Y, 
                                   rug=False, var_idx=range(min(10, int(num_covariable))), grid=(2,5))
    # Use a common y-range so the panels are directly comparable.
    for ax in np.ravel(axes):
        ax.set_ylim(5, 20)
    plt.savefig(f"pdp_friedman_200_{num_covariable}.png")