# Bikes


This notebook follows the PyMC "BART introduction" example (https://docs.pymc.io/projects/examples/en/latest/BART/BART_introduction.html), applied to the bikes dataset.

In [None]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pymc_experimental as pmx

In [None]:
# Fixed seed and the random state built from it; `rng` is handed to the model
# below so that repeated runs of the notebook draw the same random numbers.
RANDOM_SEED = 8457
rng = np.random.RandomState(RANDOM_SEED)
# Apply the "arviz-darkgrid" plotting style to the figures in this notebook.
az.style.use("arviz-darkgrid")

## Biking with BART

In [None]:
# Read the bikes dataset from the working directory.
bikes = pd.read_csv("bikes.csv")

# Predictors (X) and response (Y) used by the model and the plots below.
predictor_columns = ["hour", "temperature", "humidity", "windspeed"]
X = bikes[predictor_columns]
Y = bikes["count"]

In [None]:
# BART regression of Y (bike count) on the four predictors in X.
# NOTE(review): `rng_seeder` is not accepted by every PyMC release — confirm
# against the installed version before upgrading.
with pm.Model(rng_seeder=rng) as model_bikes:
    # Observation noise: half-normal prior scaled by the sample std of Y.
    σ = pm.HalfNormal("σ", Y.std())
    # BART mean function; m=50 is presumably the number of trees — TODO confirm.
    μ = pmx.BART("μ", X, Y, m=50)
    # Normal likelihood of the observed counts around the BART mean.
    y = pm.Normal("y", μ, σ, observed=Y)
    # Sample with PyMC's default settings; the plotting cells below read
    # `idata_bikes`.
    idata_bikes = pm.sample()

### Partial dependence plots

In [None]:
# Partial dependence plots for the columns of X, laid out on a 2x2 grid.
# The trailing semicolon keeps the notebook from echoing the return value.
pmx.bart.plot_dependence(idata_bikes, X=X, Y=Y, grid=(2, 2));
# Uncomment to also write the figure to disk:
# plt.savefig("pdp_discrete.png", bbox_inches='tight')

### Variable importance

In [None]:
# Average the sampler's "variable_inclusion" statistic over every posterior
# sample (all chains and draws pooled), leaving one value per entry of the
# statistic's remaining dimension.
inclusion = idata_bikes.sample_stats["variable_inclusion"]
pooled_inclusion = inclusion.stack(samples=("chain", "draw"))
VI = pooled_inclusion.mean("samples").values

In [None]:
# Plot the mean variable-inclusion values, normalised so they sum to one.
_, ax = plt.subplots(1)

relative_importance = VI / VI.sum()
ax.plot(relative_importance, "o-")

# Tick positions and labels come from X itself rather than a second,
# hard-coded copy of the column list, so the axis cannot drift out of sync
# with the predictors used to fit the model.
# NOTE(review): assumes the entries of VI follow the column order of X, as
# the original hard-coded labels did — confirm.
ax.set_xticks(range(X.shape[1]))
ax.set_xticklabels(X.columns)
ax.set_ylabel("relative importance");

In [None]:
# Draw the library's variable-importance figure and save it to disk.
# NOTE(review): `plot_variable_importance` takes its own `figsize`, so this
# pre-created figure looks redundant — confirm and drop it if the helper
# opens its own figure.
fig = plt.figure()

pmx.bart.utils.plot_variable_importance(idata_bikes, X=X, figsize=(8, 6))

# The savefig keyword is `bbox_inches`; the previous `box_inches` spelling is
# not a savefig option, so the tight bounding box was never applied.
plt.savefig("bikes_VI-correlation.png", bbox_inches="tight", dpi=300);