# Demo

In [None]:
import numpy as np
import time
import scipy as sp
import scipy.sparse
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior
from bayesbridge import HorseshoePrior
from simulate_data import simulate_design, simulate_outcome
from util import mcmc_summarizer

## BayesBridge supports both dense (numpy array) and sparse (scipy sparse matrix) design matrices.

In [None]:
# Problem size: number of observations and number of predictors.
n_obs = 10 ** 4
n_pred = 10 ** 3

# Build the design matrix with the `simulate_data` helper, asking for the
# sparse format; the seed is fixed so that the demo is reproducible.
X = simulate_design(
    n_obs,
    n_pred,
    binary_frac=.9,
    binary_pred_freq=.2,
    shuffle_columns=True,
    format_='sparse',
    seed=111,
)

In [None]:
# Coefficients used to simulate the outcome: three groups of five non-zero
# entries at the front of the vector, zeros everywhere else.
beta_true = np.zeros(n_pred)
beta_true[10:15] = .5
beta_true[5:10] = 1.
beta_true[:5] = 1.5

# A single trial per observation, i.e. a binary outcome.
n_trial = np.ones(X.shape[0])

y = simulate_outcome(
    X,
    beta_true,
    intercept=0.,
    n_trial=n_trial,
    model='logit',
    seed=1,
)

## Horseshoe prior

In [None]:
# The intercept and the centering of predictors are requested through the
# model's own flags, so do *not* manually add an intercept column to X or
# center X beforehand.
model = RegressionModel(
    y,
    X,
    family='logit',
    add_intercept=True,
    center_predictor=True,
)

# Horseshoe prior settings for this demo; the intercept gets an infinite
# prior standard deviation.
prior = HorseshoePrior(
    n_fixed_effect=0,
    sd_for_intercept=float('inf'),
    sd_for_fixed_effect=1,
    regularizing_slab_size=2.,
    skew_mean=0.,
    skew_sd=1.,
    global_scale_prior=None,
)

# Sampler object combining the model and the prior.
bridge = BayesBridge(model, prior)

## Run the Gibbs sampler.

In [None]:
# First run: 250 iterations with no burn-in and no thinning, started from
# a small global scale. The seed is fixed for reproducibility.
samples, mcmc_info = bridge.gibbs(
    n_iter=250,
    n_burnin=0,
    thin=1,
    init={'global_scale': .01},
    coef_sampler_type='cg',
    seed=111,
)

Check convergence by looking at the trace plot of the posterior log-density.

In [None]:
# Trace of the posterior log-density stored under samples['logp'].
plt.figure(figsize=(10, 4))
plt.rcParams.update({'font.size': 20})

plt.plot(samples['logp'])
plt.ylabel('Posterior log density')
plt.xlabel('MCMC iteration')
plt.show()

## Restart MCMC from the last iteration with `gibbs_resume()`.

In [None]:
# Continue the chain from the state recorded in `mcmc_info` for another
# 250 iterations; both `samples` and `mcmc_info` are rebound to the results.
samples, mcmc_info = bridge.gibbs_resume(mcmc_info, n_add_iter=250)

In [None]:
# Log-density trace for the samples returned by the resumed run.
plt.figure(figsize=(10, 4))
plt.rcParams.update({'font.size': 20})

plt.plot(samples['logp'])
plt.ylabel('Posterior log density')
plt.xlabel('MCMC iteration')
plt.show()

Add more samples (while keeping the previous ones) with `merge=True`.

In [None]:
# Extend the chain by 750 iterations; the previous samples are handed back
# in together with merge=True.
samples, mcmc_info = bridge.gibbs_resume(
    mcmc_info,
    n_add_iter=750,
    merge=True,
    prev_samples=samples,
)

# Drop the first row so that everything except the intercept is kept.
coef_samples = samples['coef'][1:, :]

In [None]:
# Log-density trace for the merged samples.
plt.figure(figsize=(10, 4))
plt.rcParams.update({'font.size': 20})

plt.plot(samples['logp'])
plt.ylabel('Posterior log density')
plt.xlabel('MCMC iteration')
plt.show()

## Check mixing of regression coefficients and their posterior marginals.

Typically the convergence is quick and mixing of the regression coefficients is adequate.

In [None]:
# Traces of four selected coefficients (rows 0, 5, 10 and 15 of
# `coef_samples`); the transpose makes each selected row one plotted line.
plt.figure(figsize=(12, 5))
plt.rcParams.update({'font.size': 20})

plt.plot(coef_samples[[0, 5, 10, 15], :].T)
plt.ylabel(r'$\beta_j$', rotation=0, labelpad=10)
plt.xlabel('MCMC iteration')
plt.show()

In [None]:
plt.figure(figsize=(14, 5))
plt.rcParams['font.size'] = 20

# Number of coefficients to display; it also drives the x-axis ticks below.
n_coef_to_plot = 25

# Delegate the interval plot to the project's `mcmc_summarizer` helper.
mcmc_summarizer.plot_conf_interval(
    coef_samples, conf_level=.95,
    n_coef_to_plot=n_coef_to_plot, marker_scale=1.4
)
# Overlay the coefficients that were used to simulate the data.
plt.plot(
    beta_true[:n_coef_to_plot], '--', color='tab:orange',
    label='True value'
)
plt.title('Horseshoe')
plt.xlabel(r'Coefficient index $j$')
plt.ylabel(r'$\beta_j$', rotation=0, labelpad=10)
# One tick every fifth coefficient, derived from n_coef_to_plot instead of
# a hard-coded list (identical to [0, 5, 10, 15, 20] when it equals 25).
plt.xticks(np.arange(0, n_coef_to_plot, 5))
plt.legend(frameon=False)

plt.show()

### Check the ExpTiltedStableDist class

In [None]:
from bayesbridge.random.tilted_stable import ExpTiltedStableDist

In [None]:
# TODO: still unable to use `exp()` from `bayesbridge.random.tilted_stable`.
from bayesbridge.random.tilted_stable import exp

In [None]:
# Exponent of the bridge prior and the characteristic exponent derived
# from it (half the bridge exponent).
bridge_exponent = 1. / 16
char_exponent = .5 * bridge_exponent

# Grid of 101 divide-conquer costs, log-spaced between 0.1 and 10.
divide_conquer_cost = np.power(10, np.linspace(-1., 1., 101))

# For Bayesian bridge, tilt parameter is given by beta / global_scale.
# Here it is chosen so that tilt ** char_exponent reproduces the cost grid.
tilt = np.power(divide_conquer_cost, 1. / char_exponent)
tilt_power = np.power(tilt, char_exponent)

#### Time the samplers at given parameter values

In [None]:
# Shared sampler instance used by `time_method`; seeded for reproducibility.
tilted_stable = ExpTiltedStableDist(seed=0)


def time_method(char_exponent, tilt, method, n_rep=1000):
    """Time a single batched `tilted_stable.sample` call.

    Parameters
    ----------
    char_exponent : float
        Value repeated `n_rep` times and passed as the first argument of
        `tilted_stable.sample`.
    tilt : float
        Value repeated `n_rep` times and passed as the second argument.
    method : str
        Forwarded as the `method` keyword of `tilted_stable.sample`.
    n_rep : int, optional
        Length of the two input arrays, i.e. the batch size.

    Returns
    -------
    float
        Elapsed time of the call in seconds.
    """
    # Allocate the inputs before starting the clock so that only the
    # sampler call itself is measured.
    char_exponent_arr = char_exponent * np.ones(n_rep)
    tilt_arr = tilt * np.ones(n_rep)

    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, which makes it the appropriate timer for short intervals.
    start = time.perf_counter()
    tilted_stable.sample(char_exponent_arr, tilt_arr, method=method)
    return time.perf_counter() - start

In [None]:
# Batch size used for every timing measurement.
n_repetition = 1000

# For each sampler, the elapsed time of one batched call at every value on
# the tilt grid, stored as an array keyed by the sampler's name.
exec_time = {
    sampler_name: np.array([
        time_method(
            char_exponent, tilt_value,
            method=sampler_name, n_rep=n_repetition
        )
        for tilt_value in tilt
    ])
    for sampler_name in ['double-rejection', 'divide-conquer']
}

In [None]:
plt.figure(figsize=(7, 4.5))
plt.rcParams['font.size'] = 18

# One curve per sampler, converted from time per batch to time per sample.
# Each curve is labelled so that the two can be told apart in the legend.
for method in ['double-rejection', 'divide-conquer']:
    plt.plot(tilt_power, exec_time[method] / n_repetition, label=method)
plt.xlabel('Cost of divide-conquer (= tilt ^ char-exponent)')
plt.ylabel('Sec. per sample')
plt.ticklabel_format(axis='y', scilimits=(0,0))
plt.ylim(bottom=0)
plt.legend(frameon=False)

# Hide the top and right spines of the axes.
for side in ['top', 'right']:
    plt.gca().spines[side].set_visible(False)

plt.show()