In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

import inspect
import pandas as pd
import autograd.numpy as np
import matplotlib.pyplot as plt

from utils.data_gen import generate_regression_outputs
from utils.models import BNN_LV, BayesianModel, SamplerModel
from utils.training import BBVI

# Import helpers for building Weights & Biases callbacks:
from utils.training import build_wb_callback_postpred, build_wb_callback_plotfunc


In [None]:
# Load the toy regression data set ('hsc' variant). The generator's result is
# unpacked as (targets, inputs), in that order.
Y_train, X_train = generate_regression_outputs(type='hsc')

# Evenly spaced 1-D grid of 100 test inputs covering [-6, 6].
X_test = np.linspace(-6, 6, num=100)


In [None]:
# Bayesian Neural Network with Latent Variable (BNN_LV): sizes and noise scales.
L = 1  # Number of entries in the `gamma` list below (presumably one per latent input).
N, M = X_train.shape  # N observations, M input columns.
_, K = Y_train.shape  # K output columns.
gamma = 1.0  # Standard deviation of noise for each latent input.
sigma = 1.0  # Standard deviation of noise on each model output.

# Network architecture:
architecture = dict(
    input_n=M,  # Input width, taken from X_train.
    output_n=K,  # Output width, taken from Y_train.
    hidden_layers=[20, 20],
    biases=[1, 1, 1],
    activations=['relu', 'relu', 'linear'],
    gamma=[gamma] * L,
    sigma=[sigma] * K,
    seed=207,
)

# Build the network from the architecture description:
bnn_lv = BNN_LV(architecture=architecture)

# Number of weights in the network:
D = bnn_lv.D

# Fit the network first so that its weights (an MLE estimate) can seed the
# variational means further down:
bnn_lv.fit(
    X_train,
    Y_train,
    step_size=0.01,
    max_iteration=5000,
    check_point=500,
    regularization_coef=None,
)


In [None]:
# Bayesian model with a joint posterior over the network weights W and the
# latent inputs Z. The latent prior scale reuses `gamma` and the output noise
# reuses `sigma` from the network definition.
bayesian_model = BayesianModel(
    X=X_train,
    Y=Y_train,
    nn=bnn_lv,
    prior_weights_mean=0,
    prior_weights_stdev=5.0,
    prior_latents_mean=0,
    prior_latents_stdev=gamma,
    likelihood_stdev=0.25,
    output_noise_stdev=sigma,
    label='Toy example with heteroscedastic noise',
)

# Wrapper exposing the model through a single `samples` argument that holds
# both W and Z:
sampler_model = SamplerModel(bayesian_model)

# Print/show the model summaries provided by the wrapper:
sampler_model.display()
sampler_model.describe()
sampler_model.info()


In [None]:
# Target of the variational fit: the log posterior of the wrapped model.
log_posterior = sampler_model.log_posterior

# Weights of the already-fitted network (MLE) seed the variational means:
mle_weights = bnn_lv.get_weights()

# Initial means: MLE weights as a single row for W, zeros for the N latents Z,
# combined into one vector by the wrapper's `stack`:
W_mean_init = mle_weights.reshape(1, -1)
Z_mean_init = np.zeros((N, 1))
Mu_init = sampler_model.stack(W_mean_init, Z_mean_init)

# Initial variances: unit variance for each of the D weights and gamma^2 for
# each latent, stacked the same way as the means:
W_var_init = np.ones((1, D)) ** 2
Z_var_init = np.ones((N, 1)) * (gamma ** 2)
Sigma_init = sampler_model.stack(W_var_init, Z_var_init)

# Two posterior-predictive callbacks over the test grid, differing only in
# `mode` ('wandb' vs. 'pyplot'); both are built with interval=50.
wb_callback_postpred_wandb = build_wb_callback_postpred(
    sampler_model, x_data=X_test, mode='wandb', interval=50,
)
wb_callback_postpred_pyplot = build_wb_callback_postpred(
    sampler_model, x_data=X_test, mode='pyplot', interval=50,
)

# Weights & Biases settings handed to BBVI:
wb_settings = {
    'entity': 'gpestre',
    'project': 'am207',
    'group': 'hsc_bbvi',
    'name': 'hsc_bbvi_v1',
    'notes': 'BBVI on toy dataset with heteroscedastic noise.',
    'progress': 10,
    'base_path': '../data/',
    'filename': 'hsc_bbvi_state.json',
    # Manually archived information about the network, its sizes, the noise
    # scales and the variational initialisation:
    'archive': dict(
        architecture=architecture,
        N=N,
        M=M,
        K=K,
        L=L,
        D=D,
        gamma=gamma,
        sigma=sigma,
        Mu_init=Mu_init,
        Sigma_init=Sigma_init,
    ),
    'callback': [
        wb_callback_postpred_wandb,
        wb_callback_postpred_pyplot,
    ],
}

# Optimiser configuration for black-box variational inference:
bbvi_params = dict(
    num_samples=500,
    step_size=0.001,
    num_iters=2000,
    random_seed=207,
    Mu_init=Mu_init,
    Sigma_init=Sigma_init,
    wb_settings=wb_settings,
)

# Run BBVI against the log posterior and keep the fitted variational
# parameters.
# NOTE(review): `progress=100` here differs from `'progress': 10` inside
# wb_settings — confirm both values are intended.
bbvi = BBVI(log_posterior, **bbvi_params, progress=100)
Mu, Sigma = bbvi.run()

# Optimisation history: ELBO (left panel) and gradient magnitude (right panel).
# The figure title is "BBVI: <label>" when the model has a label, else empty.
title = ("BBVI: " + sampler_model.label) if sampler_model.label is not None else ""
elbo_hist = bbvi.elbo_hist
mag_hist = bbvi.magnitude_hist

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 5))
fig.suptitle(title, fontsize=20)

# Both panels share the same layout; iterations are numbered from 1.
panels = (
    (ax1, elbo_hist, "ELBO"),
    (ax2, mag_hist, "Magnitude of gradient"),
)
for ax, history, ylabel in panels:
    ax.plot(range(1, len(history) + 1), history)
    ax.set_xlabel("Iteration", fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
plt.show()


In [None]:
# Posterior predictive plot for the BBVI fit.

# Flatten the test grid and the training data to 1-D for plotting:
x_test = X_test.flatten()
x_train = sampler_model.X.flatten()
y_train = sampler_model.Y.flatten()

# Draw samples of the stacked (W, Z) vector from the Gaussian fitted by BBVI.
# This cell used to call `hmc.get_samples()`, but this notebook never creates
# an `hmc` object, so it failed with a NameError on a fresh kernel.
S = 500  # Number of posterior draws used for the predictive bands.
rng = np.random.RandomState(207)  # Fixed seed so the figure is reproducible.
mu = np.ravel(Mu)
cov = np.asarray(Sigma)
if cov.size == mu.size:
    # NOTE(review): assumes `Sigma` holds per-dimension variances, matching how
    # `Sigma_init` was built — confirm against what BBVI.run() returns.
    samples = mu + np.sqrt(cov.reshape(-1)) * rng.standard_normal((S, mu.size))
else:
    # Otherwise treat `Sigma` as a full covariance matrix over the stacked vector.
    samples = rng.multivariate_normal(mu, cov.reshape(mu.size, mu.size), size=S)

# One row of predictions per posterior draw:
Y_pred = sampler_model.predict(X=x_test.reshape(-1, 1), samples=samples).reshape(S, -1)

# Pointwise median and central 95% band across the draws:
y_lower = np.percentile(Y_pred, q=2.5, axis=0)
y_upper = np.percentile(Y_pred, q=97.5, axis=0)
y_med = np.percentile(Y_pred, q=50, axis=0)

# Training data, median prediction and shaded 95% band:
plt.figure(figsize=(14, 7))
plt.scatter(x_train, y_train, color='black', label='data')
plt.plot(x_test, y_med, label="Median Prediction")
plt.fill_between(x_test, y_lower, y_upper, alpha=0.4, color='r', label="95% Predictive Interval")
plt.title("Bayesian Neural Net Predictions with 95% CI")
plt.xlabel("X Test")
plt.ylabel("Y Predicted")
plt.legend()
plt.show()
