In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

import pandas as pd
import autograd.numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Image

from utils.models import BNN, BNN_LV
from utils.functions import gaussian, log_gaussian
from utils.training import BBVI
from utils.data_gen import sample_gaussian_mixture, generate_regression_outputs


In [None]:
# Load the 'hsc' regression data set and build an evenly spaced test grid.
# NOTE(review): the helper's result is unpacked as (Y, X), outputs first --
# confirm this matches the return order of generate_regression_outputs.
Y_hsc, X_hsc = generate_regression_outputs(type='hsc')
# 100 evenly spaced points covering [-6, 6] (a 1-D array).
X_test = np.linspace(-6, 6, num=100)


In [None]:
# Set up the prior, likelihood and posterior
def log_prior(W, mu, sigma):
    """
    Log-density of the prior over the network weights.

    Evaluates `log_gaussian(x=W, mu=mu, sigma=sigma)` and sums the result over
    the last axis. Assuming `log_gaussian` is elementwise, a W of shape
    (S, D) yields one value per row, i.e. shape (S,).
    """
    per_weight = log_gaussian(x=W, mu=mu, sigma=sigma)
    return np.sum(per_weight, axis=-1)

# Prior over the latent inputs Z:
def log_latent_prior(Z, mu, gamma):
    """
    Log-density of the prior over the latent inputs.

    Evaluates `log_gaussian(x=Z, mu=mu, sigma=gamma)` and sums over the last
    two axes, one after the other. Assuming `log_gaussian` is elementwise, a
    2-D Z collapses to a single value and a 3-D Z to one value per leading
    index.
    """
    per_entry = log_gaussian(x=Z, mu=mu, sigma=gamma)
    per_row = np.sum(per_entry, axis=-1)
    return np.sum(per_row, axis=-1)

def log_likelihood(W, X, Y, mu, sigma):
    """
    Log-likelihood of the targets Y around the mean `mu`.

    Evaluates `log_gaussian(x=Y, mu=mu, sigma=sigma)` and sums over axis 0.
    `W` and `X` are accepted but not used here; `mu` is expected to already
    hold the quantity Y is compared against.
    """
    per_point = log_gaussian(x=Y, mu=mu, sigma=sigma)
    return np.sum(per_point, axis=0)

def create_log_posterior(X, Y, p_mu, p_sigma, l_sigma, nn, gamma, L=1):
    """
    Build a log-posterior function over weights W and latent inputs Z.

    The data and hyperparameters given here become the default arguments of
    the returned function, so it is normally called as `log_posterior(W, Z)`.

    Parameters
    ----------
    X : array, N by M
        Inputs.
    Y : array, N by K
        Targets; must have as many rows as X.
    p_mu, p_sigma
        Passed to `log_prior` as `mu` and `sigma` for the weights.
    l_sigma
        Passed to `log_likelihood` as `sigma`.
    nn
        Network object; this code reads `nn.D` and calls
        `nn.forward(X, weights=W, input_noise=Z)`.
    gamma
        Passed to `log_latent_prior` as `gamma` (together with `mu=0`).
    L : int
        Number of latent features, i.e. the required size of Z's last axis.

    Returns
    -------
    callable
        `log_posterior(W, Z, ...)`, see its docstring.

    Raises
    ------
    AssertionError
        If X or Y is not 2-dimensional or their row counts differ.
    """
    # Validate the data once, when the posterior is created:
    assert len(X.shape)==2, "Expects X to be 2 dimensional (N by M)."
    assert len(Y.shape)==2, "Expects Y to be 2 dimensional (N by K)."
    N = X.shape[0]
    assert Y.shape[0]==N, f"X and Y should have the same number of rows ({N})."

    def _joint_log_density(W, Z, X, Y, p_mu, p_sigma, l_sigma, nn, gamma):
        """
        Weight prior + latent prior + likelihood for a single (W, Z) pairing.
        """
        weight_term = log_prior(W=W, mu=p_mu, sigma=p_sigma)
        latent_term = log_latent_prior(Z=Z, mu=0, gamma=gamma)
        # The forward pass supplies the mean of the likelihood:
        predicted_mean = nn.forward(X, weights=W, input_noise=Z)
        data_term = log_likelihood(W=W, X=X, Y=Y, mu=predicted_mean, sigma=l_sigma)
        return weight_term + latent_term + data_term

    def log_posterior(W, Z, X=X, Y=Y, p_mu=p_mu, p_sigma=p_sigma, l_sigma=l_sigma, nn=nn, gamma=gamma, L=L):
        """
        Log posterior of W (S by D) and Z (N by L, or S by N by L).

        When Z is 3-dimensional with one slab per row of W, each row of W is
        evaluated against its own slab of Z and the results are returned as a
        flattened array. Otherwise W and Z are evaluated together in one call.
        """
        # Shape checks (N is the row count of the X given at creation time):
        assert len(W.shape)==2, "Expects W to be 2 dimensional (S by D)."
        assert len(Z.shape) in {2,3}, "Expects Z to be 2 dimensional (N by L) or 3 dimensional (S by N by L)."
        assert Z.shape[-2]==N, f"Unexpected shape of Z ({Z.shape}); doesn't match number of observations ({N})."
        assert Z.shape[-1]==L, f"Unexpected shape of Z ({Z.shape}); doesn't match number of latent features ({L})."
        assert W.shape[1]==nn.D, f"The columns of W ({W.shape[1]}) should match the number of weights ({nn.D})."

        forwarded = dict(X=X, Y=Y, p_mu=p_mu, p_sigma=p_sigma, l_sigma=l_sigma, nn=nn, gamma=gamma)
        one_z_per_w = len(Z.shape)==3 and Z.shape[0]==W.shape[0]

        # Regular case: evaluate W and Z together in a single call.
        if not one_z_per_w:
            return _joint_log_density(W, Z, **forwarded)

        # Paired case: row s of W (as 1 by D) goes with slab s of Z (N by L).
        per_sample = [
            _joint_log_density(w_row.reshape(1,-1), z_slab, **forwarded)
            for w_row, z_slab in zip(W, Z)
        ]
        return np.array(per_sample).flatten()

    return log_posterior
    

In [None]:
# Hyperparameters -- network:
N, M = X_hsc.shape  # N rows (observations) and M columns (input features) of X_hsc.
_, K = Y_hsc.shape  # K columns (outputs) of Y_hsc.

# Hyperparameters -- latent variables:
L = 1  # Number of latent features (inputs)
gamma = 1  # Stored under 'gamma' in the architecture; also passed to the latent prior later on.
sigma = 1  # Stored under 'sigma' in the architecture (one entry per output).

# Network architecture:
architecture = {
    'input_n' : M,  # Number of network inputs (columns of X_hsc).
    'output_n' : K,  # Number of network outputs (columns of Y_hsc).
    'hidden_layers' : [3,3],
    'biases' : [1,1,1],
    'activations' : ['relu', 'relu', 'linear'],
    'gamma' : [gamma]*L,  # Repeated for each latent input.
    'sigma' : [sigma]*K,  # Repeated for each network output.
}

# Initialize network:
bnn_lv = BNN_LV(architecture=architecture)

# Get number of weights in network:
D = bnn_lv.D

# Train network (the fitted weights are read back later via bnn_lv.get_weights()):
bnn_lv.fit(X_hsc, Y_hsc, step_size=0.01, max_iteration=5000, check_point=500, regularization_coef=None)


In [None]:
# Helpers for building W&B callbacks.
# NOTE(review): build_wb_callback_plotfunc is imported but not used in the
# visible cells; notebook convention would also place these imports in the
# first cell.
from utils.training import (
    build_wb_callback_postpred,
    build_wb_callback_plotfunc,
)

# Callback built from the fitted network and the test grid (per the original
# author, it produces a scatter plot using W&B built-in functions).
# NOTE(review): the callback is not wired in anywhere visible -- the
# 'callback' entry of wb_settings is commented out.
wb_callback_postpred = build_wb_callback_postpred(x_data=X_test, model=bnn_lv)


In [None]:
# Create the log posterior over (W, Z) for the HSC data.
# Weight prior: mu=0, sigma=5; likelihood sigma: 0.25.
# NOTE(review): l_sigma=0.25 differs from the `sigma = 1` stored in the
# network architecture -- confirm this is intentional.
log_posterior_hsc = create_log_posterior(
    X_hsc, Y_hsc,
    p_mu=0, p_sigma=5, l_sigma=0.25,
    nn=bnn_lv, gamma=gamma,
    L=L,  # Reuse the notebook-level latent dimension rather than a second hard-coded 1.
)

# Starting weights taken from the fitted network:
mle_weights = bnn_lv.get_weights()

# Define BBVI starting point for NN weights:
Mu_init      = mle_weights
Sigma_init   = np.ones(D)

# Define BBVI starting point for latent variables (one per data observation):
# NOTE(review): length N corresponds to a single latent feature per
# observation (L == 1); confirm the layout BBVI expects before increasing L.
Mu_init_Z    = np.zeros(N)
Sigma_init_Z = np.ones(N)*gamma


In [None]:
# Define W&B settings:
# NOTE(review): 'entity' is a hard-coded personal account name; other users
# will presumably need to change it before running this cell.
wb_settings = {
    'entity' : 'gpestre',
    'project' : 'am207',
    'group' : 'tests',
    'name' : 'bbvi_test',
    'notes' : 'BBVI test',
    'progress' : 10,  # NOTE(review): `progress=10` is also passed directly to BBVI below -- confirm which one is used.
#     'base_path' : '../data/',
#     'filename' : 'temp_bbvi_state.json',
    'archive' : {  # Manually archive info about network and priors.
        'architecture' : architecture,
        'N' : N,
        'M' : M,
        'K' : K,
        'L' : L,
        'D' : D,
        'gamma' : gamma,
        'sigma' : sigma,
        'Mu_init' : Mu_init,
        'Sigma_init' : Sigma_init,
        'Mu_init_Z' : Mu_init_Z,
        'Sigma_init_Z' : Sigma_init_Z,
    },
    # Callback is disabled; enabling it requires wb_callback_postpred to be defined first.
    #'callback' : [wb_callback_postpred],
}

# BBVI settings (unpacked as keyword arguments to BBVI below):
bbvi_params_hsc = {
    'mode' : 'BNN_LV',
    'num_samples' : 100,
    'step_size' : 0.001,
    'num_iters' : 100,
    'random_seed' : 207,  # Fixed seed, presumably for reproducible runs -- TODO confirm how BBVI uses it.
    'Mu_init' : Mu_init,
    'Sigma_init' : Sigma_init,
    'Mu_init_Z' : Mu_init_Z,
    'Sigma_init_Z' : Sigma_init_Z,
    'wb_settings' : wb_settings,
}

# Perform BBVI:
bbvi_hsc = BBVI(log_posterior_hsc, **bbvi_params_hsc, progress=10)
# run() returns two values, stored as Mu and Sigma
# (presumably the fitted variational parameters -- TODO confirm).
Mu, Sigma = bbvi_hsc.run()

# Plot optimization history (ELBO on the left, gradient magnitude on the right):
elbo_hist = bbvi_hsc.elbo_hist
mag_hist = bbvi_hsc.magnitude_hist
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
fig.suptitle("BBVI on Benchmark", fontsize=20)
# Both panels share the same layout, so draw them in one loop;
# the x-axis numbers the recorded entries starting from 1.
for panel, history, y_label in (
    (ax1, elbo_hist, "ELBO"),
    (ax2, mag_hist, "Magnitude of gradient"),
):
    panel.plot(range(1, len(history) + 1), history)
    panel.set_xlabel("Iteration", fontsize=14)
    panel.set_ylabel(y_label, fontsize=14)
plt.show()


In [None]:
# # Set up data
# x_test = np.linspace(-6, 6, 100)

# # Take 10,000 random posterior samples (drawn with replacement)
# w_random_samples = bayesian_lv_weights[np.random.choice(bayesian_lv_weights.shape[0], 10000), :]

# y_preds = []

# # Loop through the samples of weights
# for i in range(w_random_samples.shape[0]):
#     # Create the same NN for predictions but with weights from the samples
#     w_cur = w_random_samples[i,:]

#     mu_pred = bnn_lv.forward(x_test.reshape(-1,1), w_cur)
#     y_pred = mu_pred
#     y_preds.append(y_pred.reshape(-1))

# # Calculate percentiles
# y_lower = np.percentile(y_preds, q=2.5, axis=0)
# y_upper = np.percentile(y_preds, q=97.5, axis=0)
# y_med = np.percentile(y_preds, q=50, axis=0)

# # Plot with confidence
# plt.figure(figsize=(14,7))
# plt.scatter(X_hsc.flatten(), y_hsc.flatten(), color='black', label='data')
# plt.plot(x_test, y_med, label="Median Prediction")
# plt.fill_between(x_test, y_lower, y_upper, alpha=0.4, color='r', label="95% Predictive Interval")
# plt.title("Bayesian Neural Net Predictions with 95% CI")
# plt.xlabel("X Test")
# plt.ylabel("Y Predicted")
# plt.legend()
# plt.show()
