In [1]:
%cd ../

/scratch/km817/iREC


In [2]:
import numpy as np
import pandas as pd
# Seed NumPy's global RNG.
np.random.seed(0)
# The spreadsheet can be fetched once with:
# !wget "http://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx" --no-check-certificate
# Keep only the first 10 columns as a plain NumPy array; the cells below use
# all but the last two columns as inputs and the last two as targets.
data = pd.read_excel('ENB2012_data.xlsx', header=0).iloc[:, :10].values

In [47]:
import torch
import pyro
from torch import nn
import pyro.distributions as dist
from pyro.infer import HMC, MCMC, SVI, NUTS, TraceMeanField_ELBO
from pyro import poutine
from sklearn.datasets import load_boston
import numpy as np
import torch.nn.functional as F
from tqdm.notebook import trange
from rec.utils import kl_estimate_with_mc
import matplotlib.pyplot as plt
import torch.distributions as D
from torch.nn.utils import parameters_to_vector

In [17]:
from Laplace_GCN_Code.preds.laplace import Laplace
from Laplace_GCN_Code.preds.likelihoods import GaussianLh

In [6]:
x_ = data[:, :-2]
y_ = data[:, -2:]

In [7]:
def _middle_third(column):
    """Return the row indices whose rank along `column` falls in the middle third."""
    ranking = np.argsort(column, axis=-1)
    count = ranking.shape[0]
    return ranking[count // 3: count * 2 // 3]


# One held-out index set per input feature: the points ranked in the middle
# third of that feature.
test_splits_idxs = [_middle_third(x_[:, feature]) for feature in range(x_.shape[-1])]

In [8]:
# Build one standardised train/test split per input feature from the
# held-out index sets in test_splits_idxs.
test_splits_x, test_splits_y = [], []
train_splits_x, train_splits_y = [], []
all_rows = np.arange(x_.shape[0])
for d in range(x_.shape[-1]):
    test_index = test_splits_idxs[d]
    # Every row that is not held out for this split is a training row.
    train_index = np.delete(all_rows, test_index, axis=0)
    x_train, y_train = x_[train_index], y_[train_index]
    x_test, y_test = x_[test_index], y_[test_index]
    # Standardise the inputs with statistics of the training rows only.
    x_m = x_train.mean(0)
    x_s = x_train.std(0)
    # A feature that is constant over the training rows has zero std; dividing
    # by it would produce NaN/inf, so such a feature is only centred.
    x_s = np.where(x_s == 0, 1.0, x_s)
    x_train = (x_train - x_m) / x_s
    x_test = (x_test - x_m) / x_s
    test_splits_x.append(x_test)
    test_splits_y.append(y_test)
    train_splits_x.append(x_train)
    train_splits_y.append(y_train)
# x_train / y_test deliberately stay bound to the last split's arrays: the next
# cell reads their trailing dimension to get D_in and D_out.

In [9]:
# Read the feature / target widths before the names are rebound below.
D_in, D_out = x_train.shape[-1], y_test.shape[-1]
# Stack the per-split arrays along a new leading axis and convert to float tensors.
x_train, y_train, x_test, y_test = (
    torch.FloatTensor(np.array(per_split))
    for per_split in (train_splits_x, train_splits_y, test_splits_x, test_splits_y)
)

In [10]:
def regression_model(x, y=None, weight_samples=None, in_size=1, num_nodes=10, out_size=1, ELBO_BETA=1.):
    """Pyro model: a ReLU MLP with two hidden layers and a Gaussian likelihood.

    All network weights plus ``out_size`` noise parameters are drawn as one
    flat vector at the sample site ``"params"`` from a standard normal prior;
    the log-probability of that site is scaled by ``ELBO_BETA``.  The trailing
    ``out_size`` entries (``rho``) set the per-output noise standard deviation
    ``softplus(rho)``; the remaining entries are unpacked, in order, into
    fc1 weights/bias, fc2 weights/bias and fc3 weights/bias.

    Args:
        x: inputs, indexed as (batch, in_size) by the einsum contractions.
        y: observed targets for the ``"obs"`` site, or None to sample it.
        weight_samples: unused; kept so existing callers keep working.
        in_size: number of input features.
        num_nodes: width of both hidden layers.
        out_size: number of outputs (and of noise parameters).
        ELBO_BETA: scale applied to the ``"params"`` site via ``poutine.scale``.

    Returns:
        The network output ``mu`` after ``squeeze()``.
    """
    # sample vector of weights for regression
    total_weights = (in_size + 1) * num_nodes + (num_nodes + 1) * num_nodes + (num_nodes + 1) * out_size
    # sample params; the noise block is sized from `out_size` so the model does
    # not depend on a notebook-level global for its own parameter layout
    with poutine.scale(scale=ELBO_BETA):
        params = pyro.sample("params", dist.Normal(torch.zeros(total_weights + out_size), 1.).to_event(1))
    weights, rho = params[:-out_size], params[-out_size:]

    idx = 0
    fc1_weights = weights[idx: idx + in_size * num_nodes].reshape(num_nodes, in_size)
    idx += in_size * num_nodes
    fc1_bias = weights[idx: idx + num_nodes].reshape(num_nodes)
    idx += num_nodes

    fc2_weights = weights[idx: idx + num_nodes * num_nodes].reshape(num_nodes, num_nodes)
    idx += num_nodes * num_nodes
    fc2_bias = weights[idx: idx + num_nodes].reshape(num_nodes)
    idx += num_nodes

    fc3_weights = weights[idx: idx + num_nodes * out_size].reshape(out_size, num_nodes)
    idx += num_nodes * out_size
    fc3_bias = weights[idx: idx + out_size].reshape(out_size)
    idx += out_size

    assert idx == total_weights, "Something wrong with number of weights!"

    # compute forward pass; the bias broadcasts over the batch dimension
    x = torch.relu(torch.einsum("ij, kj -> ki", fc1_weights, x) + fc1_bias)
    x = torch.relu(torch.einsum("ij, kj -> ki", fc2_weights, x) + fc2_bias)
    x = torch.einsum("ij, kj -> ki", fc3_weights, x) + fc3_bias
    mu = x.squeeze()

    # Diagonal Gaussian likelihood with per-output variance softplus(rho) ** 2.
    with pyro.plate("data"):
        obs = pyro.sample("obs", dist.MultivariateNormal(loc=mu,
                                                         covariance_matrix=torch.diag(F.softplus(rho) ** 2)), obs=y)
    return mu

def make_empirical_gmm(samples, num_nodes, x_test):
    """Build an equally weighted Gaussian mixture predictive from parameter samples.

    Each row of ``samples['params']`` contributes one mixture component: its
    mean is the ``regression_model`` output at ``x_test`` for that row, and its
    diagonal covariance is ``softplus(rho) ** 2`` taken from the row's last
    ``D_out`` entries.

    Args:
        samples: dict with key ``'params'`` holding the stacked parameter draws.
        num_nodes: hidden width forwarded to ``regression_model``.
        x_test: inputs at which the predictive is evaluated.

    Returns:
        A ``MixtureSameFamily`` distribution over targets.
    """
    noise_var = F.softplus(samples['params'][:, -D_out:]) ** 2
    predictive = Predictive(regression_model, samples, return_sites=['_RETURN'])
    component_means = predictive(x_test, None, num_nodes=num_nodes,
                                 in_size=D_in, out_size=D_out)['_RETURN']
    # Uniform weights: one category per leading entry of the predictions.
    mixing = dist.Categorical(torch.ones(component_means.shape[0]))
    # assumes the squeezed predictions are (sample, point, output) so the
    # permute puts the sample axis next to the event axis — TODO confirm
    components = dist.MultivariateNormal(loc=component_means.squeeze().permute(1, 0, 2),
                                         covariance_matrix=torch.diag_embed(noise_var))
    return dist.MixtureSameFamily(mixing, components)

In [11]:
class map_regression_model(nn.Module):
    """Two-hidden-layer ReLU MLP with trainable weights and a single noise parameter.

    Weights and biases are stored as explicit ``nn.Parameter`` tensors,
    initialised from a standard normal, together with a one-element ``rho``
    whose softplus is the Gaussian likelihood scale.
    """

    def __init__(self, in_size=1, num_nodes=10, out_size=1):
        super().__init__()
        self.in_size, self.out_size = in_size, out_size
        self.activation = torch.relu
        self.num_nodes = num_nodes

        # Layer 1: in_size -> num_nodes
        self.fc1_weights = nn.Parameter(torch.randn(num_nodes, in_size))
        self.fc1_bias = nn.Parameter(torch.randn(num_nodes))
        # Layer 2: num_nodes -> num_nodes
        self.fc2_weights = nn.Parameter(torch.randn(num_nodes, num_nodes))
        self.fc2_bias = nn.Parameter(torch.randn(num_nodes))
        # Layer 3: num_nodes -> out_size
        self.fc3_weights = nn.Parameter(torch.randn(out_size, num_nodes))
        self.fc3_bias = nn.Parameter(torch.randn(out_size))
        # Unconstrained noise parameter shared by all outputs.
        self.rho = nn.Parameter(torch.randn(1))

    def forward(self, x):
        """Run the network on ``x`` (batch, in_size) and return the squeezed output."""
        n_rows = x.shape[0]
        stages = (
            (self.fc1_weights, self.fc1_bias, True),
            (self.fc2_weights, self.fc2_bias, True),
            (self.fc3_weights, self.fc3_bias, False),
        )
        hidden = x
        for weight, bias, apply_relu in stages:
            hidden = torch.einsum("ij, kj -> ki", weight, hidden) + bias[None].repeat(n_rows, 1)
            if apply_relu:
                hidden = torch.relu(hidden)
        return hidden.squeeze()

    def loss_function(self, x, y):
        """Return the summed Gaussian negative log-likelihood of ``y`` given ``x``."""
        predictions = self.forward(x)
        likelihood = D.Normal(loc=predictions, scale=F.softplus(self.rho))
        return -likelihood.log_prob(y).sum()

In [12]:
class deterministic_regression_model(nn.Module):
    """Two-hidden-layer ReLU MLP whose weights are read from one flat parameter vector.

    The vector layout is: fc1 weights, fc1 bias, fc2 weights, fc2 bias,
    fc3 weights, fc3 bias, followed by ``out_size`` noise parameters ``rho``.
    The slices are stored as plain tensor attributes, so the model can be
    re-pointed at another sample with ``make_weights_from_sample``.
    """

    def __init__(self, params, in_size=1, num_nodes=10, out_size=1):
        super(deterministic_regression_model, self).__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.activation = torch.relu
        self.num_nodes = num_nodes
        # Single source of truth for the unpacking logic.
        self.make_weights_from_sample(params)

    def forward(self, x):
        """Run the network on ``x`` (batch, in_size) and return the squeezed output."""
        # Biases broadcast over the batch dimension.
        hidden = torch.relu(torch.einsum("ij, kj -> ki", self.fc1_weights, x) + self.fc1_bias)
        hidden = torch.relu(torch.einsum("ij, kj -> ki", self.fc2_weights, hidden) + self.fc2_bias)
        out = torch.einsum("ij, kj -> ki", self.fc3_weights, hidden) + self.fc3_bias
        return out.squeeze()

    def weight_prior_lp(self):
        """Return the standard-normal log-density of the parameters, averaged over entries."""
        return dist.Normal(loc=0., scale=1.).log_prob(self.params).mean()

    def data_likelihood(self, x, y):
        """Return the Gaussian log-likelihood of ``y``: summed over the last axis, then averaged."""
        likelihood = dist.Normal(loc=self.forward(x),
                                 scale=F.softplus(self.rho))
        return likelihood.log_prob(y).sum(-1).mean()

    def joint_log_prob(self, x, y):
        """Return ``data_likelihood(x, y) + weight_prior_lp()``."""
        # weight_prior_lp takes no data arguments; passing (x, y) raised TypeError.
        return self.data_likelihood(x, y) + self.weight_prior_lp()

    def make_weights_from_sample(self, params):
        """Unpack the flat vector ``params`` into the layer tensors and ``rho``.

        Args:
            params: 1-D tensor laid out as described in the class docstring.
        """
        weights, rho = params[:-self.out_size], params[-self.out_size:]

        idx = 0
        self.fc1_weights = weights[idx: idx + self.in_size * self.num_nodes].reshape(self.num_nodes, self.in_size)
        idx += self.in_size * self.num_nodes
        self.fc1_bias = weights[idx: idx + self.num_nodes].reshape(self.num_nodes)
        idx += self.num_nodes

        self.fc2_weights = weights[idx: idx + self.num_nodes * self.num_nodes].reshape(self.num_nodes, self.num_nodes)
        idx += self.num_nodes * self.num_nodes
        self.fc2_bias = weights[idx: idx + self.num_nodes].reshape(self.num_nodes)
        idx += self.num_nodes

        self.fc3_weights = weights[idx: idx + self.num_nodes * self.out_size].reshape(self.out_size, self.num_nodes)
        idx += self.num_nodes * self.out_size
        self.fc3_bias = weights[idx: idx + self.out_size].reshape(self.out_size)
        idx += self.out_size

        self.weights = weights
        self.rho = rho
        self.params = params

In [13]:
# Run configuration for the cells below.
pyro.set_rng_seed(10)
ELBO_BETA = 1.  # forwarded to regression_model as the scale on its "params" site
S=1  # index of the train/test split used throughout (x_train[S], y_test[S], ...)
in_size = x_train.shape[-1]
num_nodes = 3  # hidden-layer width passed to the models

# Train MAP estimate

In [22]:
from Laplace_GCN_Code.preds.models import MLPS

model = MLPS(input_size=D_in, hidden_sizes=[num_nodes]*1, output_size=D_out)

In [41]:
class homo_noise_model(nn.Module):
    """Pair an existing network with one extra trainable noise parameter ``rho``.

    The wrapped network is stored as the submodule ``oldmodel``; ``rho`` is a
    one-element parameter initialised from a standard normal.
    """

    def __init__(self, old_model):
        super().__init__()
        # Stored as a submodule, so its parameters are reported by this wrapper too.
        self.oldmodel = old_model
        self.rho = nn.Parameter(torch.randn(1))

In [42]:
model_w_noise = homo_noise_model(model)

In [55]:
# MAP training: maximise the Gaussian log-likelihood of split S with an L2
# penalty (precision `prior_prec`) on every parameter of model_w_noise.
optim = torch.optim.Adam(model_w_noise.parameters(), lr=1e-2)
prior_prec = 1
losses = list()
num_iterations = 10000
# Number of training points in the selected split; len(x_train) would be the
# number of splits, which is the wrong normaliser for a per-point loss.
n_train = len(x_train[S])
pbar = trange(num_iterations)
for j in pbar:
    # Clear gradients first so a previously interrupted run cannot leak stale
    # gradients into the first step.
    model_w_noise.zero_grad()
    f = model(x_train[S])
    # The flattened vector covers all of model_w_noise's parameters (the
    # wrapped network and rho), so the penalty applies to rho as well.
    w = parameters_to_vector(model_w_noise.parameters())
    reg = 0.5 * prior_prec * w @ w
    lh = GaussianLh(sigma_noise=F.softplus(model_w_noise.rho))
    loss = - lh.log_likelihood(y_train[S], f) + reg
    loss.backward()
    optim.step()
    losses.append(loss.item())
    pbar.set_description("[iteration %04d] loss: %.4f" % (j + 1, losses[-1] / n_train))

  0%|          | 0/10000 [00:00<?, ?it/s]

In [117]:
lh = GaussianLh(F.softplus(model_w_noise.rho.detach()))
posterior = Laplace(model, prior_prec, lh)

In [126]:
# NOTE(review): `train_loader` is not defined in any cell shown in this notebook,
# and the recorded output of this cell is an AssertionError. Define the loader
# explicitly and re-run before relying on `posterior`.
posterior.infer(train_loader, cov_type='full')

AssertionError: 

In [125]:
posterior.Sigma

tensor([3.9386e-05, 3.9388e-05, 3.0950e-05, 5.1048e-05, 5.4764e-05, 5.4762e-05,
        5.4316e-05, 5.2974e-05, 7.4291e-05, 8.0182e-05, 1.3283e-04, 7.4850e-05,
        8.6408e-05, 8.6406e-05, 5.8138e-05, 5.8095e-05, 4.5158e-04, 2.9369e-04,
        3.0561e-04, 2.9078e-04, 1.3864e-04, 1.3864e-04, 1.3897e-04, 1.3776e-04,
        5.4764e-05, 8.6408e-05, 1.3864e-04, 4.4131e-03, 4.0901e-03, 3.4409e-03,
        4.4131e-03, 4.0901e-03, 3.4409e-03, 2.7968e-03, 2.7968e-03])

In [89]:
posterior.mu

tensor([ 6.1666e-01,  2.0569e-01, -1.1963e+00,  5.5990e-01, -1.7350e+00,
        -6.8464e-03, -1.1957e-01,  2.8761e-02,  1.2986e+00,  9.8996e-01,
         1.5477e-01,  1.1111e+00,  6.2162e-01, -5.9374e-03,  5.9999e-01,
         6.5159e-02,  1.9713e+00, -8.4253e-01, -2.2525e+00, -3.7284e-01,
        -4.1940e+00,  8.4336e-03,  5.6838e-02, -2.7724e-02, -4.4291e-02,
         1.4071e+00,  2.9631e+00, -1.0842e+01,  1.0745e+01,  8.6865e+00,
        -1.0073e+01,  7.8678e+00,  8.9527e+00,  5.3750e+00,  9.9558e+00])

In [94]:
posterior.Sigma.Lams

[[tensor([ 5201.3369,  6981.8086, 24860.2207]),
  tensor([0.0000e+00, 1.0324e-03, 5.8550e-02, 6.3212e-01, 7.8704e-01, 1.0000e+00,
          1.2130e+00, 4.3083e+00])],
 [tensor([ 5201.3369,  6981.8086, 24860.2207])],
 [tensor([356.5528, 356.5528]), tensor([0.2181, 0.6611, 1.2487])],
 [tensor([356.5528, 356.5528])]]

In [75]:
parameters_to_vector(model.parameters())

tensor([ 6.1666e-01,  2.0569e-01, -1.1963e+00,  5.5990e-01, -1.7350e+00,
        -6.8464e-03, -1.1957e-01,  2.8761e-02,  1.2986e+00,  9.8996e-01,
         1.5477e-01,  1.1111e+00,  6.2162e-01, -5.9374e-03,  5.9999e-01,
         6.5159e-02,  1.9713e+00, -8.4253e-01, -2.2525e+00, -3.7284e-01,
        -4.1940e+00,  8.4336e-03,  5.6838e-02, -2.7724e-02, -4.4291e-02,
         1.4071e+00,  2.9631e+00, -1.0842e+01,  1.0745e+01,  8.6865e+00,
        -1.0073e+01,  7.8678e+00,  8.9527e+00,  5.3750e+00,  9.9558e+00],
       grad_fn=<CatBackward>)

In [63]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.6167,  0.2057, -1.1963,  0.5599, -1.7350, -0.0068, -0.1196,  0.0288],
         [ 1.2986,  0.9900,  0.1548,  1.1111,  0.6216, -0.0059,  0.6000,  0.0652],
         [ 1.9713, -0.8425, -2.2525, -0.3728, -4.1940,  0.0084,  0.0568, -0.0277]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.0443,  1.4071,  2.9631], requires_grad=True),
 Parameter containing:
 tensor([[-10.8421,  10.7447,   8.6865],
         [-10.0729,   7.8678,   8.9527]], requires_grad=True),
 Parameter containing:
 tensor([5.3750, 9.9558], requires_grad=True)]

In [None]:
optimizer = pyro.optim.Adam({"lr": 1e-2})

# train Factored Gaussian approx
from pyro.infer.autoguide import AutoDiagonalNormal
guide = AutoDiagonalNormal(regression_model)
svi = SVI(regression_model, guide, optimizer, loss=TraceMeanField_ELBO())
num_iterations = 20000
pyro.clear_param_store()
# Number of training points in the selected split; len(x_train) would be the
# number of splits, which is the wrong normaliser for a per-point loss.
n_train = len(x_train[S])
pbar = trange(num_iterations)
losses = []
for j in pbar:
    # calculate the loss and take a gradient step
    loss = svi.step(x_train[S], y_train[S], ELBO_BETA=ELBO_BETA, num_nodes=num_nodes, in_size=D_in, out_size=D_out)
    losses.append(loss)
    pbar.set_description("[iteration %04d] loss: %.4f" % (j + 1, loss / n_train))
guide.requires_grad_(False)

# Collect the fitted guide parameters in param-store order.
params = [pyro.param(name) for name, _ in pyro.get_param_store().items()]

In [None]:
plt.plot(losses[-1000:])

In [None]:
# Predictive is needed below, so import it here instead of relying on a later
# cell having been run first.
from pyro.infer import Predictive

# assumes the param store yielded (location, scale) in that order — TODO confirm
means, stds = params
variational_posterior = dist.MultivariateNormal(loc=means, covariance_matrix=torch.diag(stds ** 2))
variational_sample = variational_posterior.sample((50,))
variational_samples = {"params" : variational_sample}
# Standard-normal prior over the flat parameter vector, matching the "params"
# site of regression_model (zero mean, unit scale).
# NOTE(review): `prior` was not defined in any visible cell; confirm this is the
# form kl_estimate_with_mc expects.
prior = dist.MultivariateNormal(loc=torch.zeros_like(means), covariance_matrix=torch.eye(means.shape[0]))
kl_var_prior = kl_estimate_with_mc(variational_posterior, prior)
var_pred = Predictive(regression_model, variational_samples, return_sites=['obs', '_RETURN'])(
    x_test[S], None, num_nodes=num_nodes, in_size=D_in, out_size=D_out)
# RMSE of the sample-averaged prediction on the held-out part of split S.
VAR_RMSE = ((var_pred['_RETURN'].mean(0) - y_test[S]) ** 2).mean().sqrt()

In [None]:
from pyro.infer import Predictive
var_gmm = make_empirical_gmm(variational_samples, num_nodes, x_test[S])

In [None]:
var_gmm.log_prob(y_test[S]).mean()

In [None]:
# Held-out summary: mixture predictives on x_test[S] for each set of samples.
# NOTE(review): full_samples, kde_samples, kl_kde_prior, HMC_RMSE and KDE_RMSE are
# not assigned in any cell shown in this notebook; this cell fails on a fresh
# kernel until the cells that produce them are restored.
hmc_gmm = make_empirical_gmm(full_samples, num_nodes, x_test[S])
kde_gmm = make_empirical_gmm(kde_samples, num_nodes, x_test[S])
var_gmm = make_empirical_gmm(variational_samples, num_nodes, x_test[S])
print(f"The final KLs are: KDE {kl_kde_prior}, VAR {kl_var_prior}\n"
      f"The final RMSE are: HMC {HMC_RMSE}, KDE {KDE_RMSE}, VAR {VAR_RMSE}\n"
      f"The final LLs are: HMC {hmc_gmm.log_prob(y_test[S]).mean()}, KDE {kde_gmm.log_prob(y_test[S]).mean()}, VAR {var_gmm.log_prob(y_test[S]).mean()}.")

In [None]:
# Training-split summary: mixture predictives on x_train[S] for each set of samples.
# NOTE(review): full_samples, kde_samples, kl_kde_prior, HMC_RMSE and KDE_RMSE are
# not assigned in any cell shown in this notebook.
# NOTE(review): only the log-likelihoods below are evaluated on the training
# split; the KL and RMSE values printed are the same variables as in the
# held-out summary (VAR_RMSE was computed on x_test[S]).
hmc_gmm = make_empirical_gmm(full_samples, num_nodes, x_train[S])
kde_gmm = make_empirical_gmm(kde_samples, num_nodes, x_train[S])
var_gmm = make_empirical_gmm(variational_samples, num_nodes, x_train[S])
print(f"The final KLs are: KDE {kl_kde_prior}, VAR {kl_var_prior}\n"
      f"The final RMSE are: HMC {HMC_RMSE}, KDE {KDE_RMSE}, VAR {VAR_RMSE}\n"
      f"The final LLs are: HMC {hmc_gmm.log_prob(y_train[S]).mean()}, KDE {kde_gmm.log_prob(y_train[S]).mean()}, VAR {var_gmm.log_prob(y_train[S]).mean()}.")

# Compress weights

In [None]:
#lets compress some samples
#### sample weights with compression algorithm
from tqdm.notebook import trange
from rec.beamsearch.Coders.Encoder_Empirical import Encoder
from rec.beamsearch.distributions.CodingSampler import CodingSampler
from rec.beamsearch.distributions.EmpiricalMixturePosterior import EmpiricalMixturePosterior
from rec.beamsearch.samplers.GreedySampling_BNNs import GreedySampler
import pyro.distributions as dist

In [None]:
dummy_model = deterministic_regression_model(full_samples['params'][10], in_size=D_in, num_nodes=num_nodes, out_size=D_out)


In [None]:
samples = full_samples['params']

In [None]:
kl_q_p = kl_kde_prior

In [None]:
from rec.OptimisingVars.FinalJointOptimiser import FinalJointOptimiser
# Optimise the auxiliary variables once; the compression loops below reuse `aux_vars`.
coding_sampler = CodingSampler
auxiliary_posterior = EmpiricalMixturePosterior
selection_sampler = GreedySampler
omega = 5
initial_seed = 0
beamwidth = 1
epsilon = 0.
# This encoder is only used to read `n_auxiliary` below.
dummy_encoder = Encoder(dummy_model,
                     x_train[S],
                     y_train[S],
                     samples,
                     initial_seed,
                     coding_sampler,
                     selection_sampler,
                     auxiliary_posterior,
                     omega,
                     beamwidth,
                     epsilon=epsilon,
                     prior_var=1.,
                     total_kl=kl_q_p)

# The optimiser is initialised at the mean of the posterior samples.
z_sample = samples.mean(0)
omega = 5
n_trajectories = 64
n_auxiliaries = dummy_encoder.n_auxiliary
prior_var = 1.
emp_opt = FinalJointOptimiser(z_sample, omega, n_auxiliaries, kl_q_p, n_trajectories, prior_var)
aux_vars = emp_opt.run_optimiser(epochs=5000)

In [None]:
del dummy_encoder

In [None]:
# Draw `num_compressed_samples` compressed weight vectors against the empirical
# (sample-based) posterior, one fresh encoder per draw.
coding_sampler = CodingSampler
auxiliary_posterior = EmpiricalMixturePosterior
selection_sampler = GreedySampler
omega = 5

initial_seed = 0
beamwidth = 1
epsilon = 0.2
num_compressed_samples = 500
compressed_weights_emp_low_eps = []

for i in trange(num_compressed_samples):
    # The seed accumulates across iterations: 0, 10, 30, 60, ...
    # NOTE(review): if evenly spaced seeds (i * 10) were intended, this should
    # not add to the previous value — confirm.
    initial_seed = initial_seed + i * 10
    encoder = Encoder(dummy_model,
                     x_train[S],
                     y_train[S],
                     samples,
                     initial_seed,
                     coding_sampler,
                     selection_sampler,
                     auxiliary_posterior,
                     omega,
                     beamwidth,
                     epsilon=epsilon,
                     prior_var=1.,
                     total_kl=kl_q_p)
    
    # Reuse the auxiliary variables optimised earlier instead of the encoder's own.
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    
    w, idx = encoder.run_encoder()
    # Only the first returned weight vector is kept.
    compressed_weights_emp_low_eps.append(w[0])

In [None]:
# Stack the compressed weight vectors (one row each) in a single call instead
# of re-concatenating, and re-copying, the growing tensor on every iteration.
# An empty list still yields the empty tensor the loop form produced.
weight_samples = (torch.stack(list(compressed_weights_emp_low_eps), dim=0)
                  if len(compressed_weights_emp_low_eps) > 0 else torch.zeros([0]))

In [None]:
weight_samples = {'params':weight_samples}

In [None]:
compressed = Predictive(regression_model, weight_samples, return_sites=['obs', '_RETURN'])(x_test[S], None, 
                                                                        num_nodes=num_nodes, in_size=D_in,
                                                                                             out_size=D_out)

In [None]:
compressed_gmm = make_empirical_gmm(weight_samples, num_nodes, x_test[S])

In [None]:
compressed_gmm.log_prob(y_test[S]).mean()

In [None]:
from rec.beamsearch.distributions.CodingSampler import CodingSampler
from rec.beamsearch.distributions.KDEPosterior import KDEPosterior
from rec.beamsearch.samplers.GreedySampling_BNNs import GreedySampler
from rec.beamsearch.Coders.Encoder_KDE_BNN import EncoderKDE

In [None]:
KDE_target = kde

In [None]:
# Draw `num_compressed_samples` compressed weight vectors against the KDE
# target, one fresh encoder per draw.
coding_sampler = CodingSampler
auxiliary_posterior = KDEPosterior
selection_sampler = GreedySampler
omega = 5

initial_seed = 0
beamwidth = 1
epsilon = 0.2
num_compressed_samples = 500
compressed_weights_kde_low_eps = []

for i in trange(num_compressed_samples):
    # The seed accumulates across iterations: 0, 10, 30, 60, ...
    # NOTE(review): if evenly spaced seeds (i * 10) were intended, this should
    # not add to the previous value — confirm.
    initial_seed = initial_seed + i * 10
    encoder = EncoderKDE(model=dummy_model,
                         x_data=x_train[S],
                         y_data=y_train[S],
                         target=KDE_target,
                         initial_seed=initial_seed,
                         coding_sampler=coding_sampler,
                         selection_sampler=selection_sampler,
                         auxiliary_posterior=auxiliary_posterior,
                         omega=omega,
                         epsilon=epsilon,
                         beamwidth=beamwidth,
                         prior_var=1.,
                        total_kl=kl_q_p)
    
    # Reuse the previously optimised auxiliary variables.
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    
    w, idx = encoder.run_encoder()
    # Only the first returned weight vector is kept.
    compressed_weights_kde_low_eps.append(w[0])

In [None]:
# Stack the KDE-compressed weight vectors (one row each) in a single call
# instead of re-concatenating the growing tensor on every iteration.
# An empty list still yields the empty tensor the loop form produced.
weight_samples_kde = (torch.stack(list(compressed_weights_kde_low_eps), dim=0)
                      if len(compressed_weights_kde_low_eps) > 0 else torch.zeros([0]))

In [None]:
# Training log-likelihood of every KDE-compressed sample. Values are gathered
# in a list and stacked once, rather than concatenated onto a growing tensor.
per_sample_log_probs = []
for w in compressed_weights_kde_low_eps:
    # Point the shared model at this sample before scoring it; after the loop
    # dummy_model holds the last sample's weights.
    dummy_model.make_weights_from_sample(w)
    per_sample_log_probs.append(dummy_model.data_likelihood(x_train[S], y_train[S]))
# An empty input still yields the empty tensor the concatenation form produced.
log_probs_kde = torch.stack(per_sample_log_probs, dim=-1) if per_sample_log_probs else torch.zeros([0])

In [None]:
test = {"params":weight_samples_kde[torch.topk(log_probs_kde, k=25)[1]]}

In [None]:
weight_samples_kde

In [None]:
weight_samples_kde = {'params':weight_samples_kde}

In [None]:
compressed = Predictive(regression_model, weight_samples_kde, return_sites=['obs', '_RETURN'])(x_test[S], None, 
                                                                        num_nodes=num_nodes, in_size=D_in,
                                                                                             out_size=D_out)

In [None]:
compressed_gmm_kde = make_empirical_gmm(weight_samples_kde, num_nodes, x_test[S])

In [None]:
compressed_gmm_kde.log_prob(y_test[S]).mean()

In [None]:
compressed_gmm_kde_train = make_empirical_gmm(weight_samples_kde, num_nodes, x_train[S])

compressed_gmm_kde_train.log_prob(y_train[S]).mean()

# Variational Scheme

In [None]:
#### sample weights with compression algorithm
from rec.beamsearch.Coders.Encoder_Variational import Encoder
from rec.beamsearch.distributions.CodingSampler import CodingSampler
from rec.beamsearch.distributions.VariationalPosterior import VariationalPosterior
from rec.beamsearch.samplers.GreedySampling import GreedySampler
from rec.OptimisingVars.VariationalOptimiser import VariationalOptimiser
from tqdm.notebook import trange
# Configure the variational coding scheme and optimise its auxiliary variables.
# From this cell on, `Encoder` and `GreedySampler` are the variational-scheme
# classes imported just above, replacing the earlier imports of the same names.
coding_sampler = CodingSampler
auxiliary_posterior = VariationalPosterior
selection_sampler = GreedySampler
omega = 5

initial_seed = 0
beamwidth = 1
epsilon = 0.



# The chained assignment binds the same encoder to both names.
compute_params_enc =  encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1.)

# Read the quantities the optimiser needs from this encoder.
n_auxiliaries = compute_params_enc.n_auxiliary
kl_q_p = compute_params_enc.total_kl
var_opt = VariationalOptimiser(compute_params_enc.target, omega, n_auxiliaries, kl_q_p, n_trajectories=16, total_var=1.)
aux_vars = var_opt.run_optimiser(epochs=1000, lr=1e-1)

In [None]:
del compute_params_enc

In [None]:
# Draw `num_compressed_samples` compressed weight vectors against the
# variational posterior, one fresh encoder per draw.
initial_seed = 0
beamwidth = 1
# NOTE(review): the result list is named "high_eps", but epsilon here is 0.2,
# the same value the "low_eps" runs use — confirm the intended value or name.
epsilon = 0.2

compressed_weights_var_high_eps = []
num_compressed_samples = 500
for i in trange(num_compressed_samples):
    # The seed accumulates across iterations: 0, 10, 30, 60, ...
    initial_seed = initial_seed + i * 10
    encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1.)
    
    # Reuse the previously optimised auxiliary variables.
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    w, idx = encoder.run_encoder()

    # Only the first returned weight vector is kept.
    compressed_weights_var_high_eps.append(w[0])

In [None]:
# Stack the variationally compressed weight vectors (one row each) in a single
# call instead of re-concatenating the growing tensor on every iteration.
# An empty list still yields the empty tensor the loop form produced.
weight_samples_var = (torch.stack(list(compressed_weights_var_high_eps), dim=0)
                      if len(compressed_weights_var_high_eps) > 0 else torch.zeros([0]))

In [None]:
weight_samples_var = {'params':weight_samples_var}

In [None]:
compressed = Predictive(regression_model, weight_samples_var, return_sites=['obs', '_RETURN'])(x_test[S], None, 
                                                                        num_nodes=num_nodes, in_size=D_in,
                                                                                             out_size=D_out)

In [None]:
compressed_gmm_var = make_empirical_gmm(weight_samples_var, num_nodes, x_test[S])

compressed_gmm_var.log_prob(y_test[S]).mean()

In [None]:
compressed_gmm_var_train = make_empirical_gmm(weight_samples_var, num_nodes, x_train[S])

compressed_gmm_var_train.log_prob(y_train[S]).mean()

In [None]:
import pickle as pkl


def _dump_pickle(obj, path):
    """Pickle `obj` to `path`, closing the file handle even if dumping fails."""
    with open(path, "wb") as handle:
        pkl.dump(obj, handle)


# Persist the variational results for split S. The commented-out lines cover
# results that are not saved in this run.
# pkl.dump(full_samples, open(f"PickledStuff/BNN_UCI/ENERGY/full_samples_split{S}.pkl", "wb"))
# pkl.dump(kde_samples, open(f"PickledStuff/BNN_UCI/ENERGY/kde_samples_split{S}.pkl", "wb"))
_dump_pickle(variational_samples, f"PickledStuff/BNN_UCI/ENERGY/variational_samples_split{S}.pkl")
# pkl.dump(weight_samples_kde, open(f"PickledStuff/BNN_UCI/ENERGY/compressed_kde_samples_split{S}.pkl", "wb"))
# pkl.dump(weight_samples, open(f"PickledStuff/BNN_UCI/ENERGY/compressed_emp_samples_split{S}.pkl", "wb"))
_dump_pickle(weight_samples_var, f"PickledStuff/BNN_UCI/ENERGY/compressed_var_samples_split{S}.pkl")
# pkl.dump(kl_kde_prior, open(f"PickledStuff/BNN_UCI/ENERGY/kde_kl_split{S}.pkl", "wb"))
_dump_pickle(kl_var_prior, f"PickledStuff/BNN_UCI/ENERGY/var_kl_split{S}.pkl")

In [None]:
print('done')