In [3]:
%load_ext autoreload

In [4]:
%autoreload 2

In [5]:
%cd ../..

/scratch/km817/iREC


In [2]:
import torch
import hamiltorch
import matplotlib.pyplot as plt
import torch.distributions as D
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

In [6]:
%matplotlib inline

In [7]:
hamiltorch.set_random_seed(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
hamiltorch.__version__

'0.4.0.dev1'

In [9]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data'
data = pd.read_csv(url, header=None)

x_ = data.iloc[:, 1:-1].values
y_ = data.iloc[:, -1].values.reshape(-1, 1)

np.random.seed(0)
shuffle_idxs = np.arange(x_.shape[0])
np.random.shuffle(shuffle_idxs)

num_train = int(x_.shape[0] * 0.5)
train_idxs = shuffle_idxs[:num_train]
test_idxs = shuffle_idxs[:num_train]
x_train = x_[train_idxs]
x_m = x_train.mean(0)
x_s = x_train.std(0)
x_train = (x_train - x_m) / x_s
x_test = x_[test_idxs]
x_test = (x_test - x_m) / x_s
y_train = y_[train_idxs]
y_test = y_[test_idxs]

D_in = x_train.shape[-1]
D_out = 10
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(y_train)
x_test= torch.FloatTensor(x_test)
y_test = torch.FloatTensor(y_test)

In [10]:
class Net(nn.Module):
    def __init__(self, num_nodes: int = 10):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4, num_nodes)
        self.fc2 = nn.Linear(num_nodes, num_nodes)
        self.fc3 = nn.Linear(num_nodes, 3)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x
num_nodes = 2
net = Net(num_nodes=num_nodes)


print(net)

Net(
  (fc1): Linear(in_features=4, out_features=2, bias=True)
  (fc2): Linear(in_features=2, out_features=2, bias=True)
  (fc3): Linear(in_features=2, out_features=3, bias=True)
)


# MF-VI Approx

In [11]:
def compute_categorical_mixture(preds, y):
    mix = D.Categorical(torch.ones(preds.shape[0]))
    comp = D.Categorical(logits=preds.permute(1, 0, 2))
    
    mixture_of_categorical = D.MixtureSameFamily(mix, comp)
    mean_preds = torch.argmax(mixture_of_categorical.component_distribution.probs.mean(1), dim=1).float()
    accuracy = torch.sum(mean_preds == y) / y.shape[0]
    
    ll = mixture_of_categorical.log_prob(y).sum()
    return accuracy, ll

In [18]:
from tqdm.notebook import trange

from models.BNNs.VI_BNN_CLASSIFICTION import VI_BNN
from models.BNNs.pyroBNN_classification import BayesianNeuralNetwork
from pyro.infer.autoguide import AutoDiagonalNormal

In [19]:
alpha = 1.
ELBO_BETA = .1

In [20]:
model = BayesianNeuralNetwork(in_features=D_in, prior_var=1./alpha, hidden_nodes=num_nodes, kl_beta=ELBO_BETA, out_features=D_out)
guide = AutoDiagonalNormal(model)

In [21]:
import pyro
from pyro.infer import SVI, Trace_ELBO

adam = pyro.optim.Adamax({"lr": 1e-1})
svi = SVI(model, guide, adam, loss=Trace_ELBO())

num_iterations = 5000
pyro.clear_param_store()
for j in trange(num_iterations):
    # calculate the loss and take a gradient step
    loss = svi.step(x_train, y_train)
    if j % 1000 == 0:
        print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(x_train)))

guide.requires_grad_(False)

params = []
for name, value in pyro.get_param_store().items():
    params.append(pyro.param(name))

  0%|          | 0/5000 [00:00<?, ?it/s]

ValueError: at site "obs", invalid log_prob shape
  Expected [107], actual [107, 107]
  Try one of the following fixes:
  - enclose the batched tensor in a with pyro.plate(...): context
  - .to_event(...) the distribution being sampled
  - .permute() data dimensions

In [29]:
model_loss = 'multi_class_linear_output'

mean, stds = params
# Effect of tau
tau = alpha # Prior Precision
tau_out = 1. # Output Precision

tau_list = []
for w in net.parameters():
    tau_list.append(tau) # set the prior precision to be the same for each set of weights
tau_list = torch.tensor(tau_list).to(device)


variational_posterior = D.MultivariateNormal(loc=mean, covariance_matrix=torch.diag(stds ** 2))

variational_samps = variational_posterior.sample((1000,))

In [30]:
pred_list_val, _ = hamiltorch.predict_model(net, x=x_val, y=y_val, samples=variational_samps, 
                                                    model_loss='multi_class_linear_output', tau_out=1., 
                                                    tau_list=tau_list)

compute_categorical_mixture(pred_list_val, y_val)

(tensor(0.6500), tensor(-86.9254))

In [31]:
pred_list_train, _ = hamiltorch.predict_model(net, x=x_train, y=y_train, samples=variational_samps, 
                                                    model_loss='multi_class_linear_output', tau_out=1., 
                                                    tau_list=tau_list)

compute_categorical_mixture(pred_list_train, y_train)

(tensor(0.7000), tensor(-43.1921))

In [32]:
prior = D.MultivariateNormal(loc=torch.zeros_like(mean), covariance_matrix = 1./alpha * torch.eye(mean.shape[-1]))
D.kl_divergence(variational_posterior, prior)

tensor(25.3537)

# Compress some weights with variational scheme

In [33]:
#### sample weights with compression algorithm
from rec.beamsearch.Coders.Encoder_Variational import Encoder
from rec.beamsearch.distributions.CodingSampler import CodingSampler
from rec.beamsearch.distributions.VariationalPosterior import VariationalPosterior
from rec.beamsearch.samplers.GreedySampling import GreedySampler
from rec.OptimisingVars.VariationalOptimiser import VariationalOptimiser
from tqdm.notebook import trange
coding_sampler = CodingSampler
auxiliary_posterior = VariationalPosterior
selection_sampler = GreedySampler
omega = 5

initial_seed = 0
beamwidth = 1
epsilon = 0.



compute_params_enc =  encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1./alpha)

n_auxiliaries = compute_params_enc.n_auxiliary
kl_q_p = compute_params_enc.total_kl
var_opt = VariationalOptimiser(compute_params_enc.target, omega, n_auxiliaries, kl_q_p, n_trajectories=16, total_var=1./alpha)
aux_vars = var_opt.run_optimiser(epochs=1000, lr=1e-1)

The mean loss is 3.12623. The mean KL is: 4.88095: 100%|██████████| 1000/1000 [05:04<00:00,  3.28it/s]


In [34]:
kl_q_p

tensor(25.3537)

In [35]:
initial_seed = 0
beamwidth = 1
epsilon = 0.

compressed_weights_low_eps = []
num_compressed_samples = 500
for i in trange(num_compressed_samples):
    initial_seed = initial_seed + i * 10
    encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1./alpha)
    
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    w, idx = encoder.run_encoder()

    compressed_weights_low_eps.append(w[0])

  0%|          | 0/500 [00:00<?, ?it/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448278899/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [36]:
pred_list_low_eps, _ = hamiltorch.predict_model(net, x=x_val, y=y_val, samples=compressed_weights_low_eps, 
                                                    model_loss='multi_class_linear_output', tau_out=1., 
                                                    tau_list=tau_list)

compute_categorical_mixture(pred_list_low_eps, y_val)

(tensor(0.6500), tensor(-93.0654))

In [37]:
initial_seed = 0
beamwidth = 1
epsilon = 0.1

compressed_weights_var_med_eps = []
num_compressed_samples = 500
for i in trange(num_compressed_samples):
    initial_seed = initial_seed + i * 10
    encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1./alpha)
    
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    w, idx = encoder.run_encoder()

    compressed_weights_var_med_eps.append(w[0])

  0%|          | 0/500 [00:00<?, ?it/s]

In [38]:
pred_list_var_med_eps, _ = hamiltorch.predict_model(net, x=x_val, y=y_val, samples=compressed_weights_var_med_eps, 
                                                    model_loss='multi_class_linear_output', tau_out=1., 
                                                    tau_list=tau_list)

compute_categorical_mixture(pred_list_var_med_eps, y_val)

(tensor(0.6500), tensor(-91.0513))

In [39]:
initial_seed = 0
beamwidth = 1
epsilon = 0.2

compressed_weights_var_high_eps = []
num_compressed_samples = 500
for i in trange(num_compressed_samples):
    initial_seed = initial_seed + i * 10
    encoder = Encoder(variational_posterior,
                      initial_seed,
                      coding_sampler,
                      selection_sampler,
                      auxiliary_posterior,
                      omega,
                      epsilon=epsilon,
                      beamwidth=beamwidth,
                      prior_var=1./alpha)
    
    encoder.auxiliary_posterior.coding_sampler.auxiliary_vars = aux_vars
    w, idx = encoder.run_encoder()

    compressed_weights_var_high_eps.append(w[0])

  0%|          | 0/500 [00:00<?, ?it/s]

In [40]:
pred_list_var_high_eps, _ = hamiltorch.predict_model(net, x=x_val, y=y_val, samples=compressed_weights_var_high_eps, 
                                                    model_loss='multi_class_linear_output', tau_out=1., 
                                                    tau_list=tau_list)

compute_categorical_mixture(pred_list_var_high_eps, y_val)

(tensor(0.6500), tensor(-89.9019))

In [41]:
# dump the important stuff
import pickle as pkl
pkl.dump(variational_posterior, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_post_beta_{ELBO_BETA}.pkl', 'wb'))
pkl.dump(aux_vars, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_optimised_vars_beta_{ELBO_BETA}.pkl', 'wb'))
pkl.dump(kl_q_p, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_kl_beta_{ELBO_BETA}.pkl', 'wb'))
pkl.dump(variational_samps, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_exact_beta_{ELBO_BETA}.pkl', 'wb'))
pkl.dump(compressed_weights_low_eps, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_beta_{ELBO_BETA}_eps_0.pkl', 'wb'))
pkl.dump(compressed_weights_var_med_eps, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_beta_{ELBO_BETA}_eps_0.1.pkl', 'wb'))
pkl.dump(compressed_weights_var_high_eps, open(f'PickledStuff/BNN_BETA_CLASSIFICATION/VAR/var_beta_{ELBO_BETA}_eps_0.2.pkl', 'wb'))