In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so the synthetic noise is identical on every run
np.random.seed(42)

# Observed inputs sit in two disjoint clusters, leaving (0.2, 0.6) unobserved
x_obs = np.concatenate([np.linspace(-0.2, 0.2, 500), np.linspace(0.6, 1, 500)])
noise = 0.02 * np.random.randn(x_obs.size)
# The same noise vector perturbs the sine arguments and is added to the response
y_obs = (
    x_obs
    + 0.3 * np.sin(2 * np.pi * (x_obs + noise))
    + 0.3 * np.sin(4 * np.pi * (x_obs + noise))
    + noise
)

# Noise-free target function on a wider, dense grid
x_true = np.linspace(-0.5, 1.5, 1000)
y_true = x_true + 0.3 * np.sin(2 * np.pi * x_true) + 0.3 * np.sin(4 * np.pi * x_true)

# Axis ranges (xlims is reused later to build the test grid)
xlims = [-0.5, 1.5]
ylims = [-1.5, 2.5]

# Ground truth first, observations on top
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x_true, y_true, 'b-', label="True function", linewidth=3)
ax.plot(x_obs, y_obs, 'ko', label="Observations", markersize=4)
ax.set(xlim=xlims, ylim=ylims)
ax.set_xlabel("X", fontsize=30)
ax.set_ylabel("Y", fontsize=30)
ax.legend(loc="lower right", fontsize=15, frameon=False)

plt.show()

In [3]:
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
import torch.nn as nn

class BNN(PyroModule):
    """One-hidden-layer Bayesian neural network for regression.

    Every weight and bias of the two linear layers is a Pyro sample site with
    an independent ``Normal(0, prior_scale)`` prior. The observation noise is
    driven by a latent ``sigma`` with a ``Gamma(0.5, 1)`` prior.

    Args:
        in_dim: Input width of the first linear layer. ``forward`` reshapes
            its input to ``(-1, 1)``, so only ``in_dim=1`` is compatible with it.
        out_dim: Output width of the second linear layer.
        hid_dim: Number of hidden units.
        prior_scale: Standard deviation of the Normal prior on all weights
            and biases.
    """

    def __init__(self, in_dim=1, out_dim=1, hid_dim=5, prior_scale=10.):
        super().__init__()

        self.activation = nn.Tanh()  # or nn.ReLU()
        self.layer1 = PyroModule[nn.Linear](in_dim, hid_dim)  # Input to hidden layer
        self.layer2 = PyroModule[nn.Linear](hid_dim, out_dim)  # Hidden to output layer

        # Replace the deterministic parameters with random variables; to_event
        # marks all parameter dimensions as a single multivariate event.
        self.layer1.weight = PyroSample(dist.Normal(0., prior_scale).expand([hid_dim, in_dim]).to_event(2))
        self.layer1.bias = PyroSample(dist.Normal(0., prior_scale).expand([hid_dim]).to_event(1))
        self.layer2.weight = PyroSample(dist.Normal(0., prior_scale).expand([out_dim, hid_dim]).to_event(2))
        self.layer2.bias = PyroSample(dist.Normal(0., prior_scale).expand([out_dim]).to_event(1))

    def forward(self, x, y=None):
        """Sample the network and (optionally) condition on observations.

        Args:
            x: Input tensor; it is reshaped to a column of shape ``(N, 1)``.
            y: Optional observed targets passed as ``obs`` to the ``"obs"``
                sample site. When ``None`` the site is sampled instead.

        Returns:
            The network mean ``mu`` with the trailing output axis removed.
        """
        x = x.reshape(-1, 1)
        hidden = self.activation(self.layer1(x))
        # Drop only the trailing output axis. A bare .squeeze() would also
        # strip the batch axis when a single point is passed, returning a 0-d mean.
        mu = self.layer2(hidden).squeeze(-1)
        sigma = pyro.sample("sigma", dist.Gamma(.5, 1))  # Infer the response noise

        # Sampling model; note the Normal scale is sigma * sigma, not sigma
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Normal(mu, sigma * sigma), obs=y)
        return mu

In [None]:
from pyro.infer import MCMC, NUTS

# Instantiate the model before seeding, then fix Pyro's RNG for the sampler
model = BNN()
pyro.set_rng_seed(42)

# Training data as float32 tensors
x_train = torch.tensor(x_obs, dtype=torch.float32)
y_train = torch.tensor(y_obs, dtype=torch.float32)

# NUTS = "No-U-Turn Sampler" (https://arxiv.org/abs/1111.4246), an HMC kernel
# with an adaptive step size.
# jit_compile=True is faster but requires PyTorch 1.6+
nuts_kernel = NUTS(model, jit_compile=True)

# Draw 50 posterior samples conditioned on the observations
mcmc = MCMC(nuts_kernel, num_samples=50)
mcmc.run(x_train, y_train)

In [9]:
from pyro.infer import Predictive

# Dense grid spanning the plotted x-range
x_test = torch.linspace(*xlims, steps=3000)

# Push the grid through the model once per posterior sample drawn by MCMC
predictive = Predictive(model, posterior_samples=mcmc.get_samples())
preds = predictive(x_test)

In [10]:
def plot_predictions(preds):
    """Plot the predictive mean and a +/- 2 standard-deviation band.

    The curve is drawn against the notebook-level ``x_test`` grid, together
    with the notebook-level ground truth (``x_true``, ``y_true``) and
    observations (``x_obs``, ``y_obs``).

    Args:
        preds: Mapping with an ``'obs'`` tensor of predictive draws. The mean
            and standard deviation are taken across axis 1 of its transpose
            (presumably the posterior-sample axis -- confirm against the
            ``Predictive`` output).
    """
    # Convert once; .cpu() keeps this working if the draws live on an accelerator
    obs_draws = preds['obs'].T.detach().cpu().numpy()
    y_pred = obs_draws.mean(axis=1)
    y_std = obs_draws.std(axis=1)

    fig, ax = plt.subplots(figsize=(10, 5))
    xlims = [-0.5, 1.5]
    ylims = [-1.5, 2.5]
    ax.set_xlim(xlims)
    ax.set_ylim(ylims)
    ax.set_xlabel("X", fontsize=30)
    ax.set_ylabel("Y", fontsize=30)

    ax.plot(x_true, y_true, 'b-', linewidth=3, label="true function")
    # Observations are drawn once (the original plotted the same points twice)
    ax.plot(x_obs, y_obs, 'ko', markersize=4, label="observations")
    ax.plot(x_test, y_pred, '-', linewidth=3, color="#408765", label="predictive mean")
    ax.fill_between(x_test, y_pred - 2 * y_std, y_pred + 2 * y_std, alpha=0.6, color='#86cfac', zorder=5)

    ax.legend(loc=4, fontsize=15, frameon=False)

In [None]:
# Plot the predictive draws computed from the MCMC posterior samples
plot_predictions(preds)

In [40]:
import torch
import torch.nn as nn
import torch.optim as optim

# Use the first CUDA device when one is available; otherwise run on the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from sklearn.datasets import make_regression
train_size = 1000
batch_size = 10000  # total number of generated samples (train + test)
test_size = batch_size - train_size

# Synthetic linear-regression problem with 5 features and unit Gaussian noise
X, y = make_regression(n_samples=batch_size, n_features=5, noise=1, random_state=42)
# The original bare `X.shape` in the middle of the cell displayed nothing;
# check the invariant explicitly instead.
assert X.shape == (batch_size, 5)

# First `train_size` rows are the training split, the remainder is the test split
data_train = torch.tensor(X[:train_size], dtype=torch.float32).to(device)
target_train = torch.tensor(y[:train_size], dtype=torch.float32).to(device)

data_test = torch.tensor(X[train_size:], dtype=torch.float32).to(device)
target_test = torch.tensor(y[train_size:], dtype=torch.float32).to(device)
assert len(data_train) == train_size and len(data_test) == test_size
data_test

In [34]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert tensors into TensorDataset
train_dataset = TensorDataset(data_train, target_train)
test_dataset = TensorDataset(data_test, target_test)

# Create DataLoader for both datasets
train_loader = DataLoader(train_dataset, batch_size=train_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_size, shuffle=False)

In [39]:
# for x, y in train_loader:
#     print(x)
#     print(y)

In [41]:
class BaselineTorchModel(nn.Module):
    """Deterministic baseline: linear -> sigmoid -> linear with a scalar output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.hidden_layer = nn.Linear(input_dim, hidden_dim)
        self.out_layer = nn.Linear(hidden_dim, 1)
        self.act = torch.sigmoid

    def forward(self, inputs):
        """Return ``(output, None)``.

        The second element is a placeholder kept for future experiments, so
        callers can unpack two values from every model's forward pass.
        """
        activated = self.act(self.hidden_layer(inputs))
        return self.out_layer(activated), None

In [None]:
# Baseline network: input width taken from the feature columns of X, 32 hidden units
baseline_torch_model = BaselineTorchModel(X.shape[1], 32).to(device)
baseline_torch_model

In [None]:
# Element count of each parameter tensor, in parameters() order
[p.numel() for p in baseline_torch_model.parameters()]

In [None]:
# Number of test rows to preview
samples = 10

# test_loader serves the test split as one batch, so a single draw returns all of it
examples_torch, targets_torch = next(iter(test_loader))

# Forward pass of the (still untrained) baseline on the first `samples` rows
predicted, _ = baseline_torch_model(examples_torch[:samples])
predicted = predicted.detach().cpu().numpy()

for idx in range(samples):
    rounded_prediction = round(float(predicted[idx][0]), 1)
    print(f"Predicted: {rounded_prediction} - Actual: {targets_torch[idx]}")

In [35]:
class BnnTorch(nn.Module):
    """Regression network whose hidden-layer weights are sampled on every pass.

    The ``input_dim * hidden_dim`` hidden weights follow a learned factorised
    Gaussian ``q = Normal(mu, softplus(rho))``. Each forward pass draws one
    weight sample with the reparametrisation trick and also returns the KL
    divergence from ``q`` to a standard-normal prior. The hidden layer has no
    bias; the output layer is an ordinary deterministic ``nn.Linear``.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.
        activation: Optional callable applied to the hidden pre-activations.
    """

    def __init__(self, input_dim, hidden_dim, activation=None):
        super().__init__()
        n = input_dim * hidden_dim
        self.mu = nn.Parameter(torch.zeros(n, dtype=torch.float32))
        # rho is the inverse softplus of 1, so the initial posterior scale is 1
        self.rho = nn.Parameter(torch.log(torch.expm1(torch.ones(n, dtype=torch.float32))))
        self.out_layer = nn.Linear(hidden_dim, 1)
        self.act = activation
        self.hidden_dim = hidden_dim
        # Keep the prior parameters as non-persistent buffers: they follow the
        # module through .to()/.cuda() without relying on a global `device`,
        # and they stay out of state_dict so existing checkpoints still load.
        self.register_buffer("prior_loc", torch.zeros(n, dtype=torch.float32), persistent=False)
        self.register_buffer("prior_scale", torch.ones(n, dtype=torch.float32), persistent=False)
        self.kl_func = torch.distributions.kl.kl_divergence
        self.batch_norm = torch.nn.BatchNorm1d(input_dim)

    @property
    def prior(self):
        """Standard-normal prior over the hidden weights, on the module's device."""
        return torch.distributions.Normal(loc=self.prior_loc, scale=self.prior_scale)

    def forward(self, inputs):
        """Run one stochastic forward pass.

        Args:
            inputs: Tensor of shape ``(batch, input_dim)``.

        Returns:
            ``(output, kl)``: predictions of shape ``(batch, 1)`` and the
            scalar KL divergence summed over all hidden weights.
        """
        inputs = self.batch_norm(inputs)
        # softplus keeps the scale positive; the library version does not
        # overflow for large rho the way log(1 + exp(rho)) does
        q = torch.distributions.Normal(loc=self.mu,
                                       scale=nn.functional.softplus(self.rho))

        kl = torch.sum(self.kl_func(q, self.prior))
        # we use q.rsample() which uses the reparametrization trick instead of
        # q.sample() which breaks the auto-differentiation path
        w = q.rsample().reshape((-1, self.hidden_dim))
        h = inputs @ w
        if self.act is not None:
            h = self.act(h)
        output = self.out_layer(h)
        return output, kl

In [36]:
from tqdm import tqdm

def run_experiment_torch(model, loss, num_epochs, train_dataloader, test_dataloader, lr=0.001):
    """Train ``model`` with RMSprop, then print its RMSE on the test data.

    Args:
        model: Module whose forward pass returns a ``(prediction, extra)`` pair.
        loss: Callable invoked as ``loss(model(x), y)``; it must return a
            scalar tensor that supports ``backward()``.
        num_epochs: Number of full passes over ``train_dataloader``.
        train_dataloader: Iterable of ``(x, y)`` training batches.
        test_dataloader: Iterable of ``(x, y)`` evaluation batches.
        lr: RMSprop learning rate.

    Returns:
        None. The test RMSE is printed rather than returned. The model is
        left in eval mode.
    """
    optimizer = optim.RMSprop(model.parameters(), lr=lr)
    model.train()
    for _ in tqdm(range(num_epochs)):
        for x, y in train_dataloader:
            optimizer.zero_grad()
            loss_value = loss(model(x), y)
            loss_value.backward()
            optimizer.step()

    model.eval()
    errors = []
    # Evaluation needs no gradients; skipping the autograd graph avoids
    # holding activations for the whole test batch in memory.
    with torch.no_grad():
        for x, y in test_dataloader:
            yhat, _ = model(x)
            errors.append(((torch.squeeze(yhat) - y) ** 2).cpu().numpy())

    # axis=None flattens all per-batch error arrays into one vector
    rmse = np.sqrt(np.mean(np.concatenate(errors, axis=None)))
    print(f"Test RMSE: {round(rmse, 3)}")

In [None]:
# Bayesian model: input width taken from the feature columns of X, 32 hidden
# units, and no activation passed (so the default None is used)
bnn_torch = BnnTorch(X.shape[1], 32).to(device)
bnn_torch

In [None]:
# Weight applied to the KL term in mse_kl_loss: one over the training-set size
kl_weight = 1. / train_size

def mse_kl_loss(model_outputs, y_true):
    """Mean-squared error plus the KL term scaled by the global ``kl_weight``.

    Args:
        model_outputs: ``(predictions, kl)`` pair as returned by the model.
        y_true: Target tensor compared against the squeezed predictions.

    Returns:
        Scalar loss tensor ``mse + kl * kl_weight``.
    """
    predictions, kl_term = model_outputs
    criterion = torch.nn.MSELoss()
    data_fit = criterion(predictions.squeeze(), y_true)
    penalty = kl_term * kl_weight
    return data_fit + penalty

# Train the Bayesian network for 200 epochs on the MSE + weighted-KL loss,
# then print its RMSE on the test loader
run_experiment_torch(bnn_torch, 
                     mse_kl_loss, 
                     200, train_loader, test_loader)

In [49]:
def display_predictions(predictions, targets, num_rows=None):
    """Print per-example summary statistics of repeated predictions.

    Args:
        predictions: 2-D array with one row per example and one column per
            repeated prediction; statistics are taken along axis 1.
        targets: Indexable sequence of true values, aligned with the rows of
            ``predictions``.
        num_rows: How many leading rows to print. When ``None`` the
            notebook-level ``samples`` value is used, as before. The count is
            clipped to the available rows so short inputs no longer raise
            ``IndexError``.
    """
    if num_rows is None:
        num_rows = samples  # notebook-level preview size
    num_rows = min(num_rows, len(predictions), len(targets))

    # Compute max/min once and derive the range from them
    row_max = np.max(predictions, axis=1)
    row_min = np.min(predictions, axis=1)
    prediction_mean = np.mean(predictions, axis=1).tolist()
    prediction_min = row_min.tolist()
    prediction_max = row_max.tolist()
    prediction_range = (row_max - row_min).tolist()

    for idx in range(num_rows):
        print(
            f"Predictions mean: {round(prediction_mean[idx], 2)}, "
            f"min: {round(prediction_min[idx], 2)}, "
            f"max: {round(prediction_max[idx], 2)}, "
            f"range: {round(prediction_range[idx], 2)} - "
            f"Actual: {targets[idx]}"
        )

In [50]:
def compute_predictions_torch(model, iterations=100, inputs=None):
    """Collect repeated forward passes of ``model`` on the same inputs.

    Args:
        model: Module whose forward pass returns ``(predictions, extra)``,
            with predictions shaped ``(num_examples, 1)``.
        iterations: Number of forward passes; must be at least 1.
        inputs: Batch to evaluate. When ``None`` the notebook-level
            ``examples_torch`` batch is used, as before.

    Returns:
        NumPy array of shape ``(num_examples, iterations)``, one column per pass.

    Raises:
        ValueError: If ``iterations`` is smaller than 1.
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")
    if inputs is None:
        inputs = examples_torch  # notebook-level test batch

    model.eval()
    predicted = []
    # Inference only: skip building autograd graphs for every pass
    with torch.no_grad():
        for _ in range(iterations):
            preds, _ = model(inputs)
            predicted.append(preds.cpu().numpy())
    return np.concatenate(predicted, axis=1)

In [None]:
# Repeated forward passes of the Bayesian network (default: 100 iterations)
predictions = compute_predictions_torch(bnn_torch)

# Print mean / min / max / range of those passes next to the true targets
display_predictions(predictions, targets_torch)

In [None]:
# Shape of the target tensor drawn from test_loader
targets_torch.shape

In [None]:
import matplotlib.pyplot as plt

# One box per example for the first 10 rows of `predictions` (transposed so each
# row becomes a column of samples); red dots mark the corresponding targets
plt.boxplot(predictions[:10].T)
plt.plot(range(1,11), targets_torch[:10].cpu(), 'r.', alpha=0.8);