In [9]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [10]:
# Number of samples drawn for each synthetic feature and noise vector.
N = 1000

In [11]:
# Fixed seed so the synthetic dataset is identical on every
# Restart Kernel -> Run All (the value itself is arbitrary).
SEED = 0
rng = np.random.default_rng(SEED)

# Two independent Gaussian features: x1 ~ N(3, 2^2), x2 ~ N(0, 5^2).
x1 = rng.normal(3, 2, N)
x2 = rng.normal(0, 5, N)

# Design matrix of shape (N, 2): one row per sample, columns (x1, x2).
X = np.vstack((x1, x2)).T

# Noise: zero-mean Gaussians with increasing standard deviation.
# NOTE(review): n2 and n3 are not used in the visible cells - presumably
# reserved for further targets; confirm before removing.
n1 = rng.normal(0, 2, N)
n2 = rng.normal(0, 6, N)
n3 = rng.normal(0, 10, N)

# Regression target: non-linear in (x1, x2) plus the lowest-variance noise.
# The 5*x1*x2 term means y1 can be strongly negative.
y1 = x1**2 + 5*x1*x2 + 3*np.abs(x2) + n1

In [12]:
# Hold out the last 20% of the samples for testing. The split point is
# derived from the data length so it stays consistent if N changes
# (for N = 1000 this is 800, the value that used to be hard-coded).
TRAIN_FRACTION = 0.8
n_train = int(round(TRAIN_FRACTION * len(X)))

X_train = X[:n_train]
y1_train = y1[:n_train]

X_test = X[n_train:]
y1_test = y1[n_train:]

In [187]:
class ANN(nn.Module):
    """Small fully connected regression network operating in float64.

    Architecture: Linear -> BatchNorm1d -> ReLU -> Linear. All layers are
    converted with ``.double()``, so inputs must be float64 tensors.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of outputs per sample.
        hidden_size: Width of the single hidden layer (default 6).
    """

    # Initialize the layers
    def __init__(self, input_size, output_size, hidden_size=6):
        super().__init__()

        self.act1 = nn.ReLU()
        self.linear1 = nn.Linear(input_size, hidden_size).double()
        self.bn = nn.BatchNorm1d(hidden_size).double()
        self.linear2 = nn.Linear(hidden_size, output_size).double()

    # Perform the computation
    def forward(self, x):
        """Map a float64 batch of shape (batch, input_size) to (batch, output_size)."""
        x = self.linear1(x)
        x = self.bn(x)
        x = self.act1(x)
        # No activation on the output layer: the regression target can be
        # negative, and a trailing ReLU would clamp every prediction to >= 0
        # (and zero the gradient whenever the pre-activation is negative).
        return self.linear2(x)

In [184]:
class VariationalInference(object):
    """Gaussian prior and Gaussian variational family for weight sampling.

    Standard deviations are parameterised through ``rho`` with
    ``sigma = softplus(rho) = log(1 + exp(rho))`` so that sigma is always
    positive.

    Args:
        prior_mu: Mean of the Gaussian prior.
        prior_rho: Rho of the Gaussian prior (sigma = softplus(rho)).
        theta_mu_init: Initial mean of the variational distribution.
        theta_rho_init: Initial rho of the variational distribution.
    """

    def __init__(self,
                 prior_mu, prior_rho,
                 theta_mu_init, theta_rho_init
                ):
        super().__init__()

        # Defining Prior distribution (Gaussian)
        self.prior_mu = self._to_float_tensor(prior_mu)
        self.prior_rho = self._to_float_tensor(prior_rho)

        # Defining Variational class (Gaussian class)
        # nn.Parameter requires a floating-point tensor; integer inputs such
        # as `0` are therefore cast to the default float dtype first.
        self.theta_mu_init = nn.Parameter(self._to_float_tensor(theta_mu_init))
        self.theta_rho_init = nn.Parameter(self._to_float_tensor(theta_rho_init))

        # Defining some constants
        self.logsqrttwopi = torch.log(torch.sqrt(2*torch.tensor(torch.pi)))
        # Not used anywhere inside this class.
        self.K = torch.tensor(1)

    @staticmethod
    def _to_float_tensor(value):
        """Copy ``value`` into a tensor, casting non-float dtypes to the default float dtype."""
        t = torch.tensor(value)
        if not torch.is_floating_point(t):
            t = t.to(torch.get_default_dtype())
        return t

    @staticmethod
    def rho_to_sigma(weight_rho):
        """Return ``softplus(weight_rho)``, a strictly positive standard deviation."""
        if not torch.is_floating_point(weight_rho):
            weight_rho = weight_rho.to(torch.get_default_dtype())
        # F.softplus is the overflow-safe form of log(1 + exp(rho)).
        return F.softplus(weight_rho)

    @staticmethod
    def sample_weight(weight_mu, weight_rho):
        """Draw ``w = mu + softplus(rho) * eps`` with ``eps ~ N(0, 1)``.

        The sample stays differentiable with respect to ``weight_mu`` and
        ``weight_rho`` because the randomness lives entirely in ``eps``.
        """
        eps = torch.randn(weight_mu.shape,
                          dtype=weight_mu.dtype, device=weight_mu.device)
        return weight_mu + VariationalInference.rho_to_sigma(weight_rho) * eps

    def log_prob_gaussian(self, x, mu, rho):
        """Sum over all elements of ``log N(x | mu, softplus(rho)^2)``."""
        sigma = self.rho_to_sigma(rho)
        return (
            - self.logsqrttwopi
            - torch.log(sigma)  # log-density normaliser is log(sigma), not sigma
            - ((x - mu)**2)/(2*sigma**2)
        ).sum()

    def prior(self, w):
        """Log-density of ``w`` under the Gaussian prior, summed over elements."""
        return self.log_prob_gaussian(
            w, self.prior_mu, self.prior_rho)

    def variational(self, w, weight_mu, weight_rho):
        """Log-density of ``w`` under the variational Gaussian, summed over elements."""
        return self.log_prob_gaussian(
            w, weight_mu, weight_rho)

In [185]:
# Means are passed as floats: the constructor wraps the theta_* values in
# nn.Parameter, which only accepts floating-point tensors (an integer 0
# would produce an int64 tensor and raise).
vi_obj = VariationalInference(
    prior_mu=0.0, prior_rho=0.54,
    theta_mu_init=0.0, theta_rho_init=0.54,
)

In [171]:
# Sanity check: rho = 0.54 maps to a sigma of roughly 1.
# Called through the class because `rho_to_sigma` is defined inside
# `VariationalInference`, not as a module-level function.
VariationalInference.rho_to_sigma(torch.tensor(0.54))

tensor(0.9992)

In [172]:
# NOTE(review): `weight_rho` is first assigned in a later cell of the visible
# source, so on a fresh top-to-bottom run this cell presumably raises
# NameError - TODO move it below that cell (or confirm an earlier definition).
torch.exp(weight_rho)

tensor([[0.3662, 0.2516]], grad_fn=<ExpBackward0>)

In [173]:
# Shape of the single weight matrix under test: (output_size, input_size).
input_size, output_size = 2, 1

# Variational mean, initialised uniformly in [-0.2, 0.2).
weight_mu = nn.Parameter(
    torch.Tensor(output_size, input_size).uniform_(-0.2, 0.2)
)
# Variational rho, initialised uniformly in [-2, -1).
weight_rho = nn.Parameter(
    torch.Tensor(output_size, input_size).uniform_(-2, -1)
)

In [174]:
# NOTE(review): no module-level `sample_weight` is defined in the visible
# source (only a function of that name inside `VariationalInference`);
# presumably this relies on a definition from an earlier kernel session -
# confirm, or call the class version instead.
w = sample_weight(weight_mu, weight_rho)

In [175]:
# Display the sampled weight tensor.
w

tensor([[-0.0181, -0.4754]], grad_fn=<AddBackward0>)

In [176]:
# NOTE(review): no module-level `prior` is defined in the visible source
# (only a function of that name inside `VariationalInference`); presumably
# this relies on a definition from an earlier kernel session - confirm.
prior(w)

tensor(-3.9496, grad_fn=<SumBackward0>)

In [177]:
# NOTE(review): `variational_posterior` is not defined anywhere in the visible
# source; the closest visible definition is `variational` inside
# `VariationalInference`. Presumably a leftover from an earlier kernel
# session - confirm which name is intended.
variational_posterior(w, weight_mu, weight_rho)

tensor(-4.1615, grad_fn=<SumBackward0>)