In this notebook we compare two different SBL implementations.

In [1]:
# General imports
import numpy as np
import torch

from deepymod.data import Dataset
from deepymod.data.burgers import BurgersDelta
from sklearn.linear_model import BayesianRidge, ARDRegression

import seaborn as sns

# Making data

In [2]:
# Making dataset
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)

y = dataset.time_deriv(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1)) # observations
X = dataset.library(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), poly_order=2, deriv_order=3) # covariates

print(y.shape, X.shape)

(5000, 1) (5000, 12)


In [3]:
y += np.std(y) * 0.1 * np.random.randn(*y.shape)

# Baselineregressor.lambda_

In [4]:
regressor = ARDRegression(fit_intercept=False, compute_score=True, alpha_1=0, alpha_2=0, lambda_1=0, lambda_2=0)
regressor.fit(X, y.squeeze())

ARDRegression(alpha_1=0, alpha_2=0, compute_score=True, fit_intercept=False,
              lambda_1=0, lambda_2=0)

In [5]:
baseline_coeffs = regressor.coef_[:, None]
print(baseline_coeffs)

[[ 0.        ]
 [ 0.        ]
 [ 0.09940434]
 [ 0.        ]
 [ 0.        ]
 [-1.00331341]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]
 [ 0.        ]]


In [6]:
baseline_noise_precision = regressor.alpha_
print(baseline_noise_precision)

3902.8775100881894


In [7]:
baseline_prior_precision = regressor.lambda_
print(baseline_prior_precision[:, None])

[[4.78668323e+07]
 [1.02399289e+04]
 [1.01201526e+02]
 [1.88313589e+07]
 [2.97119441e+05]
 [9.93403556e-01]
 [2.70580589e+04]
 [7.43639403e+07]
 [3.40000068e+04]
 [2.35319474e+04]
 [2.86066806e+04]
 [3.68144668e+06]]


In [8]:
regressor.scores_

[18184.616009957303, 18218.39333270795, 18226.32657003038, 18226.327448838227]

# Direct

In [113]:
threshold = 1e4
mask = (alpha < threshold)[:, None]
alpha = torch.tensor(baseline_prior_precision)
beta = torch.tensor(baseline_noise_precision)

alpha_inv = torch.diag(alpha**-1) * mask

In [116]:
X = torch.tensor(X)
y = torch.tensor(y)

N = X.shape[0]
M = X.shape[1]

  X = torch.tensor(X)
  y = torch.tensor(y)


In [115]:
C = beta**-1 * torch.eye(N) + X @ alpha_inv @ X.T 

RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #3 'mat2' in call to _th_addmm_out

In [27]:
p =-1/2 * (N * np.log(2*np.pi) + torch.sum(torch.log(torch.diag(C))) + y.T @ torch.inverse(C) @ y)

In [28]:
p

tensor([[9288.0976]], dtype=torch.float64)

# Direct using woodbury

Let's first check this out using the found values to make sure everything is correct.

In [117]:
C_inv = beta * (torch.eye(N) - X @ alpha_inv @ torch.inverse(beta**-1 * torch.eye(M)  + X.T @ X @ alpha_inv.T) @ X.T)

p =-1/2 * (N * np.log(2*np.pi) - torch.sum(torch.log(torch.diag(C_inv))) + y.T @ C_inv @ y)

RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #3 'mat2' in call to _th_addmm_out

In [118]:
print(p)

tensor([[9288.0975]], dtype=torch.float64)


In [119]:
C_inv = beta * (torch.eye(N) - X @ alpha_inv @ torch.inverse(beta**-1 * torch.eye(M)  + X.T @ X @ alpha_inv.T) @ X.T)

p =-1/2 * (N * np.log(2*np.pi) + torch.sum(torch.log(torch.diag(C))) + y.T @ C_inv @ y)

RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #3 'mat2' in call to _th_addmm_out

In [54]:
print(p)

tensor([[9288.0975]], dtype=torch.float64)


# Indirect using bishop's formula

In [60]:
A_inv = alpha_inv @ torch.inverse(torch.eye(M) + beta * X.T @ X @ alpha_inv.T)
mn = beta * A_inv @ X.T @ y

In [61]:
mn

tensor([[ 0.0000],
        [ 0.0000],
        [ 0.0994],
        [ 0.0000],
        [ 0.0000],
        [-1.0033],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000],
        [ 0.0000]], dtype=torch.float64)

In [70]:
E = beta * torch.sum((y - X @ mn)**2) + mn.T @ torch.diag(alpha) @ mn
loss = (-1/2 * torch.sum(torch.log(torch.diag(alpha_inv)[mask.squeeze()])) 
        + N / 2 * torch.log(beta)
        - E 
        + 1/2 * torch.sum(torch.log(torch.diag(A_inv)[mask.squeeze()])) 
        - N/2 * np.log(2 * np.pi))

In [72]:
print(loss)

tensor([[11066.4237]], dtype=torch.float64)


In [100]:
threshold = 1e4
a = torch.nn.Parameter(torch.zeros(12))
b = torch.nn.Parameter(-torch.log(torch.var(y)))

In [101]:
optimizer = torch.optim.Adam([a, b], lr=0.1)
max_epochs = 1e4

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.float32)


In [102]:
for epoch in torch.arange(max_epochs):
    alpha = torch.exp(a)
    beta = torch.exp(b)
    
    
    alpha_inv = torch.diag(alpha**-1) * (alpha < threshold)[:, None]
    A_inv = alpha_inv @ torch.inverse(torch.eye(M) + beta * X.T @ X @ alpha_inv.T)
    mn = beta * A_inv @ X.T @ y
    E = beta * torch.sum((y - X @ mn)**2) + mn.T @ torch.diag(alpha) @ mn
    ll = (-1/2 * torch.sum(torch.log(torch.diag(alpha_inv)[mask.squeeze()])) 
            + N / 2 * torch.log(beta)
            - E 
            + 1/2 * torch.sum(torch.log(torch.diag(A_inv)[mask.squeeze()])) 
            - N/2 * np.log(2 * np.pi))
    loss = -ll
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 1000 == 0:
        print(ll)

tensor([[4479.7905]], grad_fn=<SubBackward0>)
tensor([[11842.3652]], grad_fn=<SubBackward0>)
tensor([[11842.3652]], grad_fn=<SubBackward0>)
tensor([[11842.3652]], grad_fn=<SubBackward0>)


KeyboardInterrupt: 

In [103]:
ll

tensor([[11842.3652]], grad_fn=<SubBackward0>)

In [105]:
mn

tensor([[-1.6781e-04],
        [-4.6799e-03],
        [ 9.9049e-02],
        [ 1.2948e-04],
        [ 2.4755e-03],
        [-9.8386e-01],
        [-2.5128e-03],
        [-6.4078e-05],
        [-1.0621e-02],
        [-1.3537e-02],
        [ 2.4004e-03],
        [-1.2904e-04]], grad_fn=<MmBackward>)

In [107]:
torch.diag(alpha_inv)

tensor([2.8467e+00, 3.5598e+07, 1.9621e-02, 1.2337e+05, 1.6351e+01, 1.9376e+00,
        8.4520e+00, 2.2778e+02, 5.6448e+06, 5.1708e+02, 2.4766e+03, 4.9008e-01],
       grad_fn=<DiagBackward>)

In [109]:
alpha[:, None]

tensor([[3.5129e-01],
        [2.8092e-08],
        [5.0966e+01],
        [8.1056e-06],
        [6.1157e-02],
        [5.1610e-01],
        [1.1832e-01],
        [4.3903e-03],
        [1.7715e-07],
        [1.9339e-03],
        [4.0377e-04],
        [2.0405e+00]], grad_fn=<UnsqueezeBackward0>)

# Bishop + woodbury