In [1]:
# General imports
import numpy as np
import torch

from deepymod.data import Dataset
from deepymod.data.burgers import BurgersDelta
from sklearn.linear_model import BayesianRidge, ARDRegression

import seaborn as sns
from scipy.linalg import pinvh

# Making data

In [2]:
# Making dataset
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)

y = dataset.time_deriv(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1)) # observations
X = dataset.library(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), poly_order=2, deriv_order=3) # covariates

print(y.shape, X.shape)

(5000, 1) (5000, 12)


In [3]:
y += np.std(y) * 0.5 * np.random.randn(*y.shape)

In [4]:
np.std(y) * 0.5

0.08815537241970857

# SBL

In [21]:
def SBL_loss(X, y, alpha_, beta_, threshold=False):
    if isinstance(threshold, float):
        mask = alpha_ < threshold
    else:
        mask = torch.ones_like(alpha_, dtype=torch.bool)
    
    n_samples = X.shape[0]

    X_keep = X[:, mask]
    A_inv = torch.inverse(torch.diag(alpha_[mask]) + beta_ * X_keep.T @ X_keep)
    mn = torch.zeros((alpha_.shape[0], 1)).to(X.device)
    mn[mask, :] = beta_ * A_inv @ X_keep.T @ y
    E = beta_ * torch.sum((y - X @ mn)**2) + (alpha_[:, None] * mn**2).sum()

    p_reg = E - (torch.logdet(A_inv) + n_samples * torch.log(beta_) + torch.sum(torch.log(alpha_))) # we use alpha and lambda since these are bounded
    return p_reg, mn

In [22]:
# Now let's optimize
X = torch.tensor(X, dtype=torch.float32)
X = X / torch.norm(X, dim=0, keepdim=True)
y = torch.tensor(y, dtype=torch.float32)

a = torch.nn.Parameter(torch.zeros(12, dtype=torch.float32))
b = torch.nn.Parameter(-torch.log(torch.var(y)))

optimizer = torch.optim.Adam([a, b], lr=1e-2)
max_epochs=1e4

  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.float32)


In [23]:
for epoch in torch.arange(max_epochs):
    alpha_ = torch.min(torch.exp(a), torch.tensor(1e8, dtype=torch.float32))
    beta_ = torch.min(torch.exp(b), torch.tensor(2e4, dtype=torch.float32))
    loss = SBL_loss(X, y, alpha_, beta_)[0]
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 1000 == 0:
        print(loss)

tensor(-16199.5645, grad_fn=<SubBackward0>)
tensor(-20188.8105, grad_fn=<SubBackward0>)
tensor(-20190.8887, grad_fn=<SubBackward0>)
tensor(-20191.1055, grad_fn=<SubBackward0>)
tensor(-20191.1699, grad_fn=<SubBackward0>)
tensor(-20191.1973, grad_fn=<SubBackward0>)
tensor(-20191.2090, grad_fn=<SubBackward0>)
tensor(-20191.2188, grad_fn=<SubBackward0>)
tensor(-20191.2207, grad_fn=<SubBackward0>)
tensor(-20191.2266, grad_fn=<SubBackward0>)


In [24]:
SBL_loss(X, y, alpha_, beta_)[1]

tensor([[ 5.8950e-06],
        [-2.1367e-05],
        [ 7.2259e+00],
        [ 9.6952e-08],
        [ 3.8401e-05],
        [-1.0376e+01],
        [ 7.6844e-07],
        [ 1.0455e-06],
        [ 9.3409e-02],
        [-6.6177e-06],
        [ 6.0028e-06],
        [ 4.2977e-06]], grad_fn=<CopySlices>)

In [25]:
alpha_[:, None]

tensor([[4.8486e+05],
        [6.1259e+04],
        [1.9149e-02],
        [4.7361e+05],
        [1.6055e+05],
        [9.2873e-03],
        [3.9678e+05],
        [7.7817e+05],
        [6.8181e+01],
        [2.0137e+04],
        [4.4681e+05],
        [6.5886e+05]], grad_fn=<UnsqueezeBackward0>)

In [26]:
beta_

tensor(154.8037, grad_fn=<MinBackward2>)

# Bayesian ridge

## Baseline

In [36]:
from sklearn.linear_model import BayesianRidge

In [37]:
reg = BayesianRidge(alpha_1=0, alpha_2=0, lambda_1=0, lambda_2=0, fit_intercept=False, compute_score=True)

In [38]:
reg.fit(X, y)

  return f(**kwargs)


BayesianRidge(alpha_1=0, alpha_2=0, compute_score=True, fit_intercept=False,
              lambda_1=0, lambda_2=0)

In [40]:
reg.coef_[:, None]

array([[ 0.0168327 ],
       [-0.35016102],
       [ 7.59289441],
       [ 0.46593763],
       [-0.36257924],
       [-9.18487524],
       [-1.23731263],
       [-0.58438872],
       [ 0.5934701 ],
       [-1.06542042],
       [ 1.2996297 ],
       [ 0.03825993]])

In [41]:
reg.lambda_

0.07816731595357417

## Own imp

In [42]:
def BR_loss(X, y, alpha_, beta_):
    n_samples = X.shape[0]
    alpha = alpha_ * torch.ones(X.shape[1])
    
    A_inv = torch.inverse(torch.diag(alpha) + beta_ * X.T @ X)
    mn = beta_ * A_inv @ X.T @ y
    E = beta_ * torch.sum((y - X @ mn)**2) + (alpha[:, None] * mn**2).sum()

    p_reg = E - (torch.logdet(A_inv) + n_samples * torch.log(beta_) + torch.sum(torch.log(alpha))) # we use alpha and lambda since these are bounded
    return p_reg, mn

In [43]:
# Now let's optimize
X = torch.tensor(X, dtype=torch.float32)
X = X / torch.norm(X, dim=0, keepdim=True)
y = torch.tensor(y, dtype=torch.float32)

a = torch.nn.Parameter(torch.zeros(1, dtype=torch.float32))
b = torch.nn.Parameter(-torch.log(torch.var(y)))

optimizer = torch.optim.Adam([a, b], lr=1e-2)
max_epochs=1e4

  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.float32)


In [44]:
for epoch in torch.arange(max_epochs):
    alpha_ = torch.min(torch.exp(a), torch.tensor(1e8, dtype=torch.float32))
    beta_ = torch.min(torch.exp(b), torch.tensor(2e4, dtype=torch.float32))
    loss = BR_loss(X, y, alpha_, beta_)[0]
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 1000 == 0:
        print(loss)

tensor(-16199.5645, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4551, grad_fn=<SubBackward0>)
tensor(-20138.4551, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)
tensor(-20138.4570, grad_fn=<SubBackward0>)


In [45]:
alpha_

tensor([0.0782], grad_fn=<MinBackward2>)

In [46]:
BR_loss(X, y, alpha_, beta_)[1]

tensor([[ 0.0169],
        [-0.3498],
        [ 7.5921],
        [ 0.4656],
        [-0.3633],
        [-9.1862],
        [-1.2336],
        [-0.5853],
        [ 0.5948],
        [-1.0653],
        [ 1.2974],
        [ 0.0392]], grad_fn=<MmBackward>)

In [47]:
beta_

tensor(154.6533, grad_fn=<MinBackward2>)

Seems the same, great :-)