# Train and evaluate a PC

In [1]:
import random
import torch
import numpy as np
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cpu")  # The device to use, e.g., "cpu", "cuda", "cuda:1"

%load_ext autoreload
%autoreload 2

In [3]:
%reload_ext autoreload

Set the random seeds.

In [4]:
random.seed(4)
np.random.seed(4)
torch.manual_seed(4)
# if 'cuda' in device.type:
#     torch.cuda.manual_seed(42)

<torch._C.Generator at 0x7fb8a072b8f0>

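## Load the kin40k Dataset

Load a train/test split of the UCI kin40k regression dataset (N=40000, d=8), hold out part of the training split for validation, and standardize the inputs with statistics computed on the training rows.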

In [5]:
import torch.nn.functional as F

from uci_datasets import Dataset

from ignite.engine import Events, Engine
from ignite.metrics import Average, Loss
from ignite.contrib.handlers import ProgressBar

import gpytorch
from gpytorch.mlls import VariationalELBO
from gpytorch.likelihoods import GaussianLikelihood

import pandas as pd
import numpy as np


In [6]:
data = Dataset("kin40k")
x_train, y_train, x_test, y_test = data.get_split(split=2)

kin40k dataset, N=40000, d=8


In [7]:
x_train.shape, x_test.shape

((36000, 8), (4000, 8))

In [8]:
# The first 32000 rows of the provided training split are used for fitting;
# the remaining rows become the validation set. Targets are squeezed so that
# any singleton axes are dropped.
x_train_real, x_val = x_train[:32000], x_train[32000:]
y_train_real = y_train[:32000].squeeze()
y_val = y_train[32000:].squeeze()
y_test = y_test.squeeze()

In [9]:
# Per-feature statistics are taken from the training rows only and then reused
# unchanged for the validation and test rows.
mean, std = x_train_real.mean(axis=0), x_train_real.std(axis=0)

x_train_real_normalized, x_val_normalized, x_test_normalized = (
    (features - mean) / std for features in (x_train_real, x_val, x_test)
)

In [13]:
# from cirkit.models.smgp import initialize_from_data
from cirkit.layers.input.sm_layer_imag import SMKernelPosImagLayer, SMKernelNegImagLayer, SMKernelImagLayerParams, SMKernelImagFlattenLayerParams
from cirkit.models.smgp import CircuitSMGP
from cirkit.models.gp import CircuitGP, initial_values

params_module = SMKernelImagLayerParams(num_vars=8, num_output_units=10)
# params_module = SMKernelImagFlattenLayerParams(num_vars=8, num_output_units=30)
# initialize_from_data(params_module, torch.tensor(x_train_real), torch.tensor(y_train_real))

In [14]:
from cirkit.region_graph.poon_domingos import PoonDomingos
from cirkit.region_graph.random_binary_tree import RandomBinaryTree
from cirkit.region_graph.fully_factorized import FullyFactorized
from cirkit.region_graph.quad_tree import QuadTree
# region_graph = QuadTree(width, height, struct_decomp=False)
region_graph = RandomBinaryTree(num_vars=8, depth=3, num_repetitions=4)
# region_graph = FullyFactorized(num_vars=8)



In [15]:
from cirkit.layers.input.exp_family import CategoricalLayer
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.input.rbf_kernel import RBFKernelLayer

efamily_cls = params_module 
efamily_kwargs = {}
layer_cls = CPLayer
layer_kwargs = {'rank': 1}


In [16]:
from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax
from cirkit.models.tensorized_circuit import TensorizedPC
from cirkit.models.tensorized_SM_circuit import TensorizedSMPC
pc_sm = TensorizedSMPC.from_region_graph(
    region_graph,
    num_inner_units=10,
    num_input_units=10,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=1,
    reparam=ReparamExp # ReparamLogSoftmax # ReparamExp ReparamSoftmax
)
pc_sm.to(device)
print(pc_sm)



TensorizedSMPC(
  (input_layer_params): SMKernelImagLayerParams(
    (params_sigma): ReparamExp()
    (params_mu): ReparamIdentity()
    (params_weight): ReparamSoftmax()
  )
  (input_layer_pos): SMKernelPosImagLayer(
    (params): SMKernelImagLayerParams(
      (params_sigma): ReparamExp()
      (params_mu): ReparamIdentity()
      (params_weight): ReparamSoftmax()
    )
  )
  (input_layer_neg): SMKernelNegImagLayer(
    (params): SMKernelImagLayerParams(
      (params_sigma): ReparamExp()
      (params_mu): ReparamIdentity()
      (params_weight): ReparamSoftmax()
    )
  )
  (scope_layer): ScopeLayer()
  (inner_layers): ModuleList(
    (0-2): 3 x CollapsedCPLayer(
      (params_in): ReparamExp()
    )
    (3): SumLayer(
      (params): ReparamExp()
    )
  )
)


In [17]:
for param in pc_sm.parameters(): 
    print (param.shape)

torch.Size([10, 1, 8])
torch.Size([10, 1, 8])
torch.Size([8, 10, 10])
torch.Size([16, 10, 10])
torch.Size([8, 10, 10])
torch.Size([4, 10, 1])
torch.Size([1, 4, 1])


In [18]:
import torch.nn as nn

class IdentityMapping(nn.Module):
    """Parameter-free module that hands its input back unchanged.

    Used in this notebook as the "feature extractor" so that the GP operates
    directly on the raw (standardized) inputs.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself; no copy and no transformation is made."""
        return x

In [19]:
# Re-seed NumPy and PyTorch immediately before the loaders and the model are
# built (the notebook's first seeding cell uses a different value, 4).
np.random.seed(24)
torch.manual_seed(24)

batch_size = 32 # alternative value noted by the author: 64

# Wrap the standardized NumPy splits as float tensor datasets.
ds_train = torch.utils.data.TensorDataset(torch.from_numpy(x_train_real_normalized).float(), torch.from_numpy(y_train_real).float())
# Training batches are shuffled and the last incomplete batch is dropped.
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True)

ds_val = torch.utils.data.TensorDataset(torch.from_numpy(x_val_normalized).float(), torch.from_numpy(y_val).float())
dl_val = torch.utils.data.DataLoader(ds_val, batch_size=512, shuffle=False)

ds_test = torch.utils.data.TensorDataset(torch.from_numpy(x_test_normalized).float(), torch.from_numpy(y_test).float())
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=512, shuffle=False)

# steps = 5e3
epochs = 50
print(f"Training with {len(x_train_real)} datapoints for {epochs} epochs")

# Change this boolean to False for SNGP
# NOTE(review): DUE is not read anywhere else in the visible notebook.
DUE = True

input_dim = 8 # input dimensionality (the loaded dataset reports d=8)
num_outputs = 1 # regression with 1D output

# No learned feature extractor: inputs are passed through unchanged.
feature_extractor = IdentityMapping()


n_inducing_points = 50 # alternative value noted by the author: 100

# Project helper (cirkit.models.gp); judging by its name and arguments it derives
# starting inducing points and a lengthscale from the training set -- TODO confirm.
# NOTE(review): initial_lengthscale is not used again in this cell.
initial_inducing_points, initial_lengthscale = initial_values(
    ds_train, feature_extractor, n_inducing_points)

# GP model wrapping the circuit `pc_sm`; presumably the circuit acts as the
# kernel -- confirm in cirkit.models.smgp.
gp_model = CircuitSMGP(
        num_outputs=num_outputs,
        num_features=input_dim,          # CHANGE features / input_dim
        initial_inducing_points=initial_inducing_points,
        circuit=pc_sm,
    )

# Training objective: the negated variational ELBO with a Gaussian likelihood;
# num_data is the number of training points.
likelihood = GaussianLikelihood()
elbo_fn = VariationalELBO(likelihood, gp_model, num_data=len(ds_train))
loss_fn = lambda x, y: -elbo_fn(x, y)


# NOTE(review): placement follows CUDA availability here, not the `device`
# variable defined at the top of the notebook.
if torch.cuda.is_available():
    gp_model = gp_model.cuda()
    likelihood = likelihood.cuda()

# learning rate
lr = 1e-3

# One Adam parameter group for the GP model and one for the likelihood, both at `lr`.
parameters = [
    {"params": gp_model.parameters(), "lr": lr},
]

parameters.append({"params": likelihood.parameters(), "lr": lr})

optimizer = torch.optim.Adam(parameters)
pbar = ProgressBar()

def step(engine, batch):
    """Ignite process function performing one optimisation step.

    Switches the GP model and likelihood to training mode, evaluates
    ``loss_fn`` on the batch, back-propagates and updates the parameters
    through ``optimizer``.

    Args:
        engine: the ignite Engine invoking this function (unused).
        batch: an ``(x, y)`` pair as yielded by the training data loader.

    Returns:
        The batch loss as a Python float.
    """
    for module in (gp_model, likelihood):
        module.train()

    optimizer.zero_grad()

    inputs, targets = batch
    if torch.cuda.is_available():
        inputs, targets = inputs.cuda(), targets.cuda()

    batch_loss = loss_fn(gp_model(inputs), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()


def eval_step(engine, batch):
    """Ignite process function for evaluation: return ``(prediction, target)``.

    Puts the GP model and the likelihood in eval mode, moves the batch to CUDA
    when it is available, and runs the forward pass with gradient tracking
    disabled. Evaluation never calls ``backward``, so building an autograd
    graph for every validation batch only costs memory and time.

    Args:
        engine: the ignite Engine invoking this function (unused).
        batch: an ``(x, y)`` pair as yielded by the data loader.

    Returns:
        ``(y_pred, y)`` where ``y_pred`` is whatever ``gp_model(x)`` returns.
    """
    gp_model.eval() # set to eval
    likelihood.eval()

    x, y = batch
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()

    with torch.no_grad():
        y_pred = gp_model(x)
    return y_pred, y

    
# Ignite engines: `trainer` calls `step` on each training batch, `evaluator`
# calls `eval_step` on each evaluation batch.
trainer = Engine(step)
evaluator = Engine(eval_step)

# Average of the values returned by `step`, exposed as metrics["loss"] on the
# trainer; the progress bar is attached to the trainer as well.
metric = Average()
metric.attach(trainer, "loss")
pbar.attach(trainer)

# NOTE: `metric` is rebound here to a different metric object for the evaluator.
# The evaluator's "loss" is the negated mean of likelihood.expected_log_prob
# evaluated on the (y_pred, y) pair returned by `eval_step`.
metric = Loss(lambda y_pred, y: - likelihood.expected_log_prob(y, y_pred).mean())
metric.attach(evaluator, "loss")

@trainer.on(Events.EPOCH_COMPLETED(every=int(epochs/20) + 1))
def log_results(trainer):
    """Run the evaluator on the validation loader and print both losses.

    Registered on EPOCH_COMPLETED with a period of ``int(epochs/20) + 1``
    epochs (every 3rd epoch for ``epochs = 50``).
    """
    evaluator.run(dl_val) # val dataset
    epoch = trainer.state.epoch
    val_loss = evaluator.state.metrics['loss']
    train_loss = trainer.state.metrics['loss']
    print(f"Results - Epoch: {epoch} - "
          f"Val Loss: {val_loss:.2f} - "
          f"Train Loss: {train_loss:.2f}")


Training with 32000 datapoints for 50 epochs
f_X_samples shape torch.Size([1000, 8])
initial_lengthscale tensor(3.8970)
All circuit parameters shape: 
torch.Size([10, 1, 8])
torch.Size([10, 1, 8])
torch.Size([8, 10, 10])
torch.Size([16, 10, 10])
torch.Size([8, 10, 10])
torch.Size([4, 10, 1])
torch.Size([1, 4, 1])


  super()._check_params_vs_input(X, default_n_init=3)
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

  from tqdm.autonotebook import tqdm


In [20]:
for index, param in enumerate(gp_model.parameters()): 
    # if (index==2):
    print(param.shape)

torch.Size([50, 8])
torch.Size([50])
torch.Size([50, 50])
torch.Size([10, 1, 8])
torch.Size([10, 1, 8])
torch.Size([8, 10, 10])
torch.Size([16, 10, 10])
torch.Size([8, 10, 10])
torch.Size([4, 10, 1])
torch.Size([1, 4, 1])
torch.Size([1])
torch.Size([])


In [21]:
trainer.run(dl_train, max_epochs=epochs)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2198.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

Results - Epoch: 3 - Val Loss: 131115.28 - Train Loss: 197295.81


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

Results - Epoch: 6 - Val Loss: 21793.44 - Train Loss: 29002.67


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

Results - Epoch: 9 - Val Loss: 4594.61 - Train Loss: 5991.33


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

Results - Epoch: 12 - Val Loss: 8.87 - Train Loss: 13.58


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]

Results - Epoch: 15 - Val Loss: 2.28 - Train Loss: 2.62


[1/1000]   0%|           [00:00<?]

[1/1000]   0%|           [00:00<?]



[1/1000]   0%|           [00:00<?]

Engine run is terminating due to exception: 


KeyboardInterrupt: 

In [None]:
for param in likelihood.parameters(): 
    print (param.shape)

In [124]:
from ignite.metrics import RootMeanSquaredError
import torch

# Assuming you have a function to compute RMSE, or you're using Ignite's RMSE metric

def eval_step(engine, batch):
    """Ignite process function returning point predictions for RMSE.

    Args:
        engine: the ignite Engine invoking this function (unused).
        batch: an ``(x, y)`` pair as yielded by the data loader.

    Returns:
        ``(y_pred, y)`` where ``y_pred`` is the ``.mean`` of the object
        returned by ``gp_model(x)``, computed without gradient tracking.
    """
    # Evaluation mode for both the model and the likelihood.
    gp_model.eval()
    likelihood.eval()

    inputs, targets = batch
    if torch.cuda.is_available():
        inputs, targets = inputs.cuda(), targets.cuda()

    # No gradients are needed; the predictive mean serves as the point estimate.
    with torch.no_grad():
        predictive_mean = gp_model(inputs).mean

    return predictive_mean, targets

# Update the evaluator engine
evaluator = Engine(eval_step)

# Attach the RMSE metric to the evaluator
rmse = RootMeanSquaredError()
rmse.attach(evaluator, "RMSE")

# After training, run the evaluator on the test dataset to compute the RMSE
evaluator.run(dl_test)

# Retrieve and display the RMSE
test_rmse = evaluator.state.metrics['RMSE']
print(f"Test RMSE: {test_rmse:.2f}")


Test RMSE: 0.28


# Test SM circuit kernel

In [99]:
x_1 = torch.rand((3, 8 ,1))
x_2 = torch.rand((3, 8 ,1))

In [84]:
pc_sm.input_layer_params.params_mu().shape, pc_sm.input_layer_params.params_sigma().shape, pc_sm.inner_layers[0].params_in().shape




(torch.Size([4, 1, 8]), torch.Size([4, 1, 8]), torch.Size([1, 4, 1]))

In [100]:
pc_sm(x_1, x_2)


tensor([[[4.2293e-04],
         [2.1714e-02],
         [3.4552e-07]],

        [[2.1888e-09],
         [6.2656e-13],
         [1.0155e-09]],

        [[6.2082e-07],
         [3.9243e-08],
         [3.3809e-10]]], grad_fn=<SelectBackward0>)

In [101]:
sigma = pc_sm.input_layer_params.params_sigma().squeeze(1)
mu = pc_sm.input_layer_params.params_mu().squeeze(1)
weights = pc_sm.inner_layers[0].params_in().squeeze(0, 2)

def SMK(x1, x2, sigma, mu):
    """One spectral-mixture component evaluated at the input pair (x1, x2).

    With ``tau = x1 - x2`` this returns
    ``prod_d exp(-2 * pi^2 * tau_d^2 * sigma_d^2) * cos(2 * pi * <tau, mu>)``.
    ``tau`` and ``mu`` go through ``torch.dot``, so they must be 1-D tensors of
    equal length; ``sigma`` is combined element-wise with ``tau``.
    """
    diff = x1 - x2
    envelope = torch.exp(-2*(torch.pi**2) * (diff**2) * (sigma**2)).prod()
    phase = 2.0 * torch.pi * torch.dot(diff, mu)
    return envelope * torch.cos(phase)

# Brute-force reference: for every (i, j) pair print the weighted sum of the
# SMK components, to be compared by eye with the circuit output above.
for i in range(x_1.shape[0]):
    for j in range(x_2.shape[0]):
        smk = [
            weights[k] * SMK(x_1.squeeze(-1)[i], x_2.squeeze(-1)[j], sigma[k], mu[k])
            for k in range(sigma.shape[0])
        ]
        print(sum(smk))

tensor(0.0004, grad_fn=<AddBackward0>)
tensor(0.0217, grad_fn=<AddBackward0>)
tensor(3.4552e-07, grad_fn=<AddBackward0>)
tensor(2.1888e-09, grad_fn=<AddBackward0>)
tensor(6.2656e-13, grad_fn=<AddBackward0>)
tensor(1.0155e-09, grad_fn=<AddBackward0>)
tensor(6.2082e-07, grad_fn=<AddBackward0>)
tensor(3.9243e-08, grad_fn=<AddBackward0>)
tensor(3.3809e-10, grad_fn=<AddBackward0>)


In [None]:
# SM kernel Check

In [310]:


from cirkit.layers.input.sm_layer_imag import SMKernelPosImagLayer, SMKernelNegImagLayer, SMKernelImagLayerParams
# (3,5,1)
x_1 = torch.rand((3, 7, 1))
x_2 = torch.rand((3, 7, 1))

params_module = SMKernelImagLayerParams(num_vars=7, num_output_units=12)
pos_layer = SMKernelPosImagLayer(num_vars=7, num_output_units=12, params_module=params_module)
neg_layer = SMKernelNegImagLayer(num_vars=7, num_output_units=12, params_module=params_module)

torch.mean(((torch.prod(torch.exp(pos_layer(x_1, x_2)), dim=2) + torch.prod(torch.exp(neg_layer(x_1, x_2)), dim=2) )*0.5), dim=2).squeeze().real

# 1/2/3/4/5
# 1/2/4/8/16


tensor([[-1.1328e-08, -7.2096e-08, -1.0445e-33],
        [ 1.4426e-23,  4.1552e-24, -4.1187e-12],
        [-2.6234e-14, -1.5526e-17,  1.7630e-19]], grad_fn=<SelectBackward0>)

In [98]:
# Compare the reparameterisation modules themselves. Wrapping two freshly
# returned tensors in id() is unreliable: the first temporary can be freed and
# its address reused before the second id() call, which yields True even for
# unrelated tensors.
print("Do leaf1 and leaf2 share the same 'shared_linear' parameters? ", 
      pos_layer.params.params_sigma is neg_layer.params.params_sigma)

Do leaf1 and leaf2 share the same 'shared_linear' parameters?  True


In [119]:
for param in pos_layer.parameters(): 
    print (param.shape)

torch.Size([1, 1, 1])
torch.Size([1, 1, 1])


In [260]:
pos_layer.params.params_sigma().shape

torch.Size([1, 1, 7])

In [311]:
sigma = pos_layer.params.params_sigma().squeeze(1)
mu = pos_layer.params.params_mu().squeeze(1)

def SMK(x1, x2, sigma, mu):
    """Spectral-mixture component for a single pair of 1-D inputs.

    Returns the product over dimensions of
    ``exp(-2 * pi^2 * tau_d^2 * sigma_d^2)`` multiplied by
    ``cos(2 * pi * dot(tau, mu))``, where ``tau = x1 - x2``.
    """
    tau = x1 - x2
    gaussian_factors = torch.exp(-2*(torch.pi**2) * (tau**2) * (sigma**2))
    cosine = torch.cos(2*torch.pi*torch.dot(tau, mu))
    return torch.prod(gaussian_factors) * cosine

# Brute-force reference: for every (i, j) pair print the unweighted mean of the
# SMK components, to be compared by eye with the layer-based expression above.
for i in range(x_1.shape[0]):
    for j in range(x_2.shape[0]):
        smk = [
            SMK(x_1[i].squeeze(-1), x_2[j].squeeze(-1), sigma[k], mu[k])
            for k in range(sigma.shape[0])
        ]
        print(sum(smk) / len(smk))

tensor(-1.1328e-08, grad_fn=<DivBackward0>)
tensor(-7.2096e-08, grad_fn=<DivBackward0>)
tensor(-1.0445e-33, grad_fn=<DivBackward0>)
tensor(1.4425e-23, grad_fn=<DivBackward0>)
tensor(4.1552e-24, grad_fn=<DivBackward0>)
tensor(-4.1187e-12, grad_fn=<DivBackward0>)
tensor(-2.6234e-14, grad_fn=<DivBackward0>)
tensor(-1.5526e-17, grad_fn=<DivBackward0>)
tensor(1.7630e-19, grad_fn=<DivBackward0>)


In [323]:
from cirkit.region_graph.quad_tree import QuadTree
# region_graph = QuadTree(width, height, struct_decomp=False)
# region_graph = RandomBinaryTree(num_vars=128, depth=6, num_repetitions=1)
region_graph = FullyFactorized(num_vars=7)


In [324]:
from cirkit.layers.input.exp_family import CategoricalLayer
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.input.rbf_kernel import RBFKernelLayer

efamily_cls = params_module
efamily_kwargs = {}
layer_cls = CPLayer
layer_kwargs = {'rank': 1}


In [325]:
from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax
from cirkit.models.tensorized_circuit import TensorizedPC
from cirkit.models.tensorized_SM_circuit import TensorizedSMPC
pc_sm = TensorizedSMPC.from_region_graph(
    region_graph,
    num_inner_units=12,
    num_input_units=12,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=1,
    reparam=ReparamSoftmax # ReparamLogSoftmax # ReparamExp
)
pc_sm.to(device)
print(pc_sm)


TensorizedSMPC(
  (input_layer_params): SMKernelImagLayerParams(
    (params_sigma): ReparamExp()
    (params_mu): ReparamIdentity()
  )
  (input_layer_pos): SMKernelPosImagLayer(
    (params): SMKernelImagLayerParams(
      (params_sigma): ReparamExp()
      (params_mu): ReparamIdentity()
    )
  )
  (input_layer_neg): SMKernelNegImagLayer(
    (params): SMKernelImagLayerParams(
      (params_sigma): ReparamExp()
      (params_mu): ReparamIdentity()
    )
  )
  (scope_layer): ScopeLayer()
  (inner_layers): ModuleList(
    (0): CollapsedCPLayer(
      (params_in): ReparamSoftmax()
    )
  )
)


In [322]:
# pc_sm.inner_layers[0].params_in.param = torch.nn.Parameter(torch.log(torch.ones((1, 12, 1))))

# pc_sm.inner_layers[0].params_in()

RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 4

In [326]:
for param in pc_sm.parameters(): 
    print (param.shape)
    # print (param)

torch.Size([12, 1, 7])
torch.Size([12, 1, 7])
torch.Size([1, 7, 12, 1])


In [329]:
pc_sm(x_1, x_2).shape

torch.Size([3, 3, 1])

In [134]:
pos_layer(x_1, x_2).squeeze(), neg_layer(x_1, x_2).squeeze()

(tensor([[-13.2346+2.7592j,  -0.0169-0.0985j, -11.9529+2.6222j],
         [ -0.6093+0.5920j,  -8.9233-2.2656j,  -0.3599+0.4550j],
         [ -0.6418+0.6076j,  -8.8009-2.2500j,  -0.3850+0.4706j]],
        grad_fn=<SqueezeBackward0>),
 tensor([[-13.2346-2.7592j,  -0.0169+0.0985j, -11.9529-2.6222j],
         [ -0.6093-0.5920j,  -8.9233+2.2656j,  -0.3599-0.4550j],
         [ -0.6418-0.6076j,  -8.8009+2.2500j,  -0.3850-0.4706j]],
        grad_fn=<SqueezeBackward0>))

In [None]:
# Define the Spectral Mixture (SM) kernel function correctly
def sm_kernel(w, tau, mu, variance):
    """
    Compute the one-dimensional Spectral Mixture (SM) kernel

        k(tau) = sum_q w_q * cos(2 * pi * tau * mu_q) * exp(-2 * pi^2 * tau^2 * variance_q)

    for every entry of ``tau``.

    :param w: Weight of each mixture component (tensor of shape ``(Q,)``).
    :param tau: Differences at which to evaluate the kernel. Any shape is
        accepted, including a 0-d scalar tensor.
    :param mu: Mean of each mixture component (tensor of shape ``(Q,)``).
    :param variance: Variance of each mixture component (tensor of shape ``(Q,)``).
    :return: Kernel values as a tensor with the same shape as ``tau``.
    """
    # Append a trailing axis so that tau broadcasts against the Q components.
    tau_expanded = tau.unsqueeze(-1)

    # Compute the cosine part of the kernel
    cosine_part = torch.cos(2 * torch.pi * tau_expanded * mu)

    # Compute the exponential part of the kernel
    exp_part = torch.exp(-2 * torch.pi**2 * tau_expanded.pow(2) * variance)

    # Sum over the mixture axis. It is always the last axis after the
    # unsqueeze above, whereas a fixed dim=1 is only correct for 1-D tau.
    kernel_values = torch.sum(w * cosine_part * exp_part, dim=-1)

    return kernel_values

# Example tensors for the parameters
# Assuming Q mixture components
Q = 3  # Example values for the number of mixtures
w_example = torch.rand(Q)  # Example weights tensor
tau_example = torch.rand(10)  # Example time difference tensor with 10 samples
mu_example = torch.rand(Q)  # Example means tensor
variance_example = torch.rand(Q)  # Example variance tensor

# Call the SM kernel function
kernel_output = sm_kernel(w_example, tau_example, mu_example, variance_example)
kernel_output


# Random example inputs for demonstration purposes
# tau_example = (x_1[0].squeeze() - x_2[0].squeeze()).unsqueeze(0)     # Example tau tensor
# mu_example = pos_layer.params.params_mu().squeeze(-2)       # Example mu tensor
# sigma_example = pos_layer.params.params_sigma().squeeze(-2)    # Example sigma tensor
# weight_example = torch.tensor([1])      # Example weight tensor

# # Calculate the SM kernel
# sm_kernel_example = sm_kernel(tau_example, mu_example, sigma_example, weight_example)
# sm_kernel_example


In [None]:
(x_1[0].squeeze() - x_2[1].squeeze()).shape

In [None]:
pc.scope_layer.scope.shape

In [None]:
pc.inner_layers[0].params_in() #.param #.shape #.param.shape
# (F, H, I, O)
# (fold count, arity, input, output)

In [None]:
from cirkit.models.rbf_kernel import RBFCircuitKernel

circuit_kernel = RBFCircuitKernel(pc, batch_shape=torch.Size([]))


In [None]:
circuit_kernel(x1.squeeze(), x2.squeeze()).evaluate()

In [202]:
pos_output.shape

torch.Size([3, 3, 7, 4, 1])

In [201]:
# Seven one-hot complex rows (one per variable) with a singleton middle axis,
# i.e. a complex identity matrix reshaped to (7, 1, 7).
scope_tensor = torch.eye(7, dtype=torch.complex64).unsqueeze(1)

pos_output = pos_layer(x_1, x_2)

In [194]:
torch.einsum("...dkp,dpf->fk...", pos_output, scope_tensor)

tensor([[[[-5.3094e-01-0.4083j, -2.2431e-01-0.2654j, -2.3957e+00-0.8674j],
          [-2.6958e+00-0.9201j, -1.9234e+00-0.7772j, -6.0567e+00-1.3792j],
          [-1.0286e+01+1.7973j, -1.1987e+01+1.9403j, -5.7027e+00+1.3383j]],

         [[-2.4464e+00-0.7322j, -1.0335e+00-0.4759j, -1.1039e+01-1.5553j],
          [-1.2422e+01-1.6499j, -8.8623e+00-1.3936j, -2.7907e+01-2.4730j],
          [-4.7395e+01-3.0605j, -5.5232e+01-2.8042j, -2.6277e+01+2.3996j]],

         [[-1.5411e+00-0.0452j, -6.5107e-01-0.0294j, -6.9536e+00-0.0961j],
          [-7.8248e+00-0.1019j, -5.5827e+00-0.0861j, -1.7580e+01-0.1528j],
          [        nan+nanj, -3.4793e+01+0.2149j, -1.6553e+01+0.1483j]],

         [[-1.6925e+00-0.4124j, -7.1505e-01-0.2681j, -7.6369e+00-0.8761j],
          [-8.5938e+00-0.9294j, -6.1313e+00-0.7850j, -1.9308e+01-1.3930j],
          [-3.2790e+01+1.8154j, -3.8212e+01+1.9597j, -1.8179e+01+1.3517j]]],


        [[[-4.3661e-01-0.3991j, -1.5363e+01-2.3676j, -1.9595e+01-2.6739j],
          [-7.2240

In [195]:
pos_layer(x_1, x_2)

tensor([[[[[-5.3094e-01-0.4083j],
           [-2.4464e+00-0.7322j],
           [-1.5411e+00-0.0452j],
           [-1.6925e+00-0.4124j]],

          [[-4.3661e-01-0.3991j],
           [-1.2490e+00-0.4441j],
           [-1.5873e+00-0.4144j],
           [-4.4345e-01-0.2912j]],

          [[-2.9707e+00+0.3724j],
           [-2.0726e+00+0.1821j],
           [-1.2360e+01+1.8397j],
           [-2.7982e+00+0.7557j]],

          [[-3.8262e+00+0.8995j],
           [-8.3641e+00+1.6969j],
           [-4.3808e+00+1.8448j],
           [-9.2761e+00+2.1337j]],

          [[-4.6458e-01+0.4903j],
           [-8.9577e-01+0.3567j],
           [-5.8208e-01+0.0263j],
           [-4.7678e-01+0.6970j]],

          [[-1.0195e+01-1.6488j],
           [-1.5126e+01-0.6515j],
           [-3.0535e+00-0.1947j],
           [-1.1306e+01-0.4748j]],

          [[-2.2601e-01+0.3530j],
           [-4.8562e-01+0.3924j],
           [-5.3465e-01+0.1728j],
           [-6.0345e-01+0.1491j]]],


         [[[-2.2431e-01-0.2654j]

In [212]:
scope_tensor_s = scope_tensor.squeeze()
pos_output_s = pos_output.squeeze()[0][0]

pos_output_1 = pos_output.squeeze()[2][0].unsqueeze(0)

torch.einsum("...dk,df->fk...", pos_output_1, scope_tensor_s)

tensor([[[-10.2860+1.7973j],
         [-47.3948-3.0605j],
         [     nan+nanj],
         [-32.7898+1.8154j]],

        [[ -0.7689-0.5297j],
         [ -2.1997-0.5893j],
         [     nan+nanj],
         [ -0.7810-0.3864j]],

        [[-26.4520+1.1112j],
         [-18.4546+0.5433j],
         [    -inf+nanj],
         [-24.9157+2.2551j]],

        [[ -0.8173+0.4157j],
         [ -1.7866+0.7842j],
         [     nan+nanj],
         [ -1.9814+0.9861j]],

        [[ -5.3585+1.6651j],
         [-10.3318+1.2113j],
         [     nan+nanj],
         [ -5.4991+2.3672j]],

        [[ -5.7847-1.2420j],
         [ -8.5820-0.4908j],
         [     nan+nanj],
         [ -6.4146-0.3576j]],

        [[ -2.5165-1.1779j],
         [ -5.4073-1.3095j],
         [     nan+nanj],
         [ -6.7192-0.4974j]]], grad_fn=<ViewBackward0>)

In [213]:
pos_output_1

tensor([[[-10.2860+1.7973j, -47.3948-3.0605j, -29.8559+0.1991j,
          -32.7898+1.8154j],
         [ -0.7689-0.5297j,  -2.1997-0.5893j,  -2.7955-0.5500j,
           -0.7810-0.3864j],
         [-26.4520+1.1112j, -18.4546+0.5433j,     -inf-0.0000j,
          -24.9157+2.2551j],
         [ -0.8173+0.4157j,  -1.7866+0.7842j,  -0.9357+0.8526j,
           -1.9814+0.9861j],
         [ -5.3585+1.6651j, -10.3318+1.2113j,  -6.7137+0.0893j,
           -5.4991+2.3672j],
         [ -5.7847-1.2420j,  -8.5820-0.4908j,  -1.7325-0.1467j,
           -6.4146-0.3576j],
         [ -2.5165-1.1779j,  -5.4073-1.3095j,  -5.9532-0.5766j,
           -6.7192-0.4974j]]], grad_fn=<UnsqueezeBackward0>)

In [214]:
scope_tensor_s

tensor([[1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
        [0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
        [0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
        [0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
        [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j, 0.+0.j],
        [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j, 0.+0.j],
        [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j]])

In [None]:
# set parameters

pc.input_layer.params.param = torch.nn.Parameter(torch.log(torch.ones(tuple(pc.input_layer.params.shape))*3.3))
# pc.inner_layers[0].params_in.param = torch.nn.Parameter(torch.log(0.25*torch.ones(tuple(pc.inner_layers[0].params_in.shape))))
# pc.inner_layers[0].params_in = torch.nn.Parameter(torch.ones(tuple(pc.inner_layers[0].params_in.shape))*3.3)
# pc.inner_layers[1].params_in = torch.nn.Parameter(torch.ones(tuple(pc.inner_layers[1].params_in.shape))*3.3)
# pc.inner_layers[2].params_in = torch.nn.Parameter(torch.ones(tuple(pc.inner_layers[2].params_in.shape))*3.3)
# pc.inner_layers[3].params_in = torch.nn.Parameter(torch.ones(tuple(pc.inner_layers[3].params_in.shape))*3.3)

In [None]:
pc.inner_layers[0].params_in() #.shape

In [None]:
x1 = torch.randn(3, 8, 1)
x2 = torch.randn(3, 8, 1)

In [None]:
pc(x1, x2).squeeze()

In [None]:
def eval_pc(x1, x2): 
    """Evaluate the circuit ``pc`` on two inputs that lack the trailing axis.

    A trailing singleton axis is appended to both inputs before the call and
    removed again from the result.
    """
    lhs, rhs = x1.unsqueeze(-1), x2.unsqueeze(-1)
    return pc(lhs, rhs).squeeze(-1)

eval_pc(x1.squeeze(), x2.squeeze())

In [None]:
from gpytorch.kernels import RBFKernel

# x = torch.randn(3, 5)
covar_module = RBFKernel()
covar_module.lengthscale = torch.tensor(3.3)
covar_module(x1.squeeze(), x2.squeeze()).evaluate()

In [None]:
x1.squeeze().shape

In [None]:
from gpytorch.kernels import RBFKernel
x = torch.randn(3, 2)
RBFKernel().lengthscale = torch.tensor(3.3)

In [None]:
# Test RBF input output = RBF kernel 

In [None]:
from gpytorch.kernels import RBFKernel, SpectralMixtureKernel

x = torch.randn(3, 5)
covar_module = SpectralMixtureKernel(num_mixtures=2, ard_num_dims=5)
covar_module.mixture_scales = torch.tensor(3.3).expand(1, 2, 1, 5)
covar_module.mixture_means = torch.tensor(2.2).expand(1, 2, 1, 5)
covar_module.mixture_weights = torch.tensor([0.5]).expand(1, 2, 1, 5)
covar_module(x).evaluate()
# covar_module.lengthscale

In [None]:
from cirkit.layers.input.sm_kernel import SMKernelLayer
input_la = SMKernelLayer(num_vars=5, num_output_units=1)

input_la.params = torch.nn.Parameter(torch.ones((5,1))*3.3)

# input_la(x1, x2).squeeze().shape

# input_la(x.unsqueeze(-1), x.unsqueeze(-1)).shape

torch.prod(torch.exp(input_la(x.unsqueeze(-1), x.unsqueeze(-1)).squeeze()), dim=2)

In [None]:
input_la = RBFKernelLayer(num_vars=20, num_output_units=1)

input_la.params = torch.nn.Parameter(torch.ones((20,1))*3.3)

# input_la(x1, x2).squeeze().shape
torch.prod(input_la(x1, x1).squeeze(), dim=2)

In [None]:
from gpytorch.kernels import RBFKernel

x = torch.randn(3, 5)
covar_module = RBFKernel()
covar_module.lengthscale = torch.tensor(3.3)
covar_module(x).evaluate()
# covar_module.lengthscale

In [None]:
from cirkit.layers.input.rbf_kernel import RBFKernelLayer
input_la = RBFKernelLayer(num_vars=5, num_output_units=1)

input_la.params.param = torch.nn.Parameter(torch.log(torch.ones(tuple(input_la.params.shape))*3.3))
# pc.input_layer.params.param = torch.nn.Parameter(torch.log(torch.ones(tuple(pc.input_layer.params.shape))*3.3))

# input_la(x1, x2).squeeze().shape

# input_la(x.unsqueeze(-1), x.unsqueeze(-1)).shape

torch.prod(torch.exp(input_la(x.unsqueeze(-1), x.unsqueeze(-1)).squeeze()), dim=2)

In [None]:
# Three evenly spaced inputs on [0, 1] and one period of a sine over them.
# torch.pi is used so that this cell does not depend on `import math`, which
# only appears in a later cell.
train_x = torch.linspace(0, 1, 3)
torch.sin(train_x * (2 * torch.pi))

In [None]:
import math


# train_x = torch.linspace(0, 1, 3)
# train_y = torch.sin(train_x * (2 * math.pi))
train_x = torch.rand((3, 5))
train_y = torch.rand((3))

covar_module = SpectralMixtureKernel(num_mixtures=4, ard_num_dims=5)
covar_module.initialize_from_data(train_x, train_y)
covar_module(train_x).evaluate()

In [None]:
from cirkit.layers.input.sm_kernel import SMKernelLayer
input_la = SMKernelLayer(num_vars=1, num_output_units=4)

input_la.params_mu.param = torch.nn.Parameter(covar_module.mixture_means)
input_la.params_sigma.param = torch.nn.Parameter(torch.log(covar_module.mixture_scales))


to_be_weighted = input_la(train_x.unsqueeze(-1), train_x.unsqueeze(-1))

to_be_weighted = torch.prod(to_be_weighted, dim=2, keepdim=True) / 5

tensor1_expanded = covar_module.mixture_weights.expand_as(to_be_weighted.squeeze(-1))

# Element-wise multiplication and then sum over the inner product dimension (dimension 3 after squeeze)
(tensor1_expanded * to_be_weighted.squeeze(-1)).sum(dim=3).squeeze()

# torch.prod(finalfinal, dim=-1, keepdim=False)

In [None]:
to_be_weighted.shape

In [None]:
covar_module.mixture_scales.shape

In [None]:
covar_module(x1).evaluate().shape

In [None]:
train_x.unsqueeze(-1).unsqueeze(-1).shape

In [None]:
x_2 = torch.tensor([[-0.6281], [ 0.1011], [ 0.0664]])

In [None]:
from cirkit.layers.input.rbf_kernel import RBFKernelLayer
input_la = RBFKernelLayer(num_vars=2, num_output_units=1)

input_la.params = torch.nn.Parameter(torch.ones((1,1))*3.3)

input_la(x_2.unsqueeze(-1), x_2.unsqueeze(-1)).squeeze()

In [None]:
input_la.params

In [None]:
torch.ones((2,1))*3.3

In [None]:
train_x.unsqueeze(-1).unsqueeze(-1).shape

In [None]:
input_la(x.unsqueeze(-1), x.unsqueeze(-1)).squeeze()

In [None]:
x_2.unsqueeze(-1).shape

In [None]:
torch.cdist(x1, x2, p=2)

In [None]:
from torch import optim
from torch.utils.data import DataLoader
train_dataloader = DataLoader(data_train, shuffle=True, batch_size=256)
test_dataloader = DataLoader(data_test, shuffle=False, batch_size=256)
optimizer = optim.SGD(pc.parameters(), lr=0.1, momentum=0.9)

Since the constructed PC is not necessarily normalized, we construct the integral circuit that will compute the partition function. Note that parameters are shared and therefore there is no additional memory required.

In [None]:
# Build the integral circuit of `pc`; it is used below to compute the partition function.
from cirkit.models.functional import integrate
pc_pf = integrate(pc)

Finally, we optimize the parameters for 5 epochs by minimizing the negative log-likelihood.

In [None]:
# Train `pc` with mini-batch SGD by minimizing the average negative log-likelihood (NLL).
num_epochs = 5
for epoch_idx in range(num_epochs):
    # Sum of per-sample NLL over the epoch, kept as a plain Python float
    running_loss = 0.0
    for batch, _ in train_dataloader:
        batch = batch.to(device).unsqueeze(dim=-1)  # Add a channel dimension
        log_score = pc(batch)
        log_pf = pc_pf(batch)     # Compute the partition function
        lls = log_score - log_pf  # Compute the log-likelihood
        loss = -torch.mean(lls)   # The loss is the negative average log-likelihood
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Accumulate the scalar value, not the tensor: adding `loss` itself would keep
        # extending the autograd history across every batch of the epoch.
        running_loss += loss.item() * len(batch)
        # Clamp the parameters to ensure they are in the intended domain
        # This is needed if we do not use any reparametrization to ensure parameters non-negativity
        # In our case, clamping is disabled because we reparameterize via exponentiation (see above)
        #for layer in model.inner_layers:
        #    layer.clamp_params()
    print(f"Epoch {epoch_idx}: Average NLL: {running_loss / len(data_train):.3f}")

We then evaluate our model on test data by computing the average log-likelihood and bits per dimension.

In [None]:
# Evaluate `pc` on the test loader: average log-likelihood and bits per dimension.
# NOTE(review): `num_variables` and `data_test` are not defined in the visible cells.
with torch.no_grad():
    pc.eval()
    log_pf = pc_pf(torch.empty((), device=device))  # Compute the partition function once for testing
    test_lls = 0.0  # Running sum of per-sample log-likelihoods (Python float)
    for batch, _ in test_dataloader:
        # Move to the device and add a channel dimension, as in training
        log_score = pc(batch.to(device).unsqueeze(dim=-1))
        lls = log_score - log_pf
        test_lls += lls.sum().item()
    average_ll = test_lls / len(data_test)
    # Negated average LL divided by (number of variables * ln 2) gives bits per dimension
    bpd = -average_ll / (num_variables * np.log(2.0))
    print(f"Average test LL: {average_ll:.3f}")
    print(f"Bits per dimension: {bpd}")

In [None]:
#!/usr/bin/env python3

import gpytorch

# from ..functions import RBFCovariance
# from ..settings import trace_mode
from gpytorch.kernels import Kernel


def postprocess_rbf(dist_mat):
    """Turn distances ``d`` into ``exp(-d / 2)``, overwriting ``dist_mat`` in place.

    Both steps use in-place tensor ops, so the caller's tensor is modified and
    the returned tensor shares its storage.
    """
    halved_negated = dist_mat.div_(-2)
    return halved_negated.exp_()


class TestRBFKernel(Kernel):
    r"""
    Local copy of an RBF (squared exponential) kernel, kept in the notebook so that
    its output can be checked against other kernel implementations.

    For inputs :math:`\mathbf{x_1}` and :math:`\mathbf{x_2}` the covariance is

    .. math::

       \begin{equation*}
          k_{\text{RBF}}(\mathbf{x_1}, \mathbf{x_2}) = \exp \left( -\frac{1}{2}
          (\mathbf{x_1} - \mathbf{x_2})^\top \Theta^{-2} (\mathbf{x_1} - \mathbf{x_2}) \right)
       \end{equation*}

    with :math:`\Theta` the :attr:`lengthscale` parameter.
    The lengthscale options are described on :class:`gpytorch.kernels.Kernel`.

    .. note::

        There is no `outputscale` parameter here. Wrap the kernel in a
        :class:`gpytorch.kernels.ScaleKernel` if a scaling parameter is needed.

    Args:
        :attr:`ard_num_dims` (int, optional):
            Number of input dimensions when a separate lengthscale per dimension is
            wanted; `d` if :attr:`x1` is a `n x d` matrix. Default: `None`
        :attr:`batch_shape` (torch.Size, optional):
            Batch shape when a separate lengthscale per batch of inputs is wanted;
            `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`.
        :attr:`active_dims` (tuple of ints, optional):
            Indices of the input dimensions the covariance should be computed on.
            Default: `None`.
        :attr:`lengthscale_prior` (Prior, optional):
            Prior to place on the lengthscale parameter. Default: `None`.
        :attr:`lengthscale_constraint` (Constraint, optional):
            Constraint to place on the lengthscale parameter. Default: `Positive`.
        :attr:`eps` (float):
            Smallest value the lengthscale may take (guards against division by zero).
            Default: `1e-6`.

    Attributes:
        :attr:`lengthscale` (Tensor):
            The lengthscale parameter; its size/shape follows from the
            :attr:`ard_num_dims` and :attr:`batch_shape` arguments.

    Example:
        >>> x = torch.randn(10, 5)
        >>> # Non-batch: one shared lengthscale
        >>> covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
        >>> # Non-batch: ARD (one lengthscale per input dimension)
        >>> covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=5))
        >>> covar = covar_module(x)  # Output: LazyTensor of size (10 x 10)
        >>>
        >>> batch_x = torch.randn(2, 10, 5)
        >>> # Batch: one shared lengthscale
        >>> covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
        >>> # Batch: one lengthscale per batch
        >>> covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(batch_shape=torch.Size([2])))
        >>> covar = covar_module(batch_x)  # Output: LazyTensor of size (2 x 10 x 10)
    """

    has_lengthscale = True

    def forward(self, x1, x2, diag=False, **params):
        """Divide both inputs by the lengthscale and delegate to ``covar_dist``.

        ``covar_dist`` is asked for squared distances and is handed
        ``postprocess_rbf`` as the distance post-processing function; ``diag``
        and any extra ``params`` are passed through unchanged.
        """
        scaled_x1, scaled_x2 = (inputs.div(self.lengthscale) for inputs in (x1, x2))

        return self.covar_dist(
            scaled_x1,
            scaled_x2,
            square_dist=True,
            diag=diag,
            dist_postprocess_func=postprocess_rbf,
            postprocess=True,
            **params,
        )

In [None]:
# Instantiate the local RBF kernel and set its lengthscale to 3.3, the same value that
# was loaded into the RBFKernelLayer parameter earlier.
test_kernel = TestRBFKernel()
test_kernel.lengthscale = torch.tensor(3.3)

In [None]:
# Display the lengthscale as stored by the kernel after the assignment above.
test_kernel.lengthscale

In [None]:
# Apply the kernel to the squeezed x1 and x2 and display the evaluated result.
# NOTE(review): `x1` and `x2` are not defined in the visible cells -- relies on earlier kernel state.
test_kernel(x1.squeeze(),x2.squeeze()).evaluate()

In [None]:
# Inspect the shape of x1.
x1.shape