# Train and evaluate a probabilistic circuit (PC) Gaussian process on a UCI regression dataset

In [16]:
import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from uci_datasets import Dataset

from ignite.engine import Events, Engine
from ignite.metrics import Average, Loss
from ignite.contrib.handlers import ProgressBar

import gpytorch
from gpytorch.mlls import VariationalELBO
from gpytorch.likelihoods import GaussianLikelihood

import pandas as pd

from cirkit.region_graph.random_binary_tree import RandomBinaryTree
from cirkit.region_graph.fully_factorized import FullyFactorized
from cirkit.models.gp import CircuitGP, initial_values
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.input.rbf_kernel_flatten import RBFKernelFlattenLayer
from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax
from cirkit.models.tensorized_circuit import TensorizedPC

In [17]:
class IdentityMapping(nn.Module):
    """Parameter-free module whose forward pass hands its input back untouched.

    Used as a stand-in feature extractor when the raw inputs should be fed
    directly to the downstream model.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (no copy, no transformation)."""
        return x

In [18]:
# Experiment configuration: device, RNG seeds, dataset choice, optimisation
# hyper-parameters and the circuit architecture. Later cells read these names.
device = torch.device("cpu")  # The device to use, e.g., "cpu", "cuda", "cuda:1"

# Seed the Python, NumPy and PyTorch RNGs before any random draw is made.
random.seed(4)
np.random.seed(4)
torch.manual_seed(4)

dataset = "kin40k"
# Drawn from the Python RNG seeded just above, so the split index is fixed by the seed.
dataset_split = random.randint(0, 9)

# Optimisation and GP hyper-parameters.
lr = 1e-3
batch_size = 32
epochs = 50
n_inducing_points = 50
kernel = "HBF" # not used

# Circuit architecture: `num_mixtures` is used for both the inner and input unit counts.
num_mixtures = 100
# NOTE(review): num_vars is hard-coded to 8; presumably it must equal the
# dataset's input dimension -- verify when changing `dataset`.
region_graph = FullyFactorized(num_vars=8)
# region_graph = RandomBinaryTree(num_vars=8, depth=3, num_repetitions=6)
efamily_cls = RBFKernelFlattenLayer   # Flatten
layer_cls = CPLayer
reparam = ReparamSoftmax

In [19]:
# Build the tensorized circuit from the configured region graph, place it on
# the configured device, and report its structure and parameter count.
efamily_kwargs = dict()
layer_kwargs = dict(rank=1)

pc = TensorizedPC.from_region_graph(
    region_graph,
    num_inner_units=num_mixtures,
    num_input_units=num_mixtures,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=1,
    reparam=reparam,  # alternatives tried: ReparamLogSoftmax, ReparamSoftmax
)
pc.to(device)
print(pc)

# Shape of every circuit parameter tensor, one per line.
print("Circuit parameters: ")
for param in pc.parameters():
    print(param.shape)

# Count only the entries that will receive gradients.
total_params = sum(
    weight.numel() for weight in pc.parameters() if weight.requires_grad
)
print(f"Total number of parameters: {total_params}")

TensorizedPC(
  (input_layer): RBFKernelFlattenLayer(
    (params): ReparamExp()
  )
  (scope_layer): ScopeLayer()
  (inner_layers): ModuleList(
    (0): CollapsedCPLayer(
      (params_in): ReparamSoftmax()
    )
  )
)
Circuit parameters: 
torch.Size([8, 100])
torch.Size([1, 100, 1])
Total number of parameters: 900


In [20]:
# Load the configured UCI split and carve a validation set off the training data.
data = Dataset(dataset)
x_train, y_train, x_test, y_test = data.get_split(split=dataset_split)

# The validation set has as many rows as the test set and is taken from the
# tail of the training split; everything before it is the real training data.
val_split_point = x_train.shape[0] - x_test.shape[0]

x_train_real = x_train[:val_split_point]
y_train_real = y_train[:val_split_point].squeeze()
x_val = x_train[val_split_point:]
y_val = y_train[val_split_point:].squeeze()
y_test = y_test.squeeze()

# Normalize dataset: statistics come from the training portion only, so no
# information from the validation or test rows leaks into the scaling.
mean = x_train_real.mean(axis=0)
std = x_train_real.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would fill the
# column with NaN/inf. Scale such columns by 1 instead (they become all zeros).
std = np.where(std == 0, 1.0, std)

x_train_real_normalized = (x_train_real - mean) / std
x_val_normalized = (x_val - mean) / std
x_test_normalized = (x_test - mean) / std

input_dim = x_train_real_normalized.shape[1]
num_outputs = 1
# Inputs are passed to the GP as-is; no learned feature extractor is used.
feature_extractor = IdentityMapping()

print("Training dataset size: ", x_train_real_normalized.shape[0])
print("Val dataset size: ", x_val_normalized.shape[0])
print("Test dataset size: ", x_test_normalized.shape[0])
print("Input dimension: ", input_dim)

kin40k dataset, N=40000, d=8
Training dataset size:  32000
Val dataset size:  4000
Test dataset size:  4000
Input dimension:  8


In [21]:
# Wrap the NumPy splits as float32 tensor datasets and build their loaders.
ds_train = torch.utils.data.TensorDataset(
    torch.from_numpy(x_train_real_normalized).float(),
    torch.from_numpy(y_train_real).float(),
)
# Training batches are reshuffled every epoch; the last incomplete batch is dropped.
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True)

ds_val = torch.utils.data.TensorDataset(
    torch.from_numpy(x_val_normalized).float(),
    torch.from_numpy(y_val).float(),
)
dl_val = torch.utils.data.DataLoader(ds_val, batch_size=512, shuffle=False)

ds_test = torch.utils.data.TensorDataset(
    torch.from_numpy(x_test_normalized).float(),
    torch.from_numpy(y_test).float(),
)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=512, shuffle=False)

# Starting inducing points and lengthscale are derived from the training data.
initial_inducing_points, initial_lengthscale = initial_values(
    ds_train, feature_extractor, n_inducing_points
)

gp_model = CircuitGP(
    num_outputs=num_outputs,
    num_features=input_dim,          # CHANGE features / input_dim
    initial_lengthscale=initial_lengthscale,
    initial_inducing_points=initial_inducing_points,
    circuit=pc,
    # kernel=kernel,
)
likelihood = GaussianLikelihood()

# Keep every trainable component on the configured device (the circuit was
# already placed there); this must happen before the optimizer is created.
gp_model.to(device)
likelihood.to(device)

elbo_fn = VariationalELBO(likelihood, gp_model, num_data=len(ds_train))


def loss_fn(x, y):
    """Training loss: the negated variational ELBO.

    Args:
        x: the model output passed as the first argument to ``elbo_fn``
            (the training step passes ``gp_model(inputs)`` here).
        y: the regression targets for the same batch.

    Returns:
        ``-elbo_fn(x, y)``, so that minimising the loss maximises the ELBO.
    """
    return -elbo_fn(x, y)


# One parameter group per component, both using the same learning rate.
parameters = [
    {"params": gp_model.parameters(), "lr": lr},
    {"params": likelihood.parameters(), "lr": lr},
]

optimizer = torch.optim.Adam(parameters)
pbar = ProgressBar()

# Number of training iterations executed so far (incremented by `step`).
step_counter = 0


def step(engine, batch):
    """Run one optimisation step on a training batch.

    Args:
        engine: the Ignite engine driving training; it is asked to terminate
            when the loss becomes NaN.
        batch: an ``(inputs, targets)`` pair from the training loader.

    Returns:
        The loss of this batch as a Python float. When the loss is NaN the
        model weights are saved to ``model_weights_before_nan.pt``, no
        parameter update is made, termination is requested, and the NaN
        value itself is returned.
    """
    global step_counter
    step_counter += 1

    gp_model.train()
    likelihood.train()

    optimizer.zero_grad()

    # Send the batch to the configured device rather than to "CUDA if present":
    # the model lives on `device`, so the two must agree.
    x, y = batch
    x = x.to(device)
    y = y.to(device)

    y_pred = gp_model(x)
    loss = loss_fn(y_pred, y)

    if torch.isnan(loss).any():
        print(f"Step {step_counter}: NaN detected in loss.")
        print("loss", loss)
        print("y_pred", y_pred)
        print("NaN detected in loss, saving model and stopping.")
        # Save model weights before termination
        torch.save(gp_model.state_dict(), 'model_weights_before_nan.pt')
        engine.terminate()
        # Return a number, not None: metrics attached to the trainer still
        # receive this iteration's output after terminate() is requested.
        return loss.item()

    loss.backward()
    optimizer.step()

    return loss.item()


def eval_step(engine, batch):
    """Evaluate the model on one batch without updating it.

    Args:
        engine: the Ignite engine calling this function (unused).
        batch: an ``(inputs, targets)`` pair from an evaluation loader.

    Returns:
        ``(y_pred, y)`` where ``y_pred`` is whatever ``gp_model`` returns for
        the inputs and ``y`` is the targets on the configured device.
    """
    gp_model.eval()
    likelihood.eval()

    # Use the configured device so the batch always matches the model's placement.
    x, y = batch
    x = x.to(device)
    y = y.to(device)

    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        y_pred = gp_model(x)
    return y_pred, y

    
# Engines: one drives optimisation, the other scores held-out data.
trainer = Engine(step)
evaluator = Engine(eval_step)

# Running average of the per-iteration training loss, plus a progress bar.
Average().attach(trainer, "loss")
pbar.attach(trainer)

# Validation loss: mean negative expected log-probability under the likelihood.
metric = Loss(lambda y_pred, y: - likelihood.expected_log_prob(y, y_pred).mean())
metric.attach(evaluator, "loss")


@trainer.on(Events.EPOCH_COMPLETED(every=int(epochs/20) + 1))
def log_results(engine):
    """Score the validation set and print it next to the training loss."""
    evaluator.run(dl_val)  # val dataset
    val_loss = evaluator.state.metrics['loss']
    train_loss = engine.state.metrics['loss']
    print(
        f"Results - Epoch: {engine.state.epoch} - "
        f"Val Loss: {val_loss:.2f} - "
        f"Train Loss: {train_loss:.2f}"
    )


f_X_samples shape torch.Size([1000, 8])
initial_lengthscale tensor(3.9071)
All circuit parameters shape: 
torch.Size([8, 100])
torch.Size([1, 100, 1])


  super()._check_params_vs_input(X, default_n_init=3)
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

  from tqdm.autonotebook import tqdm


In [22]:
# List the shape of every parameter tensor registered on the GP model.
print("Total model params: ")
for param in gp_model.parameters():
    print(param.shape)

Total model params: 
torch.Size([50, 8])
torch.Size([50])
torch.Size([50, 50])
torch.Size([8, 100])
torch.Size([1, 100, 1])
torch.Size([1])
torch.Size([])


In [23]:
# Train for `epochs` passes over `dl_train`; the progress bar and the periodic
# validation logging attached to `trainer` run as part of this call.
trainer.run(dl_train, max_epochs=epochs)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:2198.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


[1/1000]   0%|           [00:00<?]

Engine run is terminating due to exception: 


KeyboardInterrupt: 

In [24]:
from ignite.metrics import RootMeanSquaredError


def eval_step(engine, batch):
    """Predict one batch and return point predictions for RMSE scoring.

    Args:
        engine: the Ignite engine calling this function (unused).
        batch: an ``(inputs, targets)`` pair from an evaluation loader.

    Returns:
        ``(y_pred, y)`` where ``y_pred`` is the ``.mean`` of the object
        returned by ``gp_model`` and ``y`` is the targets on the configured
        device.
    """
    gp_model.eval()  # Ensure model is in evaluation mode
    likelihood.eval()

    # Use the configured device so the batch always matches the model's placement.
    x, y = batch
    x = x.to(device)
    y = y.to(device)

    with torch.no_grad():  # Disable gradient computation for evaluation
        distribution = gp_model(x)
        y_pred = distribution.mean  # Use the mean of the distribution as the prediction

    return y_pred, y


# NOTE(review): this rebinds the names `eval_step` and `evaluator`. The
# validation logger attached to the trainer looks up the global `evaluator`
# and its 'loss' metric, so re-run the engine-setup cell before training again.
evaluator = Engine(eval_step)

# Attach the RMSE metric to the evaluator
rmse = RootMeanSquaredError()
rmse.attach(evaluator, "RMSE")

# Score the held-out test set.
evaluator.run(dl_test)

# Retrieve and display the RMSE
test_rmse = evaluator.state.metrics['RMSE']
print(f"Test RMSE: {test_rmse:.2f}")


Test RMSE: 0.97
