# Train and evaluate a PC

In [1]:
import random
import torch
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Device on which the models are placed (later cells call `.to(device)`).
device = torch.device("cpu")  # The device to use, e.g., "cpu", "cuda", "cuda:1"

# Load IPython's autoreload extension in mode 2, so that edited modules are
# re-imported before each cell runs and the kernel does not need a restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Reload the autoreload extension; only needed when it was already loaded in this kernel session.
%reload_ext autoreload

Set the random seeds.

In [5]:
# One seed shared by every random number generator used in the notebook, so
# that a fresh "Restart & Run All" reproduces the same numbers.
SEED = 4

random.seed(SEED)      # Python's built-in RNG
np.random.seed(SEED)   # NumPy's legacy global RNG
# torch.manual_seed seeds the generators on all devices (CPU and CUDA), so a
# separate torch.cuda.manual_seed call with its own seed is not needed.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f88204a78b0>

## Load MNIST Dataset

Load the training and test splits of MNIST, and preprocess them by flattening the tensor images.

In [None]:
from torchvision import transforms, datasets
def _to_flat_pixel_values(image):
    """Flatten an image tensor and scale it by 255 into integer (long) values."""
    return (255 * image.view(-1)).long()


# Convert each image to a tensor first, then apply the flatten/rescale step above.
transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(_to_flat_pixel_values)])

# Both splits share the same root directory and preprocessing.
_mnist_options = dict(download=True, transform=transform)
data_train = datasets.MNIST('datasets', train=True, **_mnist_options)
data_test = datasets.MNIST('datasets', train=False, **_mnist_options)

height, width = 28, 28
# Every flattened image has one entry per variable of the model.
num_variables = len(data_train[0][0])
print(f"Number of variables: {num_variables}")

In [None]:
# Show the first training image together with its class label.
sample_image, sample_label = data_train[0]
plt.matshow(sample_image.reshape(28, 28), cmap='gray')
plt.title(f"Class: {sample_label}")
plt.show()

## Instantiating the region graph

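Initialize the region graph. The cell below builds a _Fully Factorized_ region graph over 8 variables; the _Quad Tree_ and _Random Binary Tree_ alternatives are kept as commented-out lines.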

In [7]:
from cirkit.region_graph.fully_factorized import FullyFactorized
from cirkit.region_graph.quad_tree import QuadTree

# FullyFactorized is imported in this cell because this cell uses it. The only
# other import of it sits further down the notebook, so a fresh top-to-bottom
# run would otherwise stop here with a NameError.
#
# Alternatives that were tried:
#   region_graph = QuadTree(width, height, struct_decomp=False)
#   region_graph = RandomBinaryTree(num_vars=128, depth=6, num_repetitions=1)
region_graph = FullyFactorized(num_vars=8)

In [None]:
# Display the region graph object as the cell output.
region_graph

In [None]:
# Inspect the region graph's private `_nodes` attribute.
region_graph._nodes

Other available region graphs include _Poon Domingos_ and _QuadTree_; further region-graph imports are shown below.

In [6]:
from cirkit.region_graph.poon_domingos import PoonDomingos
from cirkit.region_graph.random_binary_tree import RandomBinaryTree
from cirkit.region_graph.fully_factorized import FullyFactorized

## Choosing the layers

Now we have to choose both the input and inner layers of our circuit. As input layer we select the _RBFNetworkKernelLayer_ (for image data, the _CategoricalLayer_ with 256 categories, i.e. the number of pixel values, is the alternative). For the inner layer, we choose the CP layer with bias (_CPLayerWithBias_) with rank 1.

In [8]:
from cirkit.layers.input.exp_family import CategoricalLayer
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.sum_product.cp_w_bias import CPLayerWithBias
from cirkit.layers.input.rbf_network_kernel import RBFNetworkKernelLayer

# Input layer class together with its (empty) constructor keyword arguments.
efamily_cls, efamily_kwargs = RBFNetworkKernelLayer, dict()
# Inner layer class, configured with rank 1.
layer_cls, layer_kwargs = CPLayerWithBias, dict(rank=1)

## Building the tensorized PC

We can now build our tensorized PC by specifying the region graph and layers we chose previously. In addition, we can scale the architecture by increasing the number of input and inner units. We can also have circuits with multiple output units by choosing _num_classes > 1_. However, in this notebook we only estimate the distribution of the images and marginalize out the class variable.

To ensure weights are non-negative we reparametrize them via exponentiation. Several reparametrization functions are available.

In [9]:
from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax, ReparamIdentity
from cirkit.models.tensorized_circuit import TensorizedPC
# Gather the architecture settings in one mapping and unpack it into the factory call.
pc_settings = dict(
    num_inner_units=10,
    num_input_units=10,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=1,
    reparam=ReparamLogSoftmax,  # other imported options: ReparamExp, ReparamIdentity
)
pc_rbfn = TensorizedPC.from_region_graph(region_graph, **pc_settings)
pc_rbfn.to(device)
print(pc_rbfn)

TensorizedPC(
  (input_layer): RBFNetworkKernelLayer(
    (params_sigma): ReparamExp()
    (params_mu): ReparamIdentity()
    (params_weight): ReparamIdentity()
  )
  (scope_layer): ScopeLayer()
  (inner_layers): ModuleList(
    (0): CollapsedCPLayer(
      (params_in): ReparamLogSoftmax()
      (params_bias): ReparamLogSoftmax()
    )
  )
)


In [10]:
# Print the shape of every parameter tensor of the circuit, one per line.
param_shapes = [weights.shape for weights in pc_rbfn.parameters()]
for shape in param_shapes:
    print(shape)

torch.Size([8, 10])
torch.Size([8, 10])
torch.Size([8, 10, 10])
torch.Size([1, 10, 1])
torch.Size([1, 1])


In [None]:
from cirkit.models.gp import CircuitGP, initial_values

In [11]:
import torch.nn.functional as F

from uci_datasets import Dataset

from ignite.engine import Events, Engine
from ignite.metrics import Average, Loss
from ignite.contrib.handlers import ProgressBar

import gpytorch
from gpytorch.mlls import VariationalELBO
from gpytorch.likelihoods import GaussianLikelihood

import pandas as pd
import numpy as np


In [20]:
# Name of the dataset to load and the index of the split requested from it.
dataset_name, split_index = "protein", 2

data = Dataset(dataset_name)
(x_train, y_train, x_test, y_test) = data.get_split(split=split_index)

protein dataset, N=45730, d=9


In [21]:
# Shapes of the training and test feature arrays.
x_train.shape, x_test.shape

((41157, 9), (4573, 9))

In [22]:
# Number of leading rows of the provided training split that are kept for
# fitting; the remaining rows form the validation set. The boundary is defined
# once so the feature and target slices cannot drift apart.
n_train_real = 36584

x_train_real, x_val = x_train[:n_train_real], x_train[n_train_real:]

# squeeze() drops singleton dimensions from the target arrays.
y_train_real = y_train[:n_train_real].squeeze()
y_val = y_train[n_train_real:].squeeze()
y_test = y_test.squeeze()

In [23]:
# Normalisation statistics are computed on the training rows only and then
# applied unchanged to the validation and test rows.
mean = x_train_real.mean(axis=0)
std = x_train_real.std(axis=0)
# A feature that is constant over the training rows has zero standard
# deviation, and dividing by it would produce NaN/inf. Such columns use a
# divisor of 1, so they are only centred.
std = np.where(std == 0, 1.0, std)

x_train_real_normalized = (x_train_real - mean) / std
x_val_normalized = (x_val - mean) / std
x_test_normalized = (x_test - mean) / std


In [None]:
# Check the element type of the training features.
x_train_real.dtype

In [24]:
# Re-seed the NumPy and torch global RNGs before the loaders are created.
np.random.seed(24)
torch.manual_seed(24)

batch_size = 32  # 64
eval_batch_size = 32


def _make_loader(features, targets, *, batch_size, shuffle, drop_last=False):
    """Wrap two NumPy arrays in a float TensorDataset and a DataLoader over it.

    Returns the ``(dataset, loader)`` pair.
    """
    dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(targets).float(),
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
    )
    return dataset, loader


# Training batches are shuffled and a trailing incomplete batch is dropped.
ds_train, dl_train = _make_loader(
    x_train_real_normalized, y_train_real, batch_size=batch_size, shuffle=True, drop_last=True
)
# Validation and test loaders keep the data order and every sample.
ds_val, dl_val = _make_loader(x_val_normalized, y_val, batch_size=eval_batch_size, shuffle=False)
ds_test, dl_test = _make_loader(x_test_normalized, y_test, batch_size=eval_batch_size, shuffle=False)

In [25]:
from cirkit.region_graph.poon_domingos import PoonDomingos
from cirkit.region_graph.random_binary_tree import RandomBinaryTree
from cirkit.region_graph.fully_factorized import FullyFactorized
from cirkit.region_graph.quad_tree import QuadTree
# Tie the number of region-graph variables to the feature dimension of the
# data that is later fed to the model, instead of hard-coding it (the value is
# 9 for the arrays loaded above).
num_features = x_train_real_normalized.shape[1]
region_graph = RandomBinaryTree(num_vars=num_features, depth=3, num_repetitions=10)
# Alternatives that were tried:
#   region_graph = QuadTree(width, height, struct_decomp=False)
#   region_graph = FullyFactorized(num_vars=8)


from cirkit.layers.input.exp_family import CategoricalLayer
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.sum_product.cp_w_bias import CPLayerWithBias
from cirkit.layers.input.rbf_network_kernel import RBFNetworkKernelLayer

# Input layer class with its (empty) keyword arguments, then the inner layer class with rank 1.
efamily_cls, efamily_kwargs = RBFNetworkKernelLayer, dict()
layer_cls, layer_kwargs = CPLayerWithBias, dict(rank=1)

# Number of units passed for both the input and the inner layers below.
num_units = 16  # 356


from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax, ReparamIdentity
from cirkit.models.tensorized_circuit import TensorizedPC
# Gather the architecture settings in one mapping and unpack it into the factory call.
pc_settings = dict(
    num_inner_units=num_units,
    num_input_units=num_units,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=1,
    reparam=ReparamIdentity,  # other imported options: ReparamExp, ReparamLogSoftmax
)
pc_rbfn = TensorizedPC.from_region_graph(region_graph, **pc_settings)
pc_rbfn.to(device)
print(pc_rbfn)


TensorizedPC(
  (input_layer): RBFNetworkKernelLayer(
    (params_sigma): ReparamExp()
    (params_mu): ReparamIdentity()
    (params_weight): ReparamIdentity()
  )
  (scope_layer): ScopeLayer()
  (inner_layers): ModuleList(
    (0-2): 3 x CollapsedCPLayer(
      (params_in): ReparamIdentity()
      (params_bias): ReparamIdentity()
    )
    (3): SumLayer(
      (params): ReparamIdentity()
    )
  )
)


In [26]:
# Print the shape of every parameter tensor of the circuit, one per line.
param_shapes = [weights.shape for weights in pc_rbfn.parameters()]
for shape in param_shapes:
    print(shape)

torch.Size([9, 16])
torch.Size([9, 16])
torch.Size([9, 16, 16])
torch.Size([40, 16, 16])
torch.Size([40, 16])
torch.Size([20, 16, 16])
torch.Size([20, 16])
torch.Size([10, 16, 1])
torch.Size([10, 1])
torch.Size([1, 10, 1])


In [27]:
# Count the scalar entries of every parameter tensor that requires gradients.
trainable_sizes = [weights.numel() for weights in pc_rbfn.parameters() if weights.requires_grad]
total_params = sum(trainable_sizes)
total_params

19092

In [None]:
# import torch.nn as nn

# class IdentityMapping(nn.Module):
#     def __init__(self):
#         super(IdentityMapping, self).__init__()
    
#     def forward(self, x):
#         return x
    
# feature_extractor = IdentityMapping()

# initial_centers, initial_lengthscale = initial_values(
#     ds_train, feature_extractor, n_inducing_points=num_units
# )
# initial_centers.transpose(0, 1), initial_lengthscale

# pc_rbfn.input_layer.params_sigma.param = torch.nn.Parameter(
#             torch.log(initial_lengthscale * torch.ones_like(pc_rbfn.input_layer.params_sigma.param) ))

# pc_rbfn.input_layer.params_mu.param = torch.nn.Parameter(initial_centers.transpose(0, 1))

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(24)
np.random.seed(24)

# Parameters
batch_size = 32  # NOTE: dl_train / dl_val were built in an earlier cell; this value does not change them
epochs = 100
learning_rate = 1e-3
max_bad_losses = 3  # training is aborted once more than this many non-finite losses were seen

# Assume pc_rbfn is your model, already defined and initialized elsewhere

# Mean squared error between the model output and the regression target.
criterion = nn.MSELoss()
optimizer = optim.Adam(pc_rbfn.parameters(), lr=learning_rate)

nan_counter = 0
exit_loops = False

# Training and validation loop
for epoch in range(epochs):
    # ---- Training phase ----
    pc_rbfn.train()
    train_loss = 0.0
    train_seen = 0  # samples that actually contributed to train_loss

    for inputs, targets in dl_train:
        # Keep the batch on the same device as the model.
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = pc_rbfn(inputs).squeeze(1)  # Ensure outputs match the target's shape

        # Diagnostics: show where NaNs first appear.
        if torch.isnan(outputs).any():
            print("outputs", outputs)
        if torch.isnan(targets).any():
            print("targets", targets)

        loss = criterion(outputs, targets)
        if not torch.isfinite(loss):
            print("loss", loss)
            nan_counter += 1
            if nan_counter > max_bad_losses:
                exit_loops = True
                break
            # Skip the update: back-propagating a non-finite loss would write
            # NaNs into the parameters and the optimizer state.
            continue

        loss.backward()
        optimizer.step()
        # criterion returns the batch mean, so weight it by the batch size.
        train_loss += loss.item() * inputs.size(0)
        train_seen += inputs.size(0)

    if exit_loops:
        print(f"Stopping in epoch {epoch+1}: more than {max_bad_losses} non-finite losses.")
        break
    # Divide by the samples actually processed: dl_train drops the trailing
    # incomplete batch, so this can be fewer than len(dl_train.dataset).
    train_loss /= max(train_seen, 1)

    # ---- Validation phase ----
    pc_rbfn.eval()
    val_loss = 0.0
    val_seen = 0
    with torch.no_grad():
        for inputs, targets in dl_val:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = pc_rbfn(inputs).squeeze(1)  # Ensure outputs match the target's shape
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            val_seen += inputs.size(0)
    val_loss /= max(val_seen, 1)

    print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")



Epoch 1: Train Loss: 0.4381, Val Loss: 0.3907
Epoch 2: Train Loss: 0.3707, Val Loss: 0.3565
Epoch 3: Train Loss: 0.3457, Val Loss: 0.3386
Epoch 4: Train Loss: 0.3289, Val Loss: 0.3233
Epoch 5: Train Loss: 0.3162, Val Loss: 0.3126
Epoch 6: Train Loss: 0.3048, Val Loss: 0.2989
Epoch 7: Train Loss: 0.2958, Val Loss: 0.3009
Epoch 8: Train Loss: 0.2881, Val Loss: 0.2886
Epoch 9: Train Loss: 0.2814, Val Loss: 0.2829
Epoch 10: Train Loss: 0.2756, Val Loss: 0.2759
Epoch 11: Train Loss: 0.2714, Val Loss: 0.2770
Epoch 12: Train Loss: 0.2664, Val Loss: 0.2663
Epoch 13: Train Loss: 0.2622, Val Loss: 0.2657
Epoch 14: Train Loss: 0.2585, Val Loss: 0.2640
Epoch 15: Train Loss: 0.2548, Val Loss: 0.2640
Epoch 16: Train Loss: 0.2510, Val Loss: 0.2569
Epoch 17: Train Loss: 0.2475, Val Loss: 0.2470
Epoch 18: Train Loss: 0.2446, Val Loss: 0.2593
Epoch 19: Train Loss: 0.2401, Val Loss: 0.2421
Epoch 20: Train Loss: 0.2366, Val Loss: 0.2421
Epoch 21: Train Loss: 0.2343, Val Loss: 0.2410
Epoch 22: Train Loss: 

In [None]:
# Evaluate on the test loader and report the root mean squared error.
pc_rbfn.eval()  # evaluation mode
test_loss = 0.0
with torch.no_grad():  # gradients are not needed for evaluation
    for batch_x, batch_y in dl_test:
        predictions = pc_rbfn(batch_x).squeeze(1)
        batch_loss = criterion(predictions, batch_y)
        # Weight the batch loss by the batch size before accumulating.
        test_loss += batch_loss.item() * len(batch_x)
test_loss /= len(dl_test.dataset)
rmse = np.sqrt(test_loss)  # square root of the dataset-wide average loss
print(f"Test RMSE: {rmse:.4f}")

# MNIST

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision import datasets

# Seed torch (CPU and CUDA) before the random split and the shuffling loader are created.
torch.manual_seed(42)
torch.cuda.manual_seed(42)


def _flatten_image(image):
    """Return the image tensor reshaped to a single dimension."""
    return image.view(-1)


# Convert each image to a tensor, then flatten it.
transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(_flatten_image)])

# MNIST training split (divided further below) and test split.
train_val_dataset = datasets.MNIST('datasets', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('datasets', train=False, download=True, transform=transform)

# 90% of the training split is used for fitting, the remainder for validation.
num_train_val = len(train_val_dataset)
train_size = int(num_train_val * 0.9)
val_size = num_train_val - train_size
train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])

# Only the training loader shuffles its samples.
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset=split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)







In [None]:
# Peek at the first training batch to check the tensor shapes.
# The loop variables are called `images` / `labels` so that the `data` object
# created earlier in the notebook is not overwritten.
for batch_idx, (images, labels) in enumerate(train_loader):
    print(f"Batch index: {batch_idx}")
    # The transform flattens every image, so this is [batch_size, num_pixels].
    print(f"Input tensor shape: {images.shape}")
    print(f"Target tensor shape: {labels.shape}")  # [batch_size], one label per image
    break  # the first batch is enough

In [None]:
from cirkit.region_graph.poon_domingos import PoonDomingos
from cirkit.region_graph.random_binary_tree import RandomBinaryTree
from cirkit.region_graph.fully_factorized import FullyFactorized
from cirkit.region_graph.quad_tree import QuadTree
# Both sides of the 28 x 28 image grid are passed to the quad tree.
image_side = 28
region_graph = QuadTree(image_side, image_side, struct_decomp=True)
# Other region graphs imported in this cell (not used here):
#   RandomBinaryTree(num_vars=8, depth=3, num_repetitions=1)
#   FullyFactorized(num_vars=8)


from cirkit.layers.input.exp_family import CategoricalLayer
from cirkit.layers.sum_product import CPLayer
from cirkit.layers.sum_product.cp_w_bias import CPLayerWithBias
from cirkit.layers.input.rbf_network_kernel import RBFNetworkKernelLayer

# Input layer class with its (empty) keyword arguments, then the inner layer class with rank 1.
efamily_cls, efamily_kwargs = RBFNetworkKernelLayer, dict()
layer_cls, layer_kwargs = CPLayerWithBias, dict(rank=1)

# Number of units passed for both the input and the inner layers below.
num_units = 32  # 356


from cirkit.reparams.leaf import ReparamExp, ReparamLogSoftmax, ReparamSoftmax, ReparamIdentity
from cirkit.models.tensorized_circuit import TensorizedPC
# Gather the architecture settings in one mapping and unpack it into the factory call.
pc_settings = dict(
    num_inner_units=num_units,
    num_input_units=num_units,
    efamily_cls=efamily_cls,
    efamily_kwargs=efamily_kwargs,
    layer_cls=layer_cls,
    layer_kwargs=layer_kwargs,
    num_classes=10,
    reparam=ReparamIdentity,  # other imported options: ReparamExp, ReparamLogSoftmax
)
pc_rbfn = TensorizedPC.from_region_graph(region_graph, **pc_settings)
pc_rbfn.to(device)
print(pc_rbfn)


In [None]:
# Print the shape of every parameter tensor of the circuit, one per line.
param_shapes = [weights.shape for weights in pc_rbfn.parameters()]
for shape in param_shapes:
    print(shape)

In [None]:
# Model, loss function and optimizer
model = pc_rbfn
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training and validation loop
epochs = 10
for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    train_loss = 0.0
    train_seen = 0
    for images, labels in train_loader:
        # Keep the batch on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weighting it by the batch size
        # keeps a smaller final batch from being over-represented in the
        # epoch average.
        train_loss += loss.item() * images.size(0)
        train_seen += images.size(0)

    train_loss /= max(train_seen, 1)  # max() avoids a ZeroDivisionError on an empty loader

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0
    val_seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            val_seen += images.size(0)

    val_loss /= max(val_seen, 1)

    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

# Add your test loop here if needed, following the same pattern as validation
# Remember to evaluate the model's performance on the test set after training