In [1]:
from typing import List, Tuple
import random

# Conformal prediction from human preferences

In this notebook I aim to explore how we can use conformal prediction to make model-free, risk-controlled predictions from human preferences. We will start from a simple case study.

# Simple case

For our simple case, we will consider input data of the form $x_1,\ldots,x_d$ and define a utility function $U(x_1,\ldots,x_d) = u$ as a low-degree polynomial, for example a linear function. To keep things simple, we restrict all the coefficients to the range $[-1/d,1/d]$ and the input features to the range $[-1,1]$, so that the output lies between $-1$ and $1$.

In [2]:
def clip(x, low, high):
    """Clamp ``x`` to the interval ``[low, high]``.

    Values below ``low`` are raised to ``low``; the result is then capped at ``high``.
    """
    raised = max(x, low)
    return min(raised, high)
def U(x, coefficients):
    """Noisy utility of a single input ``x``.

    The element-wise product ``coefficients * x`` is summed, wrapped into
    ``[-1, 1)`` via ``% 2 - 1``, perturbed with uniform noise drawn from
    ``[-0.3, 0.3]`` and finally clipped to ``[-1, 1]``.

    The coefficients are used as given; no L1 normalisation is applied.
    Assumes ``x`` and ``coefficients`` multiply element-wise (e.g. NumPy arrays).
    """
    weighted_sum = sum(coefficients * x)
    wrapped = weighted_sum % 2 - 1
    noise = random.uniform(-0.3, 0.3)
    return clip(wrapped + noise, -1, 1)

Next we need a model to predict the utility function or, in other words, a model fitted to replicate the behavior of $U(a)-U(b)$. The output of the model will be a softmax distribution over bins between $-1$ and $1$.
We will use PyTorch Lightning's [LightningModule](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html) to define our model.

In [3]:
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torch

class LitModel(pl.LightningModule):

    """ PyTorch Lightning model over a pair of inputs ``(a, b)``.

    The network receives the two inputs concatenated along the feature axis and
    outputs, for every bin ``i``, the probability that ``U(a) - U(b)`` falls in
    bin ``i``. Probabilities are obtained by normalising per-bin sigmoid
    activations so that they sum to one over the bins.

    Args:
        input_features (int): Number of input features of each of the two inputs.
        output_predictions (int): Number of output prediction bins.
        hidden_dim (int): Number of hidden units in each hidden layer.
        layers (int): Number of hidden ``Linear + ReLU`` layers after the initial
            projection. Every layer has its own weights.
    """

    def __init__(self, input_features, output_predictions, hidden_dim=8, layers = 1):
        # Initialise the Module machinery before touching any attribute, as
        # required for registering sub-modules and recommended for everything else.
        super().__init__()
        self.input_features = input_features
        self.output_predictions = output_predictions
        self.hidden_dim = hidden_dim
        self.layers = layers

        # Projects the concatenated pair (2 * input_features) to the hidden width.
        self.initial = nn.Sequential(
            nn.Linear(2*self.input_features, self.hidden_dim),
            nn.ReLU()
        )

        # One independent Linear + ReLU pair per requested hidden layer.
        # Re-applying a single shared block `layers` times would tie the weights
        # of every layer together, so the depth would add no parameters.
        # With the default layers=1 the structure is Sequential(Linear, ReLU).
        backbone = []
        for _ in range(self.layers):
            backbone.append(nn.Linear(self.hidden_dim, self.hidden_dim))
            backbone.append(nn.ReLU())
        self.backbone_block = nn.Sequential(*backbone)

        # Per-bin scores in (0, 1); normalised to a distribution in forward().
        self.head = nn.Sequential(
            nn.Linear(self.hidden_dim, self.output_predictions),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return per-bin probabilities for inputs whose last axis has
        ``2 * input_features`` entries."""
        x = self.initial(x)
        x = self.backbone_block(x)
        x = self.head(x)
        # Normalise over the bin axis. The denominator is clamped so that a row
        # whose sigmoids all underflow to 0 yields zeros instead of NaN.
        total = x.sum(dim=-1, keepdim=True)
        return x / total.clamp_min(torch.finfo(x.dtype).tiny)

    def training_step(self, batch, batch_idx):
        """Mean absolute error between predicted bin probabilities and targets."""
        x, y = batch
        y_hat = self(x)
        # NOTE(review): L1 on probabilities vs. one-hot targets is kept as in the
        # original; a cross-entropy objective may train better -- confirm intent.
        loss = F.l1_loss(y_hat, y)
        return loss

    def configure_optimizers(self):
        """Adam over all parameters with a learning rate of 0.02."""
        return torch.optim.Adam(self.parameters(), lr=0.02)

In [4]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

def create_dataloader(x_list: list, y_list: list, batch_size: int = 1,
                      shuffle: bool = False, num_workers: int = 4):
    """Wrap paired inputs and targets in a ``DataLoader``.

    Args:
        x_list: Sequence of equally sized input vectors.
        y_list: Sequence of equally sized target vectors, one per input.
        batch_size: Examples per batch. Defaults to 1.
        shuffle: Whether to reshuffle the examples every epoch. Defaults to False.
        num_workers: Number of loader worker processes. Defaults to 4.

    Returns:
        DataLoader yielding ``(x, y)`` tensor batches.
    """
    # Stack into single arrays first, then convert to default-dtype tensors.
    tensor_x = torch.Tensor(np.asarray(x_list))
    tensor_y = torch.Tensor(np.asarray(y_list))
    dataset = TensorDataset(tensor_x, tensor_y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                      num_workers=num_workers)

def create_predict_dataloader(x_list: list, batch_size: int = 1, num_workers: int = 4):
    """Wrap inputs only (no targets) in a ``DataLoader`` for prediction.

    Args:
        x_list: Sequence of equally sized input vectors.
        batch_size: Examples per batch. Defaults to 1.
        num_workers: Number of loader worker processes. Defaults to 4.

    Returns:
        DataLoader yielding input tensor batches in the original order.
    """
    # Stack into a single array first, then convert to a default-dtype tensor.
    tensor_x = torch.Tensor(np.asarray(x_list))
    return DataLoader(tensor_x, batch_size=batch_size, num_workers=num_workers)

The examples will be generated using the following function, which assigns them to bins.

In [5]:
# Fixed utility coefficients, one per input feature (three features here).
# For a random draw instead: np.random.uniform(-1, 1, num_features)
coefficients = np.array([0.5, 0.5, 0.2])

def generate_examples(num_examples, num_features, num_bins, coefficients = coefficients):
    """Generates examples of human preferences.

    Two inputs ``x0`` and ``x1`` are drawn per example from normal
    distributions (means ``0.15`` and ``-0.15``, scale ``0.35``), and the
    utility difference ``U(x0) - U(x1)`` is assigned to one of ``num_bins`` bins.

    The bin edges are ``num_bins - 1`` evenly spaced points on ``[-1, 1]``:
    differences below ``-1`` fall in bin 0 and differences of at least ``1``
    fall in the last bin.

    If we decide to use a binary loss function, it is sufficient with num_bins = 2.
    NOTE(review): ``U(x0) - U(x1)`` can range over ``[-2, 2]``, and with
    ``num_bins = 2`` the single edge sits at ``-1`` rather than ``0`` --
    confirm that this binning is the intended one.

    Args:
        num_examples: Number of examples to generate.
        num_features: Number of features of each of the two inputs.
        num_bins: Number of bins (length of each one-hot target).
        coefficients: Utility coefficients, one per feature.

    Returns:
        ``(x_list, y_list)``: ``x_list[i]`` is ``x0[i]`` followed by ``x1[i]``
        (length ``2 * num_features``); ``y_list[i]`` is the one-hot bin vector
        of length ``num_bins``.

    Raises:
        ValueError: If ``num_bins < 1`` or ``coefficients`` does not have
            shape ``(num_features,)``.
    """
    if num_bins < 1:
        raise ValueError(f"num_bins must be at least 1, got {num_bins}")
    # A mismatched coefficient vector could otherwise broadcast silently
    # (e.g. a single feature against several coefficients).
    if np.shape(coefficients) != (num_features,):
        raise ValueError(
            f"coefficients must have shape ({num_features},), got {np.shape(coefficients)}"
        )

    # Generate random inputs
    x0 = np.random.normal(loc = 0.15, scale = 0.35, size = (num_examples, num_features))
    x1 = np.random.normal(loc = -0.15, scale = 0.35, size = (num_examples, num_features))

    # Utility difference of each pair; U is noisy, so the evaluation order
    # (first input, then second, example by example) is kept.
    u = np.array([U(a, coefficients) - U(b, coefficients) for a, b in zip(x0, x1)])

    # Bin every difference at once; the edges do not depend on the example.
    edges = np.linspace(-1, 1, num_bins-1)
    bins = np.digitize(u, edges)

    # Inputs: each row is x0[i] followed by x1[i].
    pairs = np.concatenate((x0, x1), axis = 1)

    # Targets: one-hot encoding of the bin index.
    one_hot = np.zeros((num_examples, num_bins))
    one_hot[np.arange(num_examples), bins] = 1

    return list(pairs), list(one_hot)

# Small smoke test: draw ten examples and inspect the concatenated inputs.
x, y = generate_examples(
    num_examples=10,
    num_features=3,
    num_bins=20,
    coefficients=np.array([0.5, 0.5, 0.2]),
)
x[:10]

[array([ 0.00076351,  0.01906242,  0.02239481, -0.33327858, -0.23780232,
        -0.37198629]),
 array([-0.16386079,  0.47389309, -0.01138391,  0.40203087, -0.10618092,
        -0.06999902]),
 array([ 0.33511639, -0.11843128, -0.13927999, -0.1190407 , -0.23288039,
        -0.13702665]),
 array([-0.64881198,  0.52190198, -0.25483299,  0.27674408, -0.17055324,
        -0.24415843]),
 array([-0.07679241,  0.36893147,  0.3492154 , -0.72028302,  0.14039207,
         0.7326421 ]),
 array([-0.3000561 ,  0.73631116, -0.1421585 , -0.55122175,  0.24365848,
        -0.02189805]),
 array([-0.1456061 ,  0.05513142, -0.18299701, -0.08446045,  0.09838321,
        -0.20147086]),
 array([ 0.36203249,  0.0376628 , -0.40698167, -0.5283839 ,  0.12446166,
        -0.39343586]),
 array([ 0.27774886,  0.89379991, -0.10315757,  1.06646459, -0.21688524,
        -0.63537188]),
 array([ 0.12235522, -0.31541149, -0.03456388, -0.36951537, -0.34793511,
         0.57688398])]

We can then train a simple model.

In [6]:
# Experiment configuration.
num_examples, num_features, num_bins = 1000, 3, 20

# Synthetic preference data and the loaders built on top of it.
x_list, y_list = generate_examples(num_examples, num_features, num_bins)
train_loader = create_dataloader(x_list, y_list)
predict_loader = create_predict_dataloader(x_list)

# Trainer limited to five epochs, and a model sized to the data.
trainer = pl.Trainer(max_epochs=5)
model = LitModel(num_features, num_bins)

trainer.fit(model, train_dataloaders=train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name           | Type       | Params
----------------------------------------------
0 | initial        | Sequential | 56    
1 | backbone_block | Sequential | 72    
2 | head           | Sequential | 180   
----------------------------------------------
308       Trainable params
0         Non-trainable params
308       Total params
0.001     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


# Conformal prediction

Remember that we defined the loss to be
$$
     \mathcal{L}(U, a_i,b_i,y_i) =  y_i(U(a_i)-U(b_i)) + (1-y_i)(U(b_i)-U(a_i)),
$$
for $y_i \in \{0,1\}$ the true preference, and $U$ the learned utility function.

The conformal prediction procedure then consists of the following steps:
1. We have to define the set $\mathcal{C}_\alpha$:
$$
\mathcal{C}_\alpha(a_i,b_i) = \{u_i = U(a_i)-U(b_i) \in \mathbb{R}: \rho( u_i )\geq  1-\alpha \}
$$
where $\rho$ is the cumulative distribution function of the model's output distribution.

In [7]:
def C(alpha: float, x_list: "list | np.ndarray | torch.Tensor"):
    """Mark, per example, the bins whose predicted probability exceeds ``alpha``.

    Runs the notebook-level ``model`` through ``trainer.predict`` on the rows of
    ``x_list`` (one row per batch) and thresholds every prediction at ``alpha``.

    NOTE(review): the markdown defines the set through the cumulative
    distribution (``rho(u) >= 1 - alpha``), whereas this thresholds the per-bin
    probabilities directly -- confirm which definition is intended.

    Args:
        alpha: Probability threshold.
        x_list: Inputs, either a tensor or a sequence/array of input vectors.

    Returns:
        Tensor with one row per prediction and one column per bin, holding
        ``1`` where the probability is greater than ``alpha`` and ``0`` elsewhere.
    """
    if isinstance(x_list, torch.Tensor):
        inputs = x_list
    else:
        # Stack into one array first: building a tensor directly from a list of
        # ndarrays is very slow and makes PyTorch emit a warning.
        inputs = torch.Tensor(np.asarray(x_list))
    loader = DataLoader(inputs)
    predictions = trainer.predict(model, loader)
    # Flatten each (1, num_bins) prediction and turn the comparison into a
    # 0/1 mask with the prediction's own dtype.
    masks = [(torch.flatten(p) > alpha).to(p.dtype) for p in predictions]
    return torch.stack(masks)

In [8]:
# Run the trained model over the training inputs (default batch size of one)
# and preview the first ten predictions.
loader = DataLoader(
    torch.Tensor(np.asarray(x_list)),
)
predictions = trainer.predict(model, dataloaders=loader)
predictions[:10]

  rank_zero_warn(


Predicting: 1000it [00:00, ?it/s]

[tensor([[1.0000e+00, 0.0000e+00, 8.4404e-35, 2.9256e-37, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 4.1446e-27, 3.3345e-37, 4.7213e-33, 8.1898e-11, 7.5162e-39,
          1.5378e-22, 7.1443e-37, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 2.5335e-36]]),
 tensor([[1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 1.1078e-31, 0.0000e+00, 1.7301e-38, 1.3257e-12, 0.0000e+00,
          3.4472e-26, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 0.0000e+00]]),
 tensor([[1.0000e+00, 1.0385e-29, 4.3405e-20, 1.6094e-21, 4.5177e-26, 1.0795e-26,
          3.0134e-27, 2.1127e-15, 1.8129e-21, 3.6258e-19, 3.0372e-06, 1.9079e-22,
          4.1052e-13, 3.3394e-21, 2.9149e-26, 2.2849e-27, 1.5641e-26, 5.3213e-28,
          3.8934e-27, 1.7968e-20]]),
 tensor([[3.4221e-06, 2.7941e-13, 7.3754e-12, 3.8613e-12, 1.4642e-11, 8.0692e-12,
          1.1468e-11, 6.6857e-08, 1.8256e-08, 1.4364e-06, 5.4604e-03,

In [9]:
# One-hot bin labels of the first ten training examples.
y_list[:10]

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])]

In [10]:
# Threshold the predictions at alpha = 0.5: a 1 marks every bin whose
# predicted probability exceeds 0.5.
a = C(0.5, x_list)
a

  loader = DataLoader(torch.Tensor(x_list))


Predicting: 1000it [00:00, ?it/s]

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])

In [11]:
# Thresholded bin mask of the first example.
a[0]

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])