In [1]:
from typing import List, Tuple

# Conformal prediction from human preferences

In this notebook I aim to explore how we can use conformal prediction to make model-free risk-controlled prediction from human preferences. We will start from a simple case study.

# Simple case

For our simple case, we will consider input data of the form $x_1,\ldots,x_d$ and will define a utility function $U(x_1,\ldots,x_d) = u$ as a low degree polynomial function. For example a linear function. To make it simpler, we will restrict all the coefficients to be in the range $[-1/d,1/d]$, and the input features to be in the range $[-1,1]$, so that the output is between $-1$ and $1$.

In [2]:
def U(x, coefficients):
    #coefficients /= np.linalg.norm(coefficients, ord = 1)
    return sum(coefficients * x) % 2 - 1

Next we need a model to predict the utility function, or in other words, fit a model to replicate the behavior of $U(a)-U(b)$. The output of the model will be a softmax distribution over bins between $-1$ and $1$.
We will follow Pytorch Lightning's [LightningModule](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html) to define our model.

In [3]:
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torch

class LitModel(pl.LightningModule):

    """ PyTorch Lightning model.
    Outputs the probability that model U(a,b) is in bin i.
    
    Args:
        input_features (int): Number of input features of each of the two inputs.
        output_predictions (int): Number of output prediction bins.
        hidden_dim (int): Number of hidden units in the hidden layer.
        layers (int): Number of hidden layers.
    """

    def __init__(self, input_features, output_predictions, hidden_dim=128, layers = 1):
        self.input_features = input_features
        self.output_predictions = output_predictions
        self.hidden_dim = hidden_dim
        self.layers = layers
        super().__init__()

        self.initial = nn.Sequential(
            nn.Linear(2*self.input_features, self.hidden_dim),
            nn.ReLU()
        )

        self.backbone_block = nn.Sequential(
            nn.Linear(self.hidden_dim, self.hidden_dim),
            nn.ReLU()
        )

        self.head = nn.Sequential(
            nn.Linear(self.hidden_dim, self.output_predictions),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.initial(x)
        for i in range(self.layers):
            x = self.backbone_block(x)
        x = self.head(x)
        return x/(x.sum(dim=1).unsqueeze(1))

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.l1_loss(y_hat, y)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

In [4]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

def create_dataloader(x_list: list, y_list: list):
    tensor_x = torch.Tensor(np.asarray(x_list)) # transform to torch tensor
    tensor_y = torch.Tensor(np.asarray(y_list))
    my_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
    return DataLoader(my_dataset, num_workers = 4) # create your dataloader

def create_predict_dataloader(x_list: list):
    tensor_x = torch.Tensor(np.asarray(x_list)) # transform to torch tensor
    return DataLoader(tensor_x, num_workers = 4) # create your dataloader

The examples will be generated using the following function, which assigns them to bins.

In [5]:
# Generate random coefficients
#coefficients = np.random.uniform(-1, 1, num_features)
coefficients = np.array([0.5, 0.5, .2])

def generate_examples(num_examples, num_features, num_bins, coefficients = coefficients):
    """Generates examples of human preferences
    If we decide to use a binary loss function, it is sufficient with num_bins = 2.
    """

    # Generate random inputs
    x0 = np.random.normal(loc = 0.25, scale = 0.3, size = (num_examples, num_features))
    x1 = np.random.normal(loc = -0.25, scale = 0.3, size = (num_examples, num_features))

    # Compute the utility of each input
    u = np.array([U(x0[i], coefficients) - U(x1[i], coefficients) for i in range(num_examples)])

    # Compute the bin of each input
    bins = np.array([np.digitize(u[i], np.linspace(-1, 1, num_bins-1)) for i in range(num_examples)])

    # Create the input list
    x_list = []
    for i in range(num_examples):
        x_list.append(np.concatenate((x0[i], x1[i])))

    # Create the output list
    y_list = []
    for i in range(num_examples):
        y = np.zeros(num_bins)
        y[bins[i]] = 1
        y_list.append(y)

    return x_list, y_list

x, y = generate_examples(10, 3, 20, coefficients = np.array([0.5, 0.5, .2]))
x[:10]

[array([-0.23667894,  0.15674154,  0.41631417, -0.11894899, -0.51452757,
        -0.04644283]),
 array([ 0.22626756, -0.09963304,  0.01681321, -0.43850014,  0.46128857,
        -0.2626279 ]),
 array([ 0.5239704 ,  0.22037693, -0.07098589, -0.11309304, -0.22264973,
        -0.31765782]),
 array([-0.53670946,  0.22690499,  0.15271576, -0.21605744, -0.40273874,
         0.10345592]),
 array([ 0.41519535,  0.43203526,  0.48316824, -0.67198386,  0.03727755,
        -0.04733726]),
 array([ 0.37779181,  0.1335171 , -0.01670557, -0.50545807, -0.19671323,
         0.17591687]),
 array([-0.1111115 ,  0.50739006,  0.40778453, -0.06110276, -0.15677432,
        -0.35261553]),
 array([ 0.19243059, -0.15630802,  0.4487055 , -0.66165885, -0.59536609,
         0.09727422]),
 array([ 0.05840442,  0.20977983,  0.15179187, -0.36763055, -0.56696353,
        -0.27947376]),
 array([ 0.41965484,  0.60667182,  0.082924  , -0.42744416, -0.23043137,
        -0.20951881])]

We can then train a simple model

In [6]:
num_examples = 1000
num_features = 3
num_bins = 20
x_list, y_list = generate_examples(num_examples = num_examples, num_features = num_features, num_bins = num_bins)
train_loader = create_dataloader(x_list, y_list)
predict_loader = create_predict_dataloader(x_list)
trainer = pl.Trainer(max_epochs=5)
model = LitModel(input_features=num_features, output_predictions=num_bins)

trainer.fit(model, train_dataloaders=train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name           | Type       | Params
----------------------------------------------
0 | initial        | Sequential | 896   
1 | backbone_block | Sequential | 16.5 K
2 | head           | Sequential | 2.6 K 
----------------------------------------------
20.0 K    Trainable params
0         Non-trainable params
20.0 K    Total params
0.080     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


# Conformal prediction

Remember that we defined the loss to be
$$
     \mathcal{L}(U, a_i,b_i,y_i) =  y_i(U(a_i)-U(b_i)) + (1-y_i)(U(b_i)-U(a_i)),
$$
for $y_i \in \{0,1\}$ the true preference, and $U$ the learned utility function.

In the conformal prediction procedure we now follow the following steps:
1. We have to define the set $\mathcal{C}_\alpha$:
$$
\mathcal{C}_\alpha(a_i,b_i) = \{u_i = U(a_i)-U(b_i) \in \mathbb{R}: \rho( u_i )\geq  1-\alpha \}
$$
where $\rho$ is the cumulative distribution function of the model's output distribution.

In [7]:
def C(alpha: float, x_list: torch.Tensor):
    loader = DataLoader(torch.Tensor(x_list))
    predictions = trainer.predict(model,loader)
    p = []
    for prediction in predictions:
        prediction = torch.flatten(prediction)
        p.append(torch.where(prediction > alpha, torch.ones_like(prediction), torch.zeros_like(prediction)))
    return torch.stack(p)

In [8]:
loader = DataLoader(torch.Tensor(np.asarray(x_list)))
predictions = trainer.predict(model,loader)
predictions[:10]

  loader = DataLoader(torch.Tensor(x_list))
  rank_zero_warn(


Predicting: 1000it [00:00, ?it/s]

[tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0.]]),
 tensor([[1.0000e+00, 7.6691e-25, 9.6473e-27, 2.9664e-25, 2.4066e-21, 1.5226e-26,
          1.0534e-24, 4.5096e-26, 2.9778e-25, 2.0355e-25, 2.2128e-26, 1.4581e-21,
          1.4540e-24, 3.0809e-21, 3.7551e-26, 1.1048e-25, 6.1125e-25, 1.1821e-26,
          2.0940e-28, 1.4210e-25]]),
 tensor([[1.0000e+00, 2.9918e-30, 1.3230e-32, 9.3888e-31, 6.6670e-26, 2.3334e-32,
          3.9491e-30, 9.4167e-32, 8.4926e-31, 5.8756e-31, 4.1166e-32, 3.1195e-26,
          6.3448e-30, 7.1682e-26, 6.9774e-32, 2.6030e-31, 2.2062e-30, 1.7108e-32,
          1.0669e-34, 3.6415e-31]]),
 tensor([[1.0000e+00, 1.2073e-36, 0.0000e+00, 2.6772e-37, 2.3462e-31, 3.1904e-39,
          1.5786e-36, 1.6515e-38, 2.6184e-37, 1.5123e-37, 6.3095e-39, 7.0394e-32,
          2.5056e-36, 2.2155e-31, 1.1581e-38, 6.0424e-38, 7.7258e-37, 0.0000e+00,
          0.0000e+00, 7.9018e-38]]),
 tensor([[1.0000e+00, 6.6551e-24, 9.6939e-26, 3.

In [9]:
y_list[:10]

[array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])]

In [10]:
a = C(0.5, x_list)
a

Predicting: 1000it [00:00, ?it/s]

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])