In [1]:
from typing import List, Tuple
import random
from math import cos, tanh, pi

# Conformal prediction from human preferences

In this notebook I aim to explore how we can use conformal prediction to make model-free risk-controlled prediction from human preferences. We will start from a simple case study.

# Simple case

For our simple case, we will consider input data of the form $x_1,\ldots,x_d$ and will define a utility function $U(x_1,\ldots,x_d) = u$ as a low degree polynomial function. For example a linear function. To make it simpler, we will restrict all the coefficients to be in the range $[-1/d,1/d]$, and the input features to be in the range $[-1,1]$, so that the output is between $-1$ and $1$.

In [2]:
def clip(x, low, high):
    return min(max(x, low), high)
def U(x, coefficients):
    #coefficients /= np.linalg.norm(coefficients, ord = 1)
    return cos(sum(coefficients * x) % 2 - 1 + random.uniform(-0.3, 0.3))

Next we need a model to predict the utility function, or in other words, fit a model to replicate the behavior of $U(a)-U(b)$. The output of the model will be a softmax distribution over bins between $-1$ and $1$.
We will follow Pytorch Lightning's [LightningModule](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html) to define our model.

In [3]:
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
import torch

class LitModel(pl.LightningModule):

    """ PyTorch Lightning model.
    Outputs the probability that model U(a,b) is in bin i.
    
    Args:
        input_features (int): Number of input features of each of the two inputs.
        output_predictions (int): Number of output prediction bins.
        hidden_dim (int): Number of hidden units in the hidden layer.
        layers (int): Number of hidden layers.
    """

    def __init__(self, input_features, output_predictions, hidden_dim=8, layers = 1):
        self.input_features = input_features
        self.output_predictions = output_predictions
        self.hidden_dim = hidden_dim
        self.layers = layers
        super().__init__()

        self.initial = nn.Sequential(
            nn.Linear(2*self.input_features, self.hidden_dim),
            nn.ReLU()
        )

        self.backbone_block = nn.Sequential(
            nn.Linear(self.hidden_dim, self.hidden_dim),
            nn.ReLU()
        )

        self.head = nn.Sequential(
            nn.Linear(self.hidden_dim, self.output_predictions),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.initial(x)
        for i in range(self.layers):
            x = self.backbone_block(x)
        x = self.head(x)
        return x/(x.sum(dim=1).unsqueeze(1))

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.l1_loss(y_hat, y)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

In [4]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

def create_dataloader(x_list: list, y_list: list):
    tensor_x = torch.Tensor(np.asarray(x_list)) # transform to torch tensor
    tensor_y = torch.Tensor(np.asarray(y_list))
    my_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
    return DataLoader(my_dataset, num_workers = 4) # create your dataloader

def create_predict_dataloader(x_list: list):
    tensor_x = torch.Tensor(np.asarray(x_list)) # transform to torch tensor
    return DataLoader(tensor_x, num_workers = 4) # create your dataloader

The examples will be generated using the following function, which assigns them to bins.

In [5]:
# Generate random coefficients
#coefficients = np.random.uniform(-1, 1, num_features)
coefficients = np.array([0.5, 0.5, .2])

def generate_examples(num_examples, num_features, num_bins, coefficients = coefficients):
    """Generates examples of human preferences
    If we decide to use a binary loss function, it is sufficient with num_bins = 2.
    """

    # Generate random inputs
    x0 = np.random.normal(loc = 0.15, scale = 0.45, size = (num_examples, num_features))
    x1 = np.random.normal(loc = -0.15, scale = 0.45, size = (num_examples, num_features))

    # Compute the utility of each input
    u = np.array([U(x0[i], coefficients) - U(x1[i], coefficients) for i in range(num_examples)])

    # Compute the bin of each input
    bins = np.array([np.digitize(u[i], np.linspace(-1, 1, num_bins-1)) for i in range(num_examples)])

    # Create the input list
    x_list = []
    for i in range(num_examples):
        x_list.append(np.concatenate((x0[i], x1[i])))

    # Create the output list
    y_list = []
    for i in range(num_examples):
        y = np.zeros(num_bins)
        y[bins[i]] = 1
        y_list.append(y)

    return x_list, y_list

x, y = generate_examples(10, 3, 20, coefficients = np.array([0.5, 0.5, .2]))
x[:10]

[array([ 0.84067288, -0.32475976, -0.13882388,  0.51749858, -0.01969603,
         0.29098055]),
 array([-0.93276285,  0.25244101,  0.02546276, -0.51575125,  0.20607225,
        -0.4723319 ]),
 array([-0.02033019,  0.34439425,  0.02285187,  0.91740003,  0.1009611 ,
         0.36296109]),
 array([-0.08996239, -0.53661993,  0.47932938, -0.48975252, -0.04668654,
        -0.99545865]),
 array([ 0.28480336, -0.28763758,  0.75933569, -0.33976112, -0.24806751,
         0.26215962]),
 array([ 0.1573095 ,  0.36363585, -0.34761746,  0.19407652, -0.01781701,
        -0.27275464]),
 array([ 0.71923183,  0.48632117, -0.34382168, -0.81455641, -0.22379036,
        -0.33653165]),
 array([-0.16650034,  0.49525262,  0.49470018, -1.13733938,  0.31122036,
         0.3794651 ]),
 array([-0.99020551,  0.19733143,  0.93060258,  0.15912254,  0.50473908,
         0.47848913]),
 array([ 0.3637412 ,  0.47423591, -0.75495658,  0.01284221, -0.22261968,
        -0.50860986])]

We can then train a simple model

In [6]:
num_examples = 1000
num_features = 3
num_bins = 20
x_list, y_list = generate_examples(num_examples = num_examples, num_features = num_features, num_bins = num_bins)
train_loader = create_dataloader(x_list, y_list)
predict_loader = create_predict_dataloader(x_list)
trainer = pl.Trainer(max_epochs=5)
model = LitModel(input_features=num_features, output_predictions=num_bins)

trainer.fit(model, train_dataloaders=train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name           | Type       | Params
----------------------------------------------
0 | initial        | Sequential | 56    
1 | backbone_block | Sequential | 72    
2 | head           | Sequential | 180   
----------------------------------------------
308       Trainable params
0         Non-trainable params
308       Total params
0.001     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


# Conformal prediction

Remember that we defined the loss to be
$$
     \mathcal{L}(U, a_i,b_i,y_i) =  y_i(U(a_i)-U(b_i)) + (1-y_i)(U(b_i)-U(a_i)),
$$
for $y_i \in \{0,1\}$ the true preference, and $U$ the learned utility function.

In the conformal prediction procedure we now follow the following steps:
1. We have to define the set $\mathcal{C}_\alpha$:
$$
\mathcal{C}_\alpha(a_i,b_i) = \{u_i = U(a_i)-U(b_i) \in \mathbb{R}: \rho( u_i )\geq  1-\alpha \}
$$
where $\rho$ is the cumulative distribution function of the model's output distribution.

In [7]:
def C(alpha: float, x_list: torch.Tensor):
    loader = DataLoader(torch.Tensor(x_list))
    predictions = trainer.predict(model,loader)
    p = []
    for prediction in predictions:
        prediction = torch.flatten(prediction)
        p.append(torch.where(prediction > alpha, torch.ones_like(prediction), torch.zeros_like(prediction)))
    return torch.stack(p)

In [8]:
loader = DataLoader(torch.Tensor(np.asarray(x_list)))
predictions = trainer.predict(model,loader)
predictions[:10]

  rank_zero_warn(


Predicting: 1000it [00:00, ?it/s]

[tensor([[2.8451e-23, 2.4733e-22, 7.4837e-24, 2.6988e-23, 7.3657e-22, 4.2140e-19,
          9.8413e-17, 5.3106e-11, 5.0000e-01, 5.0000e-01, 6.5462e-13, 1.1370e-11,
          1.8816e-17, 1.7446e-17, 3.5350e-23, 4.3293e-23, 1.0126e-22, 3.2488e-22,
          2.6642e-22, 9.0426e-24]]),
 tensor([[1.4185e-07, 3.2132e-07, 1.1644e-07, 1.9783e-07, 2.6749e-07, 7.8826e-06,
          1.1076e-05, 8.6882e-04, 4.9251e-01, 4.9306e-01, 1.7450e-03, 1.1713e-02,
          6.4343e-05, 2.0709e-05, 1.9226e-07, 9.6952e-08, 2.0004e-07, 3.9626e-07,
          1.7994e-07, 1.2045e-07]]),
 tensor([[1.8437e-16, 3.5246e-15, 1.1204e-16, 6.6664e-16, 3.4538e-15, 1.3431e-12,
          1.0912e-11, 4.7068e-07, 5.0000e-01, 5.0000e-01, 8.9636e-07, 6.2206e-09,
          2.2556e-11, 1.8833e-11, 2.8003e-16, 1.3551e-16, 4.3965e-16, 5.4832e-15,
          9.5530e-16, 7.9100e-17]]),
 tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 1.0634e-24, 3.3333e-01, 3.3333e-01, 3.7898e-19,

In [9]:
y_list[:10]

[array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])]

In [10]:
a = C(0.3, x_list)
a

  loader = DataLoader(torch.Tensor(x_list))


Predicting: 1000it [00:00, ?it/s]

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [11]:
a[0:10]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0.]])

## Learn the test procedure

We want to control teh risk in the test procedure. We define the risk at a given value of $\alpha$ as
$$\hat{R}(\alpha) = \frac{1}{n}\sum_{i=1}^n \mathcal{L}(\mathcal{C}_\alpha(a_i,b_i), y_i)$$
where $\mathcal{L}$ is the loss function and $\mathcal{C}_\alpha$ the conformal prediction set.

With the risk we can now define the p-value
$$p_\alpha^{\text{Hoeffding}} = e^{-2n(\lambda - \hat{R}(\alpha))^2}$$
where $n$ the train size.

Then, we can implement a familywise-error rate control procedure, for example the Bonferroni correction.

# Detecting distribution shifts

## Time-stratified coverage metric

We want to use the time-stratified coverage metric to detect distribution shifts. 
$$ \text{Time Stratified Loss metric:}\quad    \min_{t\in \{1,\ldots,T\}}\frac{1}{|\mathcal{I}_t|}\sum_{i\in\mathcal{I}_t}\mathcal{L}(\mathcal{C}_\alpha(x_i),y_i)$$
which is basically detecting whether the average loss is significantly different in the test set.

## Full conformal prediction / inductive conformal predictors

We define conformal predictors as
$$\Gamma^\epsilon(z_1,\ldots,z_n)(x_{n+1}) = \{y|p^y\geq \epsilon\}$$