In [1]:
import torch
import math
from utils import balanced_entropy, epistemic_uncertainty, aleatoric_uncertainty

### In this notebook, we demonstrate how to calculate uncertainty values from a given set of reward samples.

##### Let's assume that the MC-dropout samples of the reward gap R1 - R2 are given as follows.

In [2]:
# MC-dropout samples of the reward gap, listed flat and then wrapped in one
# outer list so the resulting tensor has a single row: shape (1, 25).
gap_values = [
    -0.5071,  0.3939,  0.6309,  0.5251,  1.2876,
     1.1699,  0.5775,  1.1834,  0.1770,  0.1188,
    -0.5556,  0.1510,  1.7552,  1.0578,  0.8473,
     0.0886,  1.3317,  1.1074, -0.2550,  0.0272,
     0.9689,  0.8330,  0.5621,  0.2605, -0.0145,
]
reward_gap_samples = torch.tensor([gap_values])  # we use 25 samples

In [3]:
# Reduce over dim 1 (the sample axis) to get one mean and one standard
# deviation per row of reward-gap samples.
sample_mean = reward_gap_samples.mean(dim=1)
# torch's std defaults to Bessel's correction (divides by N - 1).
sample_std = reward_gap_samples.std(dim=1)
(sample_mean, sample_std)

(tensor([0.5489]), tensor([0.6020]))

# Balanced Entropy

In [4]:
# Balanced entropy computed from the sample mean and standard deviation;
# the result is named so it can be reused, then displayed as the cell output.
balanced_entropy_value = balanced_entropy(sample_mean, sample_std)
balanced_entropy_value

tensor([-0.0047])

# Sigmoid Transformation

In [5]:
# Map every reward-gap sample x to the log-probabilities of a two-class
# distribution, [log sigmoid(x), log(1 - sigmoid(x))], stacked along a new
# dimension 2 (a (1, 25) input becomes (1, 25, 2)).
#
# Because 1 - sigmoid(x) == sigmoid(-x), the second entry is logsigmoid(-x),
# so both entries are evaluated directly in log space. Going through
# exp -> 1 - p -> log with a 1e-128 guard is not safe: 1e-128 is far below
# the smallest float32 value and rounds to 0, so a saturated sigmoid yields
# log(0) = -inf.
#
# NOTE: despite its name, `logits` holds log-probabilities; the name is kept
# because later cells pass `logits` to the uncertainty helpers.
gap_f32 = reward_gap_samples.to(torch.float32)
logits = torch.stack(
    [
        torch.nn.functional.logsigmoid(gap_f32),
        torch.nn.functional.logsigmoid(-gap_f32),
    ],
    dim=2,
)

# Epistemic Uncertainty

In [6]:
# Epistemic uncertainty computed from the stacked log-probabilities in
# `logits`; named for reuse, then displayed as the cell output.
epistemic_value = epistemic_uncertainty(logits)
epistemic_value

tensor([0.0374])

# Aleatoric Uncertainty

In [7]:
# Aleatoric uncertainty computed from the stacked log-probabilities in
# `logits`; named for reuse, then displayed as the cell output.
aleatoric_value = aleatoric_uncertainty(logits)
aleatoric_value

tensor([0.6245])