# Identification vs Detection: Visualizing differences on the simplex

In [100]:
import sys
import itertools
import collections
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt

from utils.visualization import plot_pdf_triplex, plot_prob_triplex, reliability_plot, ECE_plot
from utils.ops import onehot_encode

In [101]:
%matplotlib inline

### Generate mesh of probabilities

In [103]:
%%time
# Regular 100-point grid on [0, 1] for each of the two free coordinates;
# rows enumerate every (first, second) pair with the first coordinate
# varying slowest.
simplex_axis = np.linspace(0, 1, num=100)
first_coord, second_coord = np.meshgrid(simplex_axis, simplex_axis, indexing='ij')
indep_probs = np.column_stack((first_coord.ravel(), second_coord.ravel()))

# Keep only the pairs whose sum leaves a non-negative remainder for the
# third coordinate.
# NOTE(review): this is an exact `<= 1` test on floating-point sums, so a
# pair that should sum to exactly 1 may be dropped if it rounds just above
# 1 -- confirm whether the mesh edge is complete.
on_simplex = indep_probs.sum(axis=1) <= 1
indep_probs = indep_probs[on_simplex]

# Third coordinate is whatever mass the first two leave over.
third_coord = 1. - indep_probs.sum(axis=1, keepdims=True)
probs = np.concatenate((indep_probs, third_coord), axis=1)

# Arg-max class index of every mesh point, handed to onehot_encode.
target = onehot_encode(probs.argmax(axis=1))

CPU times: user 16.8 ms, sys: 30 µs, total: 16.8 ms
Wall time: 16.2 ms


### Compute the relative log-likelihoods

In [107]:
# Start by assuming a flat prior: each of the three classes gets 1/3.
priors = np.full(3, 1. / 3)
# The 1e-7 offset keeps the logarithm finite where a probability is zero.
RLL = np.log(probs + 1e-7)

#### Identification problem: 
Choose $t$ such that, for every $i \neq t$, the log-likelihood ratio $\lambda^{t}_{i} = \lambda_{t} - \lambda_{i}$ is greater than the threshold $\theta^{t}_{i} = -\log(\pi_{t}) + \log(\pi_{i})$, where $\{\lambda_i\}$ are the relative log-likelihoods and $\{\pi_i\}$ are the prior probabilities.

In [119]:
# Every sample starts with all three classes as candidates; each sample gets
# its own independent set so classes can be eliminated per sample.
identified = [{0, 1, 2} for _ in range(len(RLL))]

In [120]:
# Visit every ordered pair (t, i) of distinct classes and eliminate candidate
# t from each sample whose log-likelihood ratio of t against i falls strictly
# below the prior-derived threshold. A ratio exactly on the threshold keeps t.
for t, i in itertools.permutations(range(3), 2):
    llr = RLL[:, t] - RLL[:, i]
    th = np.log(priors[i]) - np.log(priors[t])
    # Only the samples that fail the test need their candidate set touched.
    for k in np.flatnonzero(llr < th):
        identified[k].discard(t)

In [121]:
# Collapse each candidate set to a single class label so the result is always
# a regular (n_samples, 1) integer array.
#
# A sample can keep more than one candidate when a log-likelihood ratio sits
# exactly on its threshold (e.g. two equal probabilities under the flat
# prior). Building an array straight from such unequal-length sets is ragged:
# recent NumPy raises ValueError, older versions return an object array that
# cannot be flattened and counted.
#
# Ties go to the lowest class index, the same first-match convention np.argmax
# applies when `target` is built. -1 marks a sample with no surviving candidate.
identified = np.array([[min(k) if len(k) else -1] for k in identified])
identified

array([[2],
       [2],
       [2],
       ...,
       [0],
       [0],
       [0]])

In [126]:
# Tally how many mesh points were assigned to each class label.
collections.Counter(identified.ravel().tolist())

Counter({2: 1667, 1: 1680, 0: 1697})

#### Detection problem: