In [None]:
import numpy as np
import pandas as pd
import histogrammar as hg
import matplotlib.pyplot as plt

In [None]:
import matplotlib

In [None]:
# precision-recall curve and f1
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from matplotlib import pyplot
from scipy.stats import chi2, norm


In [None]:
from kde_utils import kde_process_data, kde_make_transformers, kde_bw

In [None]:
%matplotlib inline

In [None]:
# Use the ggplot style sheet and a default font size of 12 for the figures in this notebook.
plt.style.use('ggplot')
plt.rc('font', size=12)


In [None]:
# generate fake y and y_prob

In [None]:
# Seed NumPy's global RNG so that the np.random draws that follow are reproducible.
np.random.seed(43)

In [None]:
# Synthetic scores for the negative class: exponential draws, keeping only values below 1,
# so fewer than the requested 2000 samples may remain. All labels are 0.
neg_draws = np.random.exponential(0.4, 2000)
X0 = neg_draws[neg_draws < 1]
y0 = np.zeros(len(X0))

# Synthetic scores for the positive class: the same recipe, mirrored via 1 - x so that
# the scores concentrate near 1. All labels are 1.
pos_draws = np.random.exponential(0.25, 1000)
X1 = 1. - pos_draws[pos_draws < 1]
y1 = np.ones(len(X1))

In [None]:
# Stack both classes into one score vector and one label vector (negatives first).
X = np.hstack((X0, X1))
y = np.hstack((y0, y1))

In [None]:
# Histogram the scores of each class separately with a bin width of 0.02.
h0 = hg.SparselyBin(binWidth=0.02)
h0.fill.numpy(X0)
h1 = hg.SparselyBin(binWidth=0.02)
h1.fill.numpy(X1)

# Draw both histograms semi-transparently (presumably on shared axes so the overlap shows — verify).
h0.plot.matplotlib(alpha=0.5)
h1.plot.matplotlib(alpha=0.5)

In [None]:
# Precision-recall curve of the synthetic sample: y holds the true labels, X the scores.
precision, recall, thresholds = precision_recall_curve(y, X)

In [None]:
# plot the precision-recall curves (sklearn)
plt.figure(figsize=(12,7))
# Baseline: the fraction of positive labels, drawn as a horizontal "no skill" precision line.
no_skill = len(y[y==1]) / len(y)
plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
plt.plot(recall, precision, marker='.', label='Classifier')
# axis labels
plt.xlabel('Recall')
plt.ylabel('Precision')
# show the legend
plt.legend()
# Ask for the grid explicitly: a bare plt.grid() *toggles* visibility, which would
# switch off the grid that the ggplot style has already turned on.
plt.grid(True)
# show the plot
plt.show()

In [None]:
# real y and y_prob

In [None]:
# Build a reproducible two-class toy dataset and hold out half of it for testing.
X, y = make_classification(n_samples=1000, n_classes=2, random_state=1)
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.5, random_state=2)

# Fit a logistic regression on the training half.
model = LogisticRegression(solver='lbfgs')
model.fit(trainX, trainy)

# Score the test half: the second predict_proba column (positive outcome) and the
# hard class predictions.
lr_probs = model.predict_proba(testX)[:, 1]
yhat = model.predict(testX)

# Precision-recall curve from the probabilities, F1 from the hard predictions,
# and the area under the precision-recall curve.
precision, recall, thresholds = precision_recall_curve(testy, lr_probs)
lr_f1 = f1_score(testy, yhat)
lr_auc = auc(recall, precision)
# summarize scores
print('Logistic: f1=%.3f auc=%.3f' % (lr_f1, lr_auc))


In [None]:
# plot the precision-recall curves
# Create the figure once only; a second plt.figure() call would leave an empty figure behind.
plt.figure(figsize=(12,7))
# Baseline: the fraction of positive labels in the test set, drawn as a "no skill" line.
no_skill = len(testy[testy==1]) / len(testy)
plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
plt.plot(recall, precision, marker='.', label='Classifier')
# axis labels
plt.xlabel('Recall')
plt.ylabel('Precision')
# show the legend
plt.legend()
# Ask for the grid explicitly: a bare plt.grid() *toggles* visibility, which would
# switch off the grid that the ggplot style has already turned on.
plt.grid(True)
# show the plot
plt.show()

In [None]:
# pick a set

In [None]:
# Disabled switch that would take y / X as the labels and scores to analyse.
# NOTE(review): by this point X and y have been rebound by make_classification() to the
# feature matrix and labels of the toy dataset, so flipping this to True would not select
# the exponential toy scores generated earlier — confirm before enabling.
if False:
    y_true = y
    y_prob = X

In [None]:
# Analyse the held-out labels and the predicted positive-class probabilities.
y_true = testy # [testy==1]
y_prob = lr_probs # [testy==1]


In [None]:
# Cast the labels to int: they are later combined with boolean masks via `&` and summed.
y_true = y_true.astype(int)

In [None]:
# Curve for the selected sample. The following cells extend `thresholds` and re-derive
# recall and precision from explicit confusion-matrix counts.
precision, recall, thresholds = precision_recall_curve(y_true, y_prob)

In [None]:
# add zero threshold (missing by default?)
# Guarded so that re-running this cell does not keep prepending further zeros.
if thresholds.size == 0 or thresholds[0] != 0.:
    thresholds = np.concatenate([[0.], thresholds])

In [None]:
#############################
# Calculate Uncertainty bands

N = len(y_true)

# Getting TP, FN, FP (and TN) at every threshold.
# remark: computing them with metrics.confusion_matrix() takes too much time
# Rather than re-scanning all scores once per threshold (O(N * n_thresholds)), the scores
# are sorted once and the counts are obtained by binary search.
# Assumes y_true holds 0/1 labels and y_prob contains no NaN.
P = np.full(len(thresholds), int(np.sum(y_true)))

scores_all = np.sort(y_prob)
scores_pos = np.sort(y_prob[y_true == 1])

# searchsorted(..., side='left') gives the number of scores strictly below each threshold,
# so "size - that" is the number of scores >= threshold.
# we use ">= thr" like in precision_recall_curve():
PP = scores_all.size - np.searchsorted(scores_all, thresholds, side='left')
TP = scores_pos.size - np.searchsorted(scores_pos, thresholds, side='left')
FN = P - TP
FP = PP - TP
TN = N - TP - FP - FN

In [None]:
# Recall = TP / actual positives, precision = TP / predicted positives.
# P and PP equal TP + FN and TP + FP respectively, because FN = P - TP and FP = PP - TP.
recall = TP / P
precision = TP / PP

In [None]:
# Two-sided coverage of +-1, 2 and 3 standard deviations of a one-dimensional Gaussian.
nstd1, nstd2, nstd3 = [2. * (norm.cdf(n_sigma) - 0.5) for n_sigma in (1, 2, 3)]
#print (nstd1, nstd2, nstd3)

# Chi-square quantiles with two degrees of freedom for those coverages
# (68.3%, 95.4% and 99.7%), plus a plain 90% level.
l68, l95, l99 = [chi2.ppf(coverage, 2) for coverage in (nstd1, nstd2, nstd3)]
l90 = chi2.ppf(0.90, 2)

# scales with which to scale up r1 and r2
scale1, scale2, scale3 = [np.sqrt(level) for level in (l68, l95, l99)]
#print (scale1, scale2, scale3)

print (l68, l90, l95, l99)

In [None]:
from scipy.special import xlogy

def phat(rec, prec, x_tp, x_fp, x_tn, x_fn):
    """Cell probabilities of the confusion matrix for a fixed recall and precision.

    The returned probabilities reproduce exactly the requested ``rec`` and ``prec``,
    while the combined probability of the TP, FP and FN cells is kept at its observed
    fraction ``(x_tp + x_fp + x_fn) / total``.

    ``rec`` and ``prec`` may be scalars or NumPy arrays of equal shape; they must be
    non-zero because both appear as divisors.

    Returns the tuple ``(p_tp, p_fp, p_tn, p_fn)``.
    """
    total = x_tp + x_fp + x_tn + x_fn
    non_tn = x_tp + x_fp + x_fn

    # Expected FP and FN per TP implied by the requested precision and recall.
    fp_per_tp = (1-prec)/prec
    fn_per_tp = (1-rec)/rec
    alpha = fp_per_tp + fn_per_tp + 1

    p_tp = (non_tn / total) * (1. / alpha)
    p_fn = fn_per_tp * p_tp
    p_fp = fp_per_tp * p_tp
    p_tn = 1. - p_fn - p_fp - p_tp

    # Rounding noise can push p_tn marginally below zero; clamp those values to zero.
    if isinstance(p_tn, np.ndarray):
        below_zero = p_tn < 0
        p_tn[below_zero] = 0
    elif isinstance(p_tn, float):
        if p_tn < 0:
            p_tn = 0.
    return p_tp, p_fp, p_tn, p_fn

def nll(rec, prec, x_tp, x_fp, x_tn, x_fn):
    """Difference in -2 log L between a constrained and a free multinomial fit.

    The observed counts are evaluated twice:

    * with the cell probabilities returned by ``phat`` for the given ``rec`` and
      ``prec`` (recall and precision held fixed), and
    * with the observed fractions ``count / total`` (all probabilities free).

    Returns the constrained value minus the free one. ``rec`` and ``prec`` may be
    arrays, in which case the result has their shape.
    """
    def _minus_two_log_lik(p_tp, p_fp, p_tn, p_fn):
        # xlogy(x, p) = x * log(p), defined as 0 when x == 0.
        return -2 * xlogy(x_tp, p_tp) - 2 * xlogy(x_fp, p_fp) - 2 * xlogy(x_fn, p_fn) - 2 * xlogy(x_tn, p_tn)

    total = x_tp + x_fp + x_tn + x_fn

    # Free fit: every probability equals its observed fraction.
    free_value = _minus_two_log_lik(x_tp / total, x_fp / total, x_tn / total, x_fn / total)

    # Fit with recall and precision fixed to the requested values.
    fixed_value = _minus_two_log_lik(*phat(rec, prec, x_tp, x_fp, x_tn, x_fn))

    return fixed_value - free_value

def get_PRgrid(x_tp, x_fp, x_fn, nbins = 100, epsilon = 1e-4):
    """Make a rough estimate for the range of the precision-recall grid to scan
    """

    # epsilon to prevent division by zero at edge
    # Note: true values recall=100% or prec=100% can only hit boundary if fn=0 or fp=0
    # else clip max values of recall and precision
    max_rec_clip = 0 if x_fn == 0 else epsilon
    max_prec_clip = 0 if x_fp == 0 else epsilon
    
    rec = x_tp / (x_tp + x_fn)
    prec = x_tp / (x_tp + x_fp)  

    # get rough estimates of sigma_rec and sigma_precision
    # for rec=0,1 the uncertainty formula gives zero, correct for this 
    if rec == 0:
        rec_for_sigma = 1 / (x_tp + x_fn)
    elif rec == 1:
        rec_for_sigma = (x_tp + x_fn - 1) / (x_tp + x_fn)
    else:
        rec_for_sigma = rec
    # for prec=0,1 the uncertainty formula gives zero, correct for this 
    if prec == 0:
        prec_for_sigma = 1 / (x_tp + x_fp)
    elif prec == 1:
        prec_for_sigma = (x_tp + x_fp - 1) / (x_tp + x_fp)
    else:
        prec_for_sigma = prec
    # rough estimates of sigma_rec and sigma_precision    
    sigma_rec = np.sqrt((rec_for_sigma*(1-rec_for_sigma))/(x_tp + x_fn))
    sigma_prec = np.sqrt((prec_for_sigma*(1-prec_for_sigma))/(x_tp + x_fp))

    # ranges of P and R to scan
    rec_max = min(rec + 6 * sigma_rec, 1 - max_rec_clip)
    rec_min = max(rec - 7 * sigma_rec, epsilon)
    prec_max = min(prec + 6 * sigma_prec, 1 - max_prec_clip)
    prec_min = max(prec - 7 * sigma_prec, epsilon)

    # make PR grid to scan
    rx = np.linspace(rec_min, rec_max, nbins)
    py = np.linspace(prec_min, prec_max, nbins)
    RX, PY = np.meshgrid(rx, py)
    
    return RX, PY


In [None]:
fig, ax = plt.subplots(figsize=(12, 7))

# For each point in the precision-recall curve plot the 90% CL contour
for x_tp, x_fp, x_tn, x_fn in zip(TP, FP, TN, FN):
    RX, PY = get_PRgrid(x_tp, x_fp, x_fn)
    # Deliberately not named `chi2`: that would overwrite scipy.stats.chi2, which the
    # confidence-level cells need when they are (re-)run.
    chi2_grid = nll(RX, PY, x_tp, x_fp, x_tn, x_fn)
    CS = ax.contour(RX, PY, chi2_grid, levels=[l90])

# Plot precision-recall curve
cmap = plt.get_cmap("tab10")
# The sample size in the label is taken from the data instead of being hard-coded.
ax.plot(recall, precision, label=f'observed values (test size = {len(y_true)})', color='black')

ax.set_xlim((-0.05, 1.05))
ax.set_ylim((-0.05, 1.05))
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
#ax.set_title(f'Precision-Recall Curve ±1σ')
ax.set_title('Precision-Recall Curve 90% CL')
ax.legend(loc="lower left")
# Ask for the grid explicitly: a bare ax.grid() *toggles* visibility, which would
# switch off the grid that the ggplot style has already turned on.
ax.grid(True)

plt.tight_layout()
plt.savefig('PR_uncertainties.pdf')


### NOTE: the cells below require the current HEAD of the `unstable` branch of mmu

install with:
```
pip install git+https://github.com/RUrlus/ModelMetricUncertainty@unstable
```

In [None]:
import mmu
from mmu.lib._mmu_core import multinomial_uncertainty_over_grid_thresholds

In [None]:
import scipy.stats as sts
import seaborn as sns

In [None]:
# Recompute sklearn's curve: precision, recall and thresholds were reassigned by earlier
# cells (count-based recall/precision and the zero-extended threshold array).
precision, recall, thresholds = precision_recall_curve(y_true, y_prob)

In [None]:
# 1000 evenly spaced candidate values just inside (0, 1), shared by both axes.
# Note: prec_grid and rec_grid are two names for the same array object.
prec_grid = rec_grid = np.linspace(1e-6, 1 - 1e-6, 1000)

In [None]:
# Confusion matrices evaluated at the sklearn thresholds.
# NOTE(review): presumably one matrix per threshold along axis 0 — confirm the layout in the mmu docs.
conf_mats = mmu.confusion_matrices_thresholds(y=y_true, score=y_prob, thresholds=thresholds)

### Quick and dirty implementation

`multinomial_uncertainty_over_grid` only stores the chi2 statistic in `scores` when it is smaller than the value currently stored for that grid point.
The function also only computes, and potentially sets, values within a sub-grid whose boundaries are determined by `n_sigmas` times the marginal standard deviation of the precision and recall.

In [None]:
# Initialise every (precision, recall) grid point with a large sentinel value of 1e4;
# the scores array is then handed to the mmu routine together with the grids and
# confusion matrices.
scores = np.full((prec_grid.size, rec_grid.size), 1e4)
multinomial_uncertainty_over_grid_thresholds(
    n_conf_mats=conf_mats.shape[0],
    precs_grid=prec_grid,
    recs_grid=rec_grid,
    conf_mat=conf_mats,
    scores=scores
)

In [None]:
# Turn the chi2 values into p-values with the survival function of a chi-square
# distribution with two degrees of freedom.
pvals = sts.chi2.sf(np.abs(scores), 2)

# Rows are labelled by precision, columns by recall (both rounded to 4 decimals);
# rows are then ordered by descending precision.
df_grid = pd.DataFrame(
    pvals,
    index=np.round(prec_grid, 4),
    columns=np.round(rec_grid, 4),
)
df_grid = df_grid.sort_index(ascending=False)

In [None]:
# For every observed recall value, store the position of the closest column label of df_grid.
# NOTE(review): the loop variable `rec` stays bound after the loop; check later cells for
# uses of it before vectorising this loop.
rec_idx = np.empty(recall.size, dtype=np.int64)
for i, rec in enumerate(recall):
    rec_idx[i] = np.abs(df_grid.columns - rec).argmin()

In [None]:
# For every observed precision value, store the position of the closest row label of
# df_grid (positions refer to the descending-sorted index).
# NOTE(review): the loop variable `prec` stays bound after the loop; check later cells for
# uses of it before vectorising this loop.
prec_idx = np.empty(precision.size, dtype=np.int64)
for i, prec in enumerate(precision):
    prec_idx[i] = np.abs(df_grid.index - prec).argmin()

In [None]:
# Heatmap of the p-value grid with the observed precision-recall curve drawn as a line.
fig, ax = plt.subplots(figsize=(14, 7))
ax = sns.heatmap(df_grid, ax=ax, vmin=0.0, vmax=1.0)
# rec_idx / prec_idx are positions along the heatmap axes, not recall/precision values.
# Raw string: '\h' is not a valid Python escape sequence and triggers a warning in a
# normal string literal; the resulting label text is unchanged.
ax.plot(rec_idx, prec_idx, c='black', label=r'$\hat{p}, \hat{r}$')
ax.legend()
ax.set_title('Multinomial uncertainty Precision-Recall')
ax.set_xlabel('recall')
ax.set_ylabel('precision')
plt.tight_layout()

In [None]:
# Heatmap of the p-value grid with the observed precision-recall points drawn as markers.
fig, ax = plt.subplots(figsize=(14, 7))
ax = sns.heatmap(df_grid, ax=ax, vmin=0.0, vmax=1.0)
# rec_idx / prec_idx are positions along the heatmap axes, not recall/precision values.
# Raw string: '\h' is not a valid Python escape sequence and triggers a warning in a
# normal string literal; the resulting label text is unchanged.
ax.scatter(rec_idx, prec_idx, c='black', label=r'$\hat{p}, \hat{r}$', s=10.0)
ax.legend()
ax.set_title('Multinomial uncertainty Precision-Recall')
ax.set_xlabel('recall')
ax.set_ylabel('precision')
plt.tight_layout()

In [None]:
from scipy.stats import chi2, norm

In [None]:
# Two-sided coverage of +-1, 2 and 3 standard deviations of a one-dimensional Gaussian.
nstd1, nstd2, nstd3 = [2. * (norm.cdf(n_sigma) - 0.5) for n_sigma in (1, 2, 3)]
#print (nstd1, nstd2, nstd3)

# Chi-square quantiles with two degrees of freedom for those coverages:
# 68.3% (1 std dev), 95.4% (2 std dev) and 99.7% (3 std dev).
l68, l95, l99 = [chi2.ppf(coverage, 2) for coverage in (nstd1, nstd2, nstd3)]

print (l68, l95, l99)

In [None]:
# Recompute the confusion matrices on a fixed set of 1000 evenly spaced thresholds
# between 1e-4 and 1 - 1e-4, replacing the ones computed at the sklearn thresholds.
conf_mats = mmu.confusion_matrices_thresholds(
    y=y_true,
    score=y_prob,
    thresholds=np.linspace(1e-4, 1 - 1e-4, 1000)
)

In [None]:
# Reset every (precision, recall) grid point to the sentinel value 1e4 and hand the
# array to the mmu routine again, now with the evenly spaced threshold matrices.
scores = np.full((prec_grid.size, rec_grid.size), 1e4)
multinomial_uncertainty_over_grid_thresholds(
    n_conf_mats=conf_mats.shape[0],
    precs_grid=prec_grid,
    recs_grid=rec_grid,
    conf_mat=conf_mats,
    scores=scores
)

In [None]:
# 2-D coordinate arrays for contour plotting: recall varies along columns, precision along rows.
RX, PY = np.meshgrid(rec_grid, prec_grid)

In [None]:
# Plot the l68 / l95 / l99 contours of the scores grid together with the observed curve.
fig, ax = plt.subplots(figsize=(12,7))
CS = ax.contour(RX, PY, scores, levels=[l68, l95, l99])
ax.clabel(CS, inline=True, fontsize=10)
# Labelled so that ax.legend() below has something to show.
ax.plot(recall, precision, label='observed PR curve')
# Ask for the grid explicitly: a bare ax.grid() *toggles* visibility, which would
# switch off the grid that the ggplot style has already turned on.
ax.grid(True)
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_ylim(0.0, 1.01)

# Mark the last point of the curve. The arrays are indexed directly instead of reading
# `rec` / `prec` loop variables left over from earlier cells, so this cell does not
# depend on hidden kernel state.
ax.plot(recall[-1], precision[-1], 'ro', label='last curve point')
ax.legend()
plt.tight_layout()
#plt.savefig(f'PR_exclusion_contours_{x_fp:.1f}FP.pdf')