# Uncertainty KDE support

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize
from scipy.spatial import distance
from stats import KDE, GaussianMixture

In [None]:
# Reference distribution and its density sampled on a grid.
# NOTE(review): GaussianMixture and its pdf() come from the local `stats`
# module; the arguments look like per-component parameters and
# (number of grid points, lower bounds, upper bounds) -- confirm there.
gm = GaussianMixture([-1, 1], [.5, .3])
# pdf() returns a pair; its first item is unpacked as a one-element sequence
# holding the grid abscissae, the second item is the density on that grid.
(xpdf,), ypdf = gm.pdf(500, [-5], [5])
# Mean spacing between consecutive grid abscissae (not used by the cells
# visible below).
dx = np.mean(np.diff(xpdf))

In [None]:
# Visual check: plot the sampled reference density against its grid.
plt.plot(xpdf, ypdf)

In [None]:
def total_probability(alpha, xpdf, ypdf):
    """Integrate a sampled curve over the region where it exceeds ``alpha``.

    The curve is treated as piecewise linear between consecutive samples:

    * a segment with both endpoints above ``alpha`` contributes its whole
      trapezoid;
    * a segment with exactly one endpoint above ``alpha`` contributes the
      trapezoid between that endpoint and the linearly interpolated point
      where the curve meets ``alpha``;
    * a segment with no endpoint above ``alpha`` contributes nothing.

    Parameters
    ----------
    alpha
        Threshold on the curve value.
    xpdf
        Abscissae of the samples (indexable, supports ``len``).
    ypdf
        Curve values at ``xpdf``.

    Returns
    -------
    The accumulated area; the integer ``0`` when no segment qualifies.
    """
    area = 0
    for left in range(len(xpdf) - 1):
        right = left + 1
        y_left, y_right = ypdf[left], ypdf[right]
        left_above = y_left > alpha
        right_above = y_right > alpha

        # Segment entirely at or below the threshold: nothing to add.
        if not (left_above or right_above):
            continue

        width = xpdf[right] - xpdf[left]
        if left_above and right_above:
            # Whole trapezoid under the segment.
            area += width * (y_left + y_right) / 2
        elif left_above:
            # Curve falls through alpha: keep the part next to the left end.
            area += (y_left + alpha) * (y_left - alpha) * width / (2 * (y_left - y_right))
        else:
            # Curve rises through alpha: keep the part next to the right end.
            area += (y_right + alpha) * (y_right - alpha) * width / (2 * (y_right - y_left))
    return area

In [None]:
def f(alpha, level, xpdf, ypdf):
    """Squared gap between the area above ``alpha`` and the target ``level``.

    Used as the objective when searching for the threshold ``alpha`` whose
    super-level region of the sampled curve (``xpdf``, ``ypdf``) has total
    area ``level``; the value is zero exactly when the two agree.
    """
    mismatch = total_probability(alpha, xpdf, ypdf) - level
    return mismatch ** 2

In [None]:
# Search for the density threshold whose super-level region carries total
# probability 0.9, by minimising the squared mismatch f from the start 0.1.
# NOTE(review): result.success is never inspected, so a failed optimisation
# would pass through silently.
result = minimize(f, 0.1, args=(.9, xpdf, ypdf))
# result.x is the solution array returned by the optimiser (not a scalar).
alpha = result.x

In [None]:
# Grid abscissae where the reference density exceeds the threshold.
# atleast_2d(...).T turns a 1-D selection into one point per row, the layout
# passed to directed_hausdorff further down (assumes xpdf is 1-D -- confirm).
set1 = np.atleast_2d(xpdf[ypdf > alpha]).T

In [None]:
# Repeated-sampling experiment: for each repetition draw a fresh sample from
# the mixture, fit a KDE, extract its 0.9 super-level set on the grid and
# measure how far the reference set (set1) is from it.
# NOTE(review): seeding NumPy's global RNG only makes this repeatable if
# gm.generate_samples draws from that RNG -- confirm in `stats`.
np.random.seed(0)
n = 40  # sample size per repetition
nrepeat = 100  # number of repetitions

distances = np.zeros(nrepeat)
for i in range(nrepeat):
    x = gm.generate_samples(n)
    k = KDE(x)
    k.set_bandwidth(.3)
    # NOTE(review): used below as a density on the grid; confirm that
    # score_samples of the local KDE returns densities, not log-densities.
    ykde = k.score_samples(xpdf)
    # Threshold for which the KDE's super-level region has area 0.9.
    r = minimize(f, 0.1, args=(.9, xpdf, ykde))
    set2 = np.atleast_2d(xpdf[ykde > r.x]).T
    # [0] is the distance value; directed_hausdorff is one-sided
    # (set1 -> set2). NOTE(review): the symmetric Hausdorff distance would be
    # the max over both directions -- confirm the one-sided form is intended.
    distances[i] = distance.directed_hausdorff(set1, set2)[0]

In [None]:
# Average of the per-repetition distances (shown as the cell output).
distances.mean()

In [None]:
# Spread (population standard deviation) of the per-repetition distances.
distances.std()

In [None]:
# Bootstrap experiment: take ONE sample from the mixture, build its KDE
# super-level set (set2), then compare it against the sets obtained from
# resamples of that same sample.
# NOTE(review): seeding NumPy's global RNG only fixes the sample if
# gm.generate_samples draws from that RNG -- confirm in `stats`.
np.random.seed(0)
x = gm.generate_samples(n)
k = KDE(x)
k.set_bandwidth(.3)
ykde = k.score_samples(xpdf)
# Threshold for which the KDE's super-level region has area 0.9.
r = minimize(f, 0.1, args=(.9, xpdf, ykde))
set2 = np.atleast_2d(xpdf[ykde > r.x]).T

distances2 = np.zeros(nrepeat)
for i in range(nrepeat):
    # Resample with replacement from the first column of x, keeping the size.
    # NOTE(review): the resample is 1-D while x is indexed as 2-D above;
    # confirm KDE treats both layouts the same way.
    k = KDE(np.random.choice(x[:, 0], len(x)))
    k.set_bandwidth(.3)
    ykde = k.score_samples(xpdf)
    r = minimize(f, 0.1, args=(.9, xpdf, ykde))
    set3 = np.atleast_2d(xpdf[ykde > r.x]).T
    # One-sided distance from the original-sample set to the resampled set;
    # [0] picks the distance value out of the returned tuple.
    distances2[i] = distance.directed_hausdorff(set2, set3)[0]

In [None]:
# Average of the bootstrap distances (shown as the cell output).
distances2.mean()

In [None]:
# Spread (population standard deviation) of the bootstrap distances.
distances2.std()