In [None]:
import numpy as np
import matplotlib.pyplot as plt
from fastkde import KDE
from gaussianmixture import GaussianMixture
# NOTE(review): recent tqdm releases flag `tqdm_notebook` as deprecated in favour of
# `tqdm.notebook.tqdm` / `tqdm.auto.tqdm`; kept here so the existing import is not removed.
from tqdm import tqdm_notebook as tqdm
import scipy
# `import scipy` alone does not guarantee that the `optimize` subpackage is loaded
# (older SciPy releases do not lazy-load subpackages); the optimisation cell needs it.
import scipy.optimize
%matplotlib inline

## Generate data

In [None]:
# Settings for the two-dimensional experiment.
N = 200  # number of samples requested from GM.generate_samples
NPDF = 101  # grid points per axis passed to GM.pdf (also the length of the 1-D grid used for the marginals)
SEED = 0  # seed for NumPy's global random number generator
XLIM = [-3, 3]  # lower / upper grid limit, applied to both axes
# First / second constructor argument of GaussianMixture.
# NOTE(review): presumably the component means and covariance matrices — confirm in gaussianmixture.
MU = [[-1, -1], [1, 1]]
SIGMA = [[[0.5, 0], [0, 0.5]]]*2  # the same 2x2 diagonal matrix for both components

# Create object for generating data from a Gaussian mixture
GM = GaussianMixture(np.array(MU), np.array(SIGMA))

# Generate data
# The global NumPy RNG is seeded right before sampling.
# NOTE(review): this only makes X reproducible if generate_samples draws from np.random — confirm.
np.random.seed(SEED)
X = GM.generate_samples(N)

# Create the real pdfs
# GM.pdf is unpacked as ((grid for axis 1, grid for axis 2), density values).
(X1PDF, X2PDF), YPDFA = GM.pdf(minx=[XLIM[0], XLIM[0]], maxx=[XLIM[1], XLIM[1]], npoints=NPDF)

## KDE fit

In [None]:
# Fit a single KDE to the full sample X.
# NOTE(review): presumably compute_bandwidth() selects the kernel bandwidth and
# score_leave_one_out() returns a leave-one-out (log-)likelihood — confirm in fastkde.
kde = KDE(X)
kde.compute_bandwidth()
# Last expression of the cell: its return value is what the notebook displays.
kde.score_leave_one_out()

In [None]:
# One univariate KDE per coordinate of X (the first two columns).
kdes = [KDE(X[:, dim]) for dim in (0, 1)]

# Common evaluation grid for both marginals and its point-wise spacing.
xpdf = np.linspace(*XLIM, NPDF)
grid_spacing = np.gradient(xpdf)

# Map every coordinate through the numerically integrated marginal estimate:
# cumulative sum of the KDE values on the grid times the grid spacing, then
# linear interpolation of that curve at the observed sample positions.
Xscaled = np.zeros_like(X)
for dim, marginal in enumerate(kdes):
    marginal.compute_bandwidth()
    marginal.set_score_samples(xpdf)
    density = marginal.score_samples()
    cdf = np.cumsum(density)*grid_spacing
    Xscaled[:, dim] = np.interp(X[:, dim], xpdf, cdf)

In [None]:
# Fit a KDE to the transformed sample Xscaled (each column mapped through its
# estimated marginal CDF in the previous cell), i.e. a KDE-based copula estimate.
copula = KDE(Xscaled)
copula.compute_bandwidth()

In [None]:
# Inspect the bandwidth attribute of the copula KDE after compute_bandwidth() was called.
copula.bandwidth

In [None]:
# Leave-one-out score of the KDE fitted on the CDF-transformed sample.
copula.score_leave_one_out()

In [None]:
# Sum of the leave-one-out scores of the two marginal KDEs.
# NOTE(review): if the scores are log-likelihoods this is the score of the
# product-of-marginals (independence) model — confirm what score_leave_one_out returns.
kdes[0].score_leave_one_out() + kdes[1].score_leave_one_out()

In [None]:
# Point both marginal KDEs at the observed samples themselves (instead of the xpdf grid).
# NOTE(review): presumably compute_difference=True is what fills data_helpers['difference']
# (inspected in the next cell) and is needed by gradient_samples() — confirm in fastkde.
kdes[0].set_score_samples(X[:, 0], compute_difference=True)
kdes[1].set_score_samples(X[:, 1], compute_difference=True)

In [None]:
# Sanity check: shape of the 'difference' helper stored on the first marginal KDE.
kdes[0].data_helpers['difference'].shape

In [None]:
# Sum over samples of log(|g0| * |g1|), where g0 / g1 are gradient_samples() of the two
# marginal KDEs, plus the leave-one-out score of the copula KDE.
# NOTE(review): looks like the change-of-variables term of a copula decomposition of the
# joint log-likelihood — confirm what gradient_samples() actually returns.
np.sum(np.log(np.abs(kdes[0].gradient_samples())*np.abs(kdes[1].gradient_samples()))) + copula.score_leave_one_out()

## One-dimensional case

In [None]:
# Settings for the one-dimensional experiment.
# These rebind N, NPDF, XLIM, MU, SIGMA and GM from the two-dimensional section, so
# cells of that section see the 1-D values if they are re-run after this cell.
N = 200  # number of samples drawn per repeat
NPDF = 1001  # number of grid points passed to GM.pdf
SEED = 0  # seed for NumPy's global random number generator
XLIM = [-5, 5]  # lower / upper limit of the evaluation grid
MU = [-1, 1]  # first constructor argument of GaussianMixture (one entry per component)
# NOTE(review): unclear from here whether GaussianMixture reads these as standard
# deviations or variances in 1-D — confirm in gaussianmixture.
SIGMA = [0.3, 0.5]
NREPEATS = 5  # number of independent samples used in the comparison loop

# Create object for generating data from a Gaussian mixture
GM = GaussianMixture(np.array(MU), np.array(SIGMA))

# Create the real pdfs
# GM.pdf is unpacked as (one-element sequence holding the grid, density values on that grid).
(XPDF,), YPDF = GM.pdf(minx=[XLIM[0]], maxx=[XLIM[1]], npoints=NPDF)

In [None]:
# Functions needed for optimization
def objective(alpha):
    """Return the quadratic form 0.5 * alpha^T K alpha.

    The kernel matrix is read from the notebook-level global ``K``.
    """
    weighted = np.dot(alpha, K)
    return 0.5*np.dot(weighted, alpha)

def jac(alpha):
    """Return alpha . K, the gradient of ``objective`` when ``K`` is symmetric.

    The kernel matrix is read from the notebook-level global ``K``.
    """
    gradient = np.dot(alpha, K)
    return gradient

def constraint(alpha):
    """Equality-constraint residual: zero exactly when the entries of alpha sum to one."""
    total_weight = np.sum(alpha)
    return total_weight - 1

def jac_constraint(alpha):
    """Gradient of ``constraint`` with respect to alpha.

    ``constraint`` is ``sum(alpha) - 1``, so every partial derivative is one.
    The result is shaped like ``alpha`` itself rather than taken from the
    notebook-level ``N``, which is rebound between sections of the notebook and
    could otherwise disagree with the length of the vector being optimised.

    Parameters
    ----------
    alpha : array_like
        Current weight vector.

    Returns
    -------
    numpy.ndarray
        Float array of ones with the same shape as ``alpha``.
    """
    return np.ones_like(alpha, dtype=float)

def f(x):
    """Evaluate the fitted kernel expansion at one or more points.

    Computes ``sum_i alpha[i] * exp(-(X[i] - x)**2 / c) - rho`` for every entry
    of ``x``. ``alpha``, ``X``, ``c``, ``rho`` and ``N`` are read from the
    notebook-level globals set by the optimisation cell, so that cell must have
    been run first.

    Parameters
    ----------
    x : scalar or array_like
        Evaluation point(s). Python and NumPy scalars, 0-d arrays, lists and
        arrays are all accepted and flattened to one dimension.

    Returns
    -------
    numpy.ndarray
        1-D float array with one value per evaluation point.
    """
    # Flatten the input so scalars of any flavour become a length-1 array.
    points = np.asarray(x, dtype=float).reshape(-1)
    # Use the first N samples / weights; reshape raises if X does not hold
    # exactly one value per sample (e.g. accepts (N,) and (N, 1), rejects (N, 2)).
    centers = np.reshape(np.asarray(X, dtype=float)[:N], N)
    weights = np.asarray(alpha, dtype=float)[:N]
    # Row p, column i holds the kernel value between evaluation point p and sample i.
    kernel = np.exp(-(centers[None, :] - points[:, None])**2/c)
    return kernel @ weights - rho

In [None]:
# Repeat the experiment NREPEATS times. Every repeat draws a fresh sample and builds two
# regions on the XPDF grid, recording for each its probability mass under the true
# density YPDF (`coverage*`) and its total length (`size*`):
#   1. the level set {estimated KDE density > epsilon};
#   2. the set {f(x) > 0} of the kernel expansion fitted by the constrained quadratic
#      programme below (the dual problem of a nu-one-class SVM).
np.random.seed(SEED)
coverage = np.zeros(NREPEATS)
size = np.zeros(NREPEATS)
coverage2 = np.zeros(NREPEATS)
size2 = np.zeros(NREPEATS)

# Loop-invariant settings. c, K, alpha, rho and X deliberately stay notebook-level
# names: objective(), jac() and f() read them as globals.
c = 0.2   # kernel width: k(a, b) = exp(-(a - b)**2 / c)
nu = 0.1  # sets the box constraint 0 <= alpha_i <= 1/(nu*N)
dx = np.mean(np.diff(XPDF))  # grid spacing used for the Riemann sums

for i in tqdm(range(NREPEATS)):
    # Generate data
    X = GM.generate_samples(N)

    # Estimate KDE on the evaluation grid
    kde = KDE(X)
    kde.compute_bandwidth()
    kde.set_score_samples(XPDF)
    ypdf_estimated = kde.score_samples()

    # Estimate epsilon: accumulate the estimated mass starting from the lowest density
    # values and take the density at which 10% of the mass has been accumulated.
    ypdf_sorted = np.sort(ypdf_estimated)
    epsilon = np.interp(0.1, np.cumsum(ypdf_sorted*dx), ypdf_sorted)

    # Calculate size of subset and its actual coverage
    index = ypdf_estimated > epsilon
    coverage[i] = np.sum(YPDF[index])*dx
    size[i] = np.sum(index)*dx

    # Starting point for the optimiser: int(nu*N) randomly chosen entries at the upper
    # bound, one more entry absorbing the remainder. Indices are drawn with replacement,
    # so repeated indices can make this start only approximately feasible.
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the equivalent dtype.
    x0 = np.zeros(N)
    random_indices = (np.random.rand(int(nu*N)+1)*N).astype(int)
    x0[random_indices[:-1]] = 1/(nu*N)
    x0[random_indices[-1]] = 1 - np.sum(x0)

    # Compute Kernel matrix for all sample pairs at once (the diagonal is exp(0) = 1).
    # reshape accepts samples of shape (N,) or (N, 1) and raises for anything else.
    samples = np.reshape(X, N)
    K = np.exp(-(samples[:, None] - samples[None, :])**2 / c)

    # Perform optimization: minimise 0.5 * alpha^T K alpha subject to sum(alpha) = 1
    # and 0 <= alpha_i <= 1/(nu*N).
    result = scipy.optimize.minimize(objective, x0, bounds=((0, 1/(N*nu)),)*N, jac=jac,
                                     constraints=dict(type='eq', fun=constraint, jac=jac_constraint))

    # Compute coverage and size
    alpha = result.x
    # Offset rho: average of (alpha . K) over the weights lying well inside the box
    # (between 20% and 80% of the upper bound). If no weight falls in that band the
    # mean of an empty selection is NaN and the region below is empty.
    on_margin = (0.2/(nu*N) < alpha) & (alpha < 0.8/(nu*N))
    rho = np.mean(np.dot(alpha, K)[on_margin])
    y = f(XPDF)
    index = y > 0
    coverage2[i] = np.sum(YPDF[index])*dx
    size2[i] = np.sum(index)*dx

In [None]:
# Per-repeat mass of the true density YPDF inside the KDE level set (density > epsilon).
coverage

In [None]:
# Per-repeat mass of the true density YPDF inside the region where f(x) > 0.
coverage2

In [None]:
# Per-repeat total length (number of selected grid points times dx) of the KDE level set.
size

In [None]:
# Per-repeat total length (number of selected grid points times dx) of the region where f(x) > 0.
size2