In [6]:
import cvxpy as cp
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import matplotlib.patches as patches
import matplotlib.lines as lines

In [7]:
# Solve optimization problem in Liang's paper
def sdpDual(K, Y):
    """Solve the semidefinite program from Liang's paper for the matrix ``hB``.

    Minimizes ``trace(K @ hB)`` over symmetric positive semidefinite ``hB``
    subject to ``K[i, :] @ hB @ K[i, :] >= Y[i, 0] ** 2`` for every row ``i``.

    Parameters
    ----------
    K : numpy.ndarray
        Kernel matrix; row ``i`` is the vector used in the i-th constraint.
        Expected to be (n, n) with ``n = Y.shape[0]`` so that it is compatible
        with ``hB``.
    Y : numpy.ndarray
        2-D array of responses; only the first column is used.

    Returns
    -------
    numpy.ndarray
        The (n, n) value of ``hB`` found by the solver.

    Raises
    ------
    RuntimeError
        If the solver terminates with a status other than ``"optimal"`` or
        ``"optimal_inaccurate"``. Without this check ``hB.value`` would be
        ``None`` and callers would fail later with an unrelated error.
    """
    n = Y.shape[0]
    y = Y[:, 0]
    hB = cp.Variable((n, n), symmetric=True)
    # hB must be PSD, and the quadratic form at each sample must dominate y_i^2.
    constraints = [hB >> 0]
    constraints += [K[i, :] @ hB @ K[i, :] >= cp.square(y[i]) for i in range(n)]
    prob = cp.Problem(cp.Minimize(cp.trace(K @ hB)), constraints)
    prob.solve()
    print("Optimal value", prob.value)
    # Fail loudly here instead of handing None to downstream matrix products.
    if prob.status not in ("optimal", "optimal_inaccurate"):
        raise RuntimeError(
            f"SDP did not reach an optimal solution (status: {prob.status})"
        )
    return hB.value

In [8]:
# Plot the results
def my_plot(X_t,Y_t,M_t,V_t,ylim,legend_loc='upper right'):
    """Plot test data with its prediction band and print coverage statistics.

    Parameters
    ----------
    X_t, Y_t : 2-D arrays
        Test covariates and responses; points are ordered by column 0 of X_t.
    M_t : 2-D array
        Mean estimator m(X_t).
    V_t : 2-D array
        Variance estimator f(X_t). The prediction interval is
        [M_t - sqrt(V_t), M_t + sqrt(V_t)].
    ylim : sequence
        y-axis limits, forwarded to ``plt.ylim``.
    legend_loc : str
        Legend location, forwarded to ``plt.legend``.

    Prints the fraction of test points with (Y_t - M_t)^2 <= V_t and the
    mean of V_t (reported as the bandwidth).
    """
    # Row order that sorts the points by their first covariate column.
    order = np.argsort(X_t, axis=0)[:, 0]
    x_sorted = np.sort(X_t, axis=0)
    y_sorted = Y_t[order]

    # Band boundaries and centre line, re-ordered consistently with x_sorted.
    half_width = np.sqrt(V_t)
    band_lo = (M_t - half_width)[order]
    band_hi = (M_t + half_width)[order]
    center = M_t[order]

    sns.set()
    sns.set_style("darkgrid")
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    blues = sns.color_palette("Blues_r", 4)
    point_color, fill_color, edge_color = blues[0], blues[1], blues[2]

    sns.scatterplot(x=x_sorted[:, 0], y=y_sorted[:, 0], color=point_color,
                    edgecolor='w', linewidth=0.5)
    plt.fill_between(x_sorted[:, 0], band_lo[:, 0], band_hi[:, 0],
                     color=fill_color, alpha=0.4)
    for boundary in (band_lo, band_hi):
        plt.plot(x_sorted, boundary, color=edge_color, lw=2, alpha=0.6)
    plt.plot(x_sorted, center, '-', color='orange', linewidth=2, label="Mean")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.ylim(ylim)

    # Hand-built legend entries: a patch for the band and a line for the mean.
    pi_patch = patches.Rectangle((0, 0), 1, 1, lw=0, color=fill_color,
                                 alpha=0.4, label="PI")
    mean_line = lines.Line2D([0], [0], color='orange', lw=2, label="Mean")
    plt.legend(handles=[pi_patch, mean_line], loc=legend_loc)
    plt.show()
    # Optional export: plt.savefig("plot.png", dpi=300)

    squared_error = np.power(Y_t[:, 0] - M_t[:, 0], 2)
    coverage = (squared_error <= V_t[:, 0]).mean()
    bandwidth = np.mean(V_t[:, 0])
    print("The overall coverage is", coverage)
    print("The mean bandwidth for testing data is", bandwidth)

In [9]:
# Test 1 (known mean case: m_0=0)

# Generate i.i.d. data, drawn once and then cut into consecutive segments.
np.random.seed(5)
n_pre, n_opt, n_adj, n_t = 1000, 100, 100, 1000
n = sum((n_pre, n_opt, n_adj, n_t))

# X is uniform on [-1, 1]; Y is uniform(-1, 1) noise scaled by sqrt(1 + 25 X^4).
X = np.random.uniform(-1, 1, n).reshape(-1, 1)
noise = np.random.uniform(-1, 1, n).reshape(-1, 1)
Y = np.sqrt(1+25*np.power(X, 4)) * noise

# Segment boundaries: [0, opt_end) -> optimization (pre + opt samples together),
# [opt_end, adj_end) -> delta adjustment, [adj_end, n) -> test.
opt_end = n_pre + n_opt
adj_end = opt_end + n_adj

X_opt, X_adj, X_t = np.split(X, [opt_end, adj_end])
Y_opt, Y_adj, Y_t = np.split(Y, [opt_end, adj_end])

In [None]:
# Liang's method with polynomial kernel k(x, x') = (1 + <x, x'>)^degree
degree = 3
# Pairwise inner products between all optimization samples.
X_inner_prod = np.matmul(X_opt, X_opt.T)
K = np.power(X_inner_prod + 1, degree)
# Matrix returned by the SDP; used below to evaluate the band shape.
output = sdpDual(K, Y_opt)

In [None]:
# learn delta
# Shape function k(x)^T @ output @ k(x) at each adjustment / test point.
# The row-wise einsum yields exactly the diagonal of K @ output @ K.T without
# materialising the full (n_points x n_points) product first.
K_adj = np.power(X_adj @ X_opt.T + 1, degree)
shape_adj = np.einsum("ij,ij->i", K_adj @ output, K_adj)
K_t = np.power(X_t @ X_opt.T + 1, degree)
shape_t = np.einsum("ij,ij->i", K_t @ output, K_t)

alpha = 0.05
delta = -1
Delta = 0
prop = 0.5
# Squared adjustment responses do not change inside the loop, so compute once.
Y_adj_sq = np.power(Y_adj, 2)[:, 0]
prop_outside = (Y_adj_sq > (1 + delta) * shape_adj).mean()
# Each pass moves delta a fraction (1 - prop) of the way towards Delta; stop
# once the share of adjustment points outside the band is at most 0.75 * alpha,
# or delta has become equal to Delta.
while prop_outside > (0.75 * alpha) and delta != Delta:
    delta = prop * delta + (1 - prop) * Delta
    prop_outside = (Y_adj_sq > (1 + delta) * shape_adj).mean()

print(delta)

In [None]:
# Known-mean case: the mean estimate is zero at every test point (column vector).
M_t = np.zeros((n_t, 1))
# Calibrated variance estimate (1 + delta) * shape, as a column vector.
V_alpha_t = ((1 + delta) * shape_t).reshape(-1, 1)
my_plot(X_t, Y_t, M_t, V_alpha_t, [-7, 7])