In [1]:
from All_functions_simulation_univariate import * 
from sklearn.linear_model import QuantileRegressor
import math

In [2]:
def generate_points_on_sphere(n):
    """Draw points uniformly at random on the unit sphere in R^3.

    The azimuth is sampled uniformly on [0, 2*pi) and the cosine of the polar
    angle uniformly on [-1, 1); together these give the uniform distribution
    on the sphere's surface.

    Uses NumPy's global RNG. The azimuth draw happens before the polar draw,
    so results under a fixed ``np.random.seed`` depend on that order.

    Parameters
    ----------
    n : int
        Number of points, forwarded as ``size`` to ``np.random.uniform``.

    Returns
    -------
    numpy.ndarray
        Cartesian coordinates stacked along the last axis; shape ``(n, 3)``
        for an integer ``n``.
    """
    azimuth = np.random.uniform(0, 2*np.pi, size=n)
    polar = np.arccos(np.random.uniform(-1, 1, size=n))

    sin_polar = np.sin(polar)
    coords = (
        sin_polar * np.cos(azimuth),
        sin_polar * np.sin(azimuth),
        np.cos(polar),
    )
    return np.stack(coords, axis=-1)

In [3]:
################################### SETUP 1 ###################################
# Setup 1: zero-mean response with heteroscedastic, bounded noise,
#     Y = sqrt(1 + 25 * (X @ beta)^4) * U,   U ~ Uniform(-1, 1).
# The UTOPIA section below simulates the data; the later sections of this cell
# reuse the same X / Y and the same test split.
#
# NOTE(review): mean_est, var_est, est_quantile, solve_opt and interval_adj are
# not defined in this notebook (presumably they come from the star import at the
# top); what they fit and the exact shapes they return are not visible here.

##################### UTOPIA #####################

# Seed NumPy's global RNG so the simulated data are reproducible.
np.random.seed(5)
# Sizes of the four consecutive splits of the simulated sample.
n_pre = 1000  # rows handed to var_est / est_quantile (and mean_est when the mean is unknown)
n_opt = 900   # rows handed to solve_opt
n_adj = 100   # rows handed to interval_adj
n_t = 1000    # held-out rows used only to report coverage / bandwidth
n = n_pre+n_opt+n_adj+n_t
# Unit-norm direction: each entry has magnitude 1/sqrt(3).
beta = np.array([1/math.sqrt(3),1/math.sqrt(3),-1/math.sqrt(3)])

# Covariates on the unit sphere; the noise is symmetric around zero, so the
# conditional mean of Y is 0 and only its spread depends on X @ beta.
X = generate_points_on_sphere(n)
Y = np.sqrt(1+25*np.power(X @ beta, 4))  * np.random.uniform(-1, 1, n)
Y = Y.reshape(-1,1)

# Consecutive row slices. Y is already a column after the reshape above, so the
# extra .reshape(-1, 1) calls below do not change the shape.
X_pre = X[0:n_pre,:]
Y_pre = Y[0:n_pre,:].reshape(-1, 1)

X_opt = X[n_pre:n_pre+n_opt,:]
Y_opt = Y[n_pre:n_pre+n_opt,:].reshape(-1, 1)

X_adj = X[n_pre+n_opt:n_pre+n_opt+n_adj,:]
Y_adj = Y[n_pre+n_opt:n_pre+n_opt+n_adj,:].reshape(-1, 1)


X_t = X[n_pre+n_opt+n_adj:,:]
Y_t = Y[n_pre+n_opt+n_adj:,:].reshape(-1, 1)


# Obtain mean estimator
# known_mean is a string flag (compared against the literal "True"), not a bool.
# Here the mean is treated as known and equal to zero on every split.
known_mean = "True"
if known_mean == "True":
    M_pre = np.zeros(n_pre).reshape(-1,1)
    M_opt = np.zeros(n_opt).reshape(-1,1)
    M_adj = np.zeros(n_adj).reshape(-1,1)
    M_t = np.zeros(n_t).reshape(-1,1)
else:
    # est_type is only assigned on this branch; the var_est call below passes
    # its own est_type keyword, so it does not depend on this assignment.
    est_type = "NN2"
    M_pre, M_opt, M_adj, M_t = mean_est(est_type,X_pre,Y_pre,X_opt,X_adj,X_t)
    
# Obtain variance estimator
# Returns one variance estimate per split (opt / adj / test).
var_opt, var_adj, var_t = var_est(X_pre,Y_pre,M_pre, X_opt,X_adj,X_t,est_type ="NN1")
    
# Obtain quantile estimators
# Four quantile levels, each paired with a different estimator type string.
# The first returned value (m1..m4) is not used in this section.
quantile = [0.8,0.85,0.9,0.95]
# Alternative set of quantile levels (disabled):
# quantile = [0.6,0.7,0.8,0.9]
m1,Q1_opt,Q1_adj,Q1_t = est_quantile("NN1",quantile[0],X_pre,Y_pre,X_opt,X_adj,X_t)
m2,Q2_opt,Q2_adj,Q2_t = est_quantile("NN2",quantile[1],X_pre,Y_pre,X_opt,X_adj,X_t)
m3,Q3_opt,Q3_adj,Q3_t = est_quantile("qrf",quantile[2],X_pre,Y_pre,X_opt,X_adj,X_t)
m4,Q4_opt,Q4_adj,Q4_t = est_quantile("gb",quantile[3],X_pre,Y_pre,X_opt,X_adj,X_t)

# construct estimator matrix
# Five candidates per split: four squared (quantile - mean) terms plus the
# variance estimate. After the transpose each row is one candidate --
# assumes every input is an (n_split, 1) column; TODO confirm.
E_opt = np.hstack(((Q1_opt-M_opt)**2, (Q2_opt-M_opt)**2, (Q3_opt-M_opt)**2, (Q4_opt-M_opt)**2, var_opt))
E_opt = E_opt.T
E_adj = np.hstack(((Q1_adj-M_adj)**2, (Q2_adj-M_adj)**2, (Q3_adj-M_adj)**2, (Q4_adj-M_adj)**2, var_adj))
E_adj = E_adj.T
E_t = np.hstack(((Q1_t-M_t)**2, (Q2_t-M_t)**2, (Q3_t-M_t)**2, (Q4_t-M_t)**2, var_t))
E_t = E_t.T

# solve optimization problem

# "aug" mode takes the candidate matrices; it returns a weight object plus the
# fitted band on the adj and test splits.
optimal_weight, V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "aug", E_opt, E_adj, E_t)
# Alternative solver modes (disabled). As written they unpack two values
# rather than three -- verify against solve_opt before re-enabling.
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_poly", degree = 3)
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_rbf", sigma = 1)

# adjust interval
# alpha = 0.05 is the level passed to interval_adj; delta then multiplies the
# test band below.
alpha = 0.05
delta = interval_adj(X_adj,Y_adj,M_adj,V100_adj,alpha)

# Evaluate on the test split (no plot is produced in this section).
# Debug override that skips the adjustment (disabled):
# delta = 1
V_alpha_t = delta*V100_t
# A test point is covered when its squared residual is at most V_alpha_t, so
# the reported "bandwidth" is on the squared scale.
coverage = (np.power(Y_t[:,0]-M_t[:,0], 2) <= V_alpha_t[:,0]).mean()
bandwidth = np.mean(V_alpha_t[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)


##################### LQR #####################
# Linear quantile regression baseline with a known zero mean: fit the
# (1 - alpha/2) conditional quantile of the centred response on the second half
# of the non-test data, then use its square as the band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half of the non-test rows. It is not read in this section (the mean is
# known), but the names are still assigned.
X_lin, Y_lin = X[:n_half], Y[:n_half]
x_lin, y_lin = X_lin[:, 0], Y_lin[:, 0]
n_lin = len(X_lin)

# Second half: the sample the quantile regression is fitted on.
X_quantile, Y_quantile = X[n_half:n_labeled], Y[n_half:n_labeled]
n_quantile = len(X_quantile)

# Held-out test rows.
X_test, Y_test = X[n_labeled:], Y[n_labeled:]
n_test = len(X_test)

# Known mean: zero on both the fitting rows and the test rows.
M_quantile = np.zeros((n_quantile, 1))
M_test = np.zeros((n_test, 1))

# Estimate the quantile
alpha = 0.05

# Note: the keyword alpha=0 is QuantileRegressor's regularisation strength,
# unrelated to the miscoverage level alpha defined just above.
model_quantile = QuantileRegressor(quantile=1 - alpha / 2, alpha=0, solver='highs')
model_quantile.fit(X_quantile, np.ravel(Y_quantile - M_quantile))
Q_test = model_quantile.predict(X_test)

# Squared predicted quantile, stored as a column.
V_test = (Q_test**2).reshape(-1, 1)

# Covered when the squared residual does not exceed the band.
sq_resid = np.power(Y_test[:, 0] - M_test[:, 0], 2)
coverage = np.mean(sq_resid <= V_test[:, 0])
bandwidth = V_test[:, 0].mean()
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

##################### SPLIT CONFORMAL #####################
# Split-conformal baseline with a known zero mean: absolute residuals on the
# calibration half give one global threshold, whose square is used as a
# constant band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half of the non-test rows. It is not read in this section (the mean is
# known, so nothing is fitted), but the names are still assigned.
X_lin = X[0:n_half,:]
Y_lin = Y[0:n_half,:]
x_lin = X_lin[:,0]
y_lin = Y_lin[:,0]
n_lin = X_lin.shape[0]

# Second half: calibration rows whose residuals define the threshold.
X_res = X[n_half:n_labeled,:]
Y_res = Y[n_half:n_labeled,:]
x_res = X_res[:,0]
y_res = Y_res[:,0]
n_res = X_res.shape[0]

# Held-out test rows.
X_test = X[n_labeled:,:]
Y_test = Y[n_labeled:,:]
x_test = X_test[:,0]
y_test = Y_test[:,0]
n_test = X_test.shape[0]


# Calculate the conformity scores
y_res_pred = np.zeros(n_res)
residuals = np.abs(y_res - y_res_pred)

alpha = 0.05  # 95% confidence level
residuals_sorted = np.sort(residuals)
# Finite-sample conformal quantile: the ceil((1 - alpha) * (n_res + 1))-th
# smallest calibration residual. The rank is sized by the calibration residuals
# themselves (n_res), not by the other half of the data, so it stays correct
# when the two halves differ in length.
rank = math.ceil((1 - alpha) * (n_res + 1))
k = rank - 1  # zero-based position of that order statistic
# With too few calibration points the rank exceeds n_res; the valid threshold
# is then +inf (an unbounded band) rather than an out-of-range index.
threshold = residuals_sorted[k] if rank <= n_res else np.inf

# Calculate the prediction interval
m_test = np.zeros(n_test)
M_test = m_test.reshape(-1,1)
v_test = (threshold**2)*np.ones(n_test)
V_test = v_test.reshape(-1,1)


# Covered when the squared residual does not exceed the (squared) threshold.
coverage = (np.power(Y_test[:,0]-M_test[:,0], 2) <= V_test[:,0]).mean()
bandwidth = np.mean(V_test[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

The overall coverage is 0.964
The mean bandwidth for testing data is 6.55565064270329
The overall coverage is 0.95
The mean bandwidth for testing data is 10.778005204340479
The overall coverage is 0.945
The mean bandwidth for testing data is 10.094771738092378


In [4]:
################################### SETUP 2 ###################################
# Setup 2: non-zero mean plus heteroscedastic, bounded noise,
#     Y = 1 + 5 * (X @ beta)^3 + sqrt(1 + 25 * (X @ beta)^4) * U,   U ~ Uniform(-1, 1).
# The mean is treated as unknown and estimated. The later sections of this cell
# reuse the same X / Y and the same test split.
#
# NOTE(review): mean_est, var_est, est_quantile, solve_opt and interval_adj are
# not defined in this notebook (presumably they come from the star import at the
# top); what they fit and the exact shapes they return are not visible here.

##################### UTOPIA #####################

# Seed NumPy's global RNG so the simulated data are reproducible.
np.random.seed(0)
# Sizes of the four consecutive splits of the simulated sample.
n_pre = 1000  # rows handed to mean_est / var_est / est_quantile
n_opt = 900   # rows handed to solve_opt
n_adj = 100   # rows handed to interval_adj
n_t = 1000    # held-out rows used only to report coverage / bandwidth
n = n_pre+n_opt+n_adj+n_t
# Unit-norm direction: each entry has magnitude 1/sqrt(3).
beta = np.array([1/math.sqrt(3),1/math.sqrt(3),-1/math.sqrt(3)])

# Covariates on the unit sphere; the uniform noise is symmetric around zero, so
# the conditional mean of Y is 1 + 5 * (X @ beta)^3.
X = generate_points_on_sphere(n)
Y = 1+5*np.power(X @ beta, 3)+np.sqrt(1+25*np.power(X @ beta, 4))  * np.random.uniform(-1, 1, n)
Y = Y.reshape(-1,1)

# Consecutive row slices. Y is already a column after the reshape above, so the
# extra .reshape(-1, 1) calls below do not change the shape.
X_pre = X[0:n_pre,:]
Y_pre = Y[0:n_pre,:].reshape(-1, 1)

X_opt = X[n_pre:n_pre+n_opt,:]
Y_opt = Y[n_pre:n_pre+n_opt,:].reshape(-1, 1)

X_adj = X[n_pre+n_opt:n_pre+n_opt+n_adj,:]
Y_adj = Y[n_pre+n_opt:n_pre+n_opt+n_adj,:].reshape(-1, 1)


X_t = X[n_pre+n_opt+n_adj:,:]
Y_t = Y[n_pre+n_opt+n_adj:,:].reshape(-1, 1)


# Obtain mean estimator
# known_mean is a string flag (compared against the literal "True"), not a bool.
# With "False" the else-branch runs and the mean is estimated via mean_est.
known_mean = "False"
if known_mean == "True":
    M_pre = np.zeros(n_pre).reshape(-1,1)
    M_opt = np.zeros(n_opt).reshape(-1,1)
    M_adj = np.zeros(n_adj).reshape(-1,1)
    M_t = np.zeros(n_t).reshape(-1,1)
else:
    est_type = "NN2"
    M_pre, M_opt, M_adj, M_t = mean_est(est_type,X_pre,Y_pre,X_opt,X_adj,X_t)
    
# Obtain variance estimator
# Returns one variance estimate per split (opt / adj / test). The est_type
# keyword here is independent of the est_type variable assigned above.
var_opt, var_adj, var_t = var_est(X_pre,Y_pre,M_pre, X_opt,X_adj,X_t,est_type ="NN1")
    
# Obtain quantile estimators
# Four quantile levels, each paired with a different estimator type string.
# The first returned value (m1..m4) is not used in this section.
quantile = [0.8,0.85,0.9,0.95]
# Alternative set of quantile levels (disabled):
# quantile = [0.6,0.7,0.8,0.9]
m1,Q1_opt,Q1_adj,Q1_t = est_quantile("NN1",quantile[0],X_pre,Y_pre,X_opt,X_adj,X_t)
m2,Q2_opt,Q2_adj,Q2_t = est_quantile("NN2",quantile[1],X_pre,Y_pre,X_opt,X_adj,X_t)
m3,Q3_opt,Q3_adj,Q3_t = est_quantile("qrf",quantile[2],X_pre,Y_pre,X_opt,X_adj,X_t)
m4,Q4_opt,Q4_adj,Q4_t = est_quantile("gb",quantile[3],X_pre,Y_pre,X_opt,X_adj,X_t)

# construct estimator matrix
# Five candidates per split: four squared (quantile - mean) terms plus the
# variance estimate. After the transpose each row is one candidate --
# assumes every input is an (n_split, 1) column; TODO confirm.
E_opt = np.hstack(((Q1_opt-M_opt)**2, (Q2_opt-M_opt)**2, (Q3_opt-M_opt)**2, (Q4_opt-M_opt)**2, var_opt))
E_opt = E_opt.T
E_adj = np.hstack(((Q1_adj-M_adj)**2, (Q2_adj-M_adj)**2, (Q3_adj-M_adj)**2, (Q4_adj-M_adj)**2, var_adj))
E_adj = E_adj.T
E_t = np.hstack(((Q1_t-M_t)**2, (Q2_t-M_t)**2, (Q3_t-M_t)**2, (Q4_t-M_t)**2, var_t))
E_t = E_t.T

# solve optimization problem

# "aug" mode takes the candidate matrices; it returns a weight object plus the
# fitted band on the adj and test splits.
optimal_weight, V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "aug", E_opt, E_adj, E_t)
# Alternative solver modes (disabled). As written they unpack two values
# rather than three -- verify against solve_opt before re-enabling.
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_poly", degree = 3)
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_rbf", sigma = 1)

# adjust interval
# alpha = 0.05 is the level passed to interval_adj; delta then multiplies the
# test band below.
alpha = 0.05
delta = interval_adj(X_adj,Y_adj,M_adj,V100_adj,alpha)

# Evaluate on the test split (no plot is produced in this section).
V_alpha_t = delta*V100_t
# A test point is covered when its squared residual around the estimated mean
# is at most V_alpha_t, so the reported "bandwidth" is on the squared scale.
coverage = (np.power(Y_t[:,0]-M_t[:,0], 2) <= V_alpha_t[:,0]).mean()
bandwidth = np.mean(V_alpha_t[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)



##################### LQR #####################
# Linear quantile regression baseline with an estimated mean: the first half of
# the non-test data is passed to mean_est_2, the second half is used to fit the
# (1 - alpha/2) conditional quantile of the centred response, and the squared
# predicted quantile is the band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half: rows handed to the mean estimator.
X_lin, Y_lin = X[:n_half], Y[:n_half]
x_lin, y_lin = X_lin[:, 0], Y_lin[:, 0]
n_lin = len(X_lin)

# Second half: the sample the quantile regression is fitted on.
X_quantile, Y_quantile = X[n_half:n_labeled], Y[n_half:n_labeled]
n_quantile = len(X_quantile)

# Held-out test rows.
X_test, Y_test = X[n_labeled:], Y[n_labeled:]
n_test = len(X_test)


# Estimate the mean using NN2
# mean_est_2 is not defined in this notebook; it returns mean predictions for
# the quantile-fitting rows and for the test rows.
est_type = "NN2"
M_quantile, M_test = mean_est_2(est_type, X_lin, Y_lin, X_quantile, X_test)

# Estimate the quantile
alpha = 0.05

# Note: the keyword alpha=0 is QuantileRegressor's regularisation strength,
# unrelated to the miscoverage level alpha defined just above.
model_quantile = QuantileRegressor(quantile=1 - alpha / 2, alpha=0, solver='highs')
model_quantile.fit(X_quantile, np.ravel(Y_quantile - M_quantile))
Q_test = model_quantile.predict(X_test)

# Squared predicted quantile, stored as a column.
V_test = (Q_test**2).reshape(-1, 1)

# Covered when the squared residual does not exceed the band.
sq_resid = np.power(Y_test[:, 0] - M_test[:, 0], 2)
coverage = np.mean(sq_resid <= V_test[:, 0])
bandwidth = V_test[:, 0].mean()
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

##################### SPLIT CONFORMAL #####################
# Split-conformal baseline with an estimated mean: the first half of the
# non-test data is passed to mean_est_2, absolute residuals on the second
# (calibration) half give one global threshold, and its square is used as a
# constant band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half: rows handed to the mean estimator.
X_lin = X[0:n_half,:]
Y_lin = Y[0:n_half,:]
x_lin = X_lin[:,0]
y_lin = Y_lin[:,0]
n_lin = X_lin.shape[0]

# Second half: calibration rows whose residuals define the threshold.
X_res = X[n_half:n_labeled,:]
Y_res = Y[n_half:n_labeled,:]
x_res = X_res[:,0]
y_res = Y_res[:,0]
n_res = X_res.shape[0]

# Held-out test rows.
X_test = X[n_labeled:,:]
Y_test = Y[n_labeled:,:]
x_test = X_test[:,0]
y_test = Y_test[:,0]
n_test = X_test.shape[0]


# Estimate the mean using NN2
# mean_est_2 is not defined in this notebook; it returns mean predictions for
# the calibration rows and for the test rows.
est_type = "NN2"
Y_res_pred, M_test = mean_est_2(est_type,X_lin,Y_lin,X_res,X_test)


# Calculate the conformity scores
y_res_pred = Y_res_pred[:,0]
residuals = np.abs(y_res - y_res_pred)

alpha = 0.05  # 95% confidence level
residuals_sorted = np.sort(residuals)
# Finite-sample conformal quantile: the ceil((1 - alpha) * (n_res + 1))-th
# smallest calibration residual. The rank is sized by the calibration residuals
# themselves (n_res), not by the training half, so it stays correct when the
# two halves differ in length.
rank = math.ceil((1 - alpha) * (n_res + 1))
k = rank - 1  # zero-based position of that order statistic
# With too few calibration points the rank exceeds n_res; the valid threshold
# is then +inf (an unbounded band) rather than an out-of-range index.
threshold = residuals_sorted[k] if rank <= n_res else np.inf

# Calculate the prediction interval
v_test = (threshold**2)*np.ones(n_test)
V_test = v_test.reshape(-1,1)


# Covered when the squared residual does not exceed the (squared) threshold.
coverage = (np.power(Y_test[:,0]-M_test[:,0], 2) <= V_test[:,0]).mean()
bandwidth = np.mean(V_test[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

The overall coverage is 0.987
The mean bandwidth for testing data is 8.693262227379767
The overall coverage is 0.955
The mean bandwidth for testing data is 11.224126189011635
The overall coverage is 0.964
The mean bandwidth for testing data is 11.483359826443577


In [5]:
################################### SETUP 3 ###################################
# Setup 3: non-zero mean with symmetrically truncated Laplace noise,
#     location = (X @ beta)^2 + 5 * (X @ beta)^4,
#     scale    = sqrt(1 + 25 * (X @ beta)^4),
# with every draw forced into [location - 2 * scale, location + 2 * scale].
# The mean is treated as unknown and estimated. The later sections of this cell
# reuse the same X / Y and the same test split.
#
# NOTE(review): mean_est, var_est, est_quantile, solve_opt and interval_adj are
# not defined in this notebook (presumably they come from the star import at the
# top); what they fit and the exact shapes they return are not visible here.

##################### UTOPIA #####################

# Seed NumPy's global RNG so the simulated data are reproducible.
np.random.seed(1)
# Sizes of the four consecutive splits of the simulated sample.
n_pre = 1000  # rows handed to mean_est / var_est / est_quantile
n_opt = 900   # rows handed to solve_opt
n_adj = 100   # rows handed to interval_adj
n_t = 1000    # held-out rows used only to report coverage / bandwidth
n = n_pre+n_opt+n_adj+n_t
# Unit-norm direction: each entry has magnitude 1/sqrt(3).
beta = np.array([1/math.sqrt(3),1/math.sqrt(3),-1/math.sqrt(3)])

X = generate_points_on_sphere(n)

# Specify the location and scale for Y.
# Gotcha: std_dev_Y is passed below as the second argument of
# np.random.laplace, which is the Laplace *scale* parameter, not a standard
# deviation (an untruncated Laplace has std = sqrt(2) * scale).
mean_Y = np.power(X @ beta, 2)+5*np.power(X @ beta, 4)
std_dev_Y = np.sqrt(1 + 25 * np.power(X @ beta, 4))
# Columns of shape (n, 1) so the draws below come out as a column as well.
mean_Y = mean_Y.reshape(-1,1)
std_dev_Y  = std_dev_Y .reshape(-1,1)

# Specify the bounds for Y
# Symmetric around the location, so truncation leaves the conditional mean at mean_Y.
lower_bound = mean_Y - 2 * std_dev_Y
upper_bound = mean_Y + 2 * std_dev_Y

# Generate all Y values initially
Y = np.random.laplace(mean_Y, std_dev_Y)

# Correct values that fall out of bounds
# Rejection step: redraw only the out-of-bounds entries, each with its own
# location / scale, and repeat until every entry lies inside its bounds.
while True:
    out_of_bounds = (Y < lower_bound) | (Y > upper_bound)
    if not np.any(out_of_bounds):
        break
    Y[out_of_bounds] = np.random.laplace(mean_Y[out_of_bounds], std_dev_Y[out_of_bounds])


# Consecutive row slices. Y is already a column, so the extra .reshape(-1, 1)
# calls below do not change the shape.
X_pre = X[0:n_pre,:]
Y_pre = Y[0:n_pre,:].reshape(-1, 1)

X_opt = X[n_pre:n_pre+n_opt,:]
Y_opt = Y[n_pre:n_pre+n_opt,:].reshape(-1, 1)

X_adj = X[n_pre+n_opt:n_pre+n_opt+n_adj,:]
Y_adj = Y[n_pre+n_opt:n_pre+n_opt+n_adj,:].reshape(-1, 1)


X_t = X[n_pre+n_opt+n_adj:,:]
Y_t = Y[n_pre+n_opt+n_adj:,:].reshape(-1, 1)



# Obtain mean estimator
# known_mean is a string flag (compared against the literal "True"), not a bool.
# With "False" the else-branch runs and the mean is estimated via mean_est.
known_mean = "False"
if known_mean == "True":
    M_pre = np.zeros(n_pre).reshape(-1,1)
    M_opt = np.zeros(n_opt).reshape(-1,1)
    M_adj = np.zeros(n_adj).reshape(-1,1)
    M_t = np.zeros(n_t).reshape(-1,1)
else:
    est_type = "NN2"
    M_pre, M_opt, M_adj, M_t = mean_est(est_type,X_pre,Y_pre,X_opt,X_adj,X_t)
    
# Obtain variance estimator
# Returns one variance estimate per split (opt / adj / test). The est_type
# keyword here is independent of the est_type variable assigned above.
var_opt, var_adj, var_t = var_est(X_pre,Y_pre,M_pre, X_opt,X_adj,X_t,est_type ="NN1")
    
# Obtain quantile estimators
# Four quantile levels, each paired with a different estimator type string.
# The first returned value (m1..m4) is not used in this section.
quantile = [0.8,0.85,0.9,0.95]
# Alternative set of quantile levels (disabled):
# quantile = [0.6,0.7,0.8,0.9]
m1,Q1_opt,Q1_adj,Q1_t = est_quantile("NN1",quantile[0],X_pre,Y_pre,X_opt,X_adj,X_t)
m2,Q2_opt,Q2_adj,Q2_t = est_quantile("NN2",quantile[1],X_pre,Y_pre,X_opt,X_adj,X_t)
m3,Q3_opt,Q3_adj,Q3_t = est_quantile("qrf",quantile[2],X_pre,Y_pre,X_opt,X_adj,X_t)
m4,Q4_opt,Q4_adj,Q4_t = est_quantile("gb",quantile[3],X_pre,Y_pre,X_opt,X_adj,X_t)

# construct estimator matrix
# Five candidates per split: four squared (quantile - mean) terms plus the
# variance estimate. After the transpose each row is one candidate --
# assumes every input is an (n_split, 1) column; TODO confirm.
E_opt = np.hstack(((Q1_opt-M_opt)**2, (Q2_opt-M_opt)**2, (Q3_opt-M_opt)**2, (Q4_opt-M_opt)**2, var_opt))
E_opt = E_opt.T
E_adj = np.hstack(((Q1_adj-M_adj)**2, (Q2_adj-M_adj)**2, (Q3_adj-M_adj)**2, (Q4_adj-M_adj)**2, var_adj))
E_adj = E_adj.T
E_t = np.hstack(((Q1_t-M_t)**2, (Q2_t-M_t)**2, (Q3_t-M_t)**2, (Q4_t-M_t)**2, var_t))
E_t = E_t.T

# solve optimization problem

# "aug" mode takes the candidate matrices; it returns a weight object plus the
# fitted band on the adj and test splits.
optimal_weight, V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "aug", E_opt, E_adj, E_t)
# Alternative solver modes (disabled). As written they unpack two values
# rather than three -- verify against solve_opt before re-enabling.
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_poly", degree = 3)
#V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "rkhs_rbf", sigma = 1)

# adjust interval
# alpha = 0.05 is the level passed to interval_adj; delta then multiplies the
# test band below.
alpha = 0.05
delta = interval_adj(X_adj,Y_adj,M_adj,V100_adj,alpha)

# Evaluate on the test split (no plot is produced in this section).
V_alpha_t = delta*V100_t
# A test point is covered when its squared residual around the estimated mean
# is at most V_alpha_t, so the reported "bandwidth" is on the squared scale.
coverage = (np.power(Y_t[:,0]-M_t[:,0], 2) <= V_alpha_t[:,0]).mean()
bandwidth = np.mean(V_alpha_t[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)


##################### LQR #####################
# Linear quantile regression baseline with an estimated mean: the first half of
# the non-test data is passed to mean_est_2, the second half is used to fit the
# (1 - alpha/2) conditional quantile of the centred response, and the squared
# predicted quantile is the band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half: rows handed to the mean estimator.
X_lin, Y_lin = X[:n_half], Y[:n_half]
x_lin, y_lin = X_lin[:, 0], Y_lin[:, 0]
n_lin = len(X_lin)

# Second half: the sample the quantile regression is fitted on.
X_quantile, Y_quantile = X[n_half:n_labeled], Y[n_half:n_labeled]
n_quantile = len(X_quantile)

# Held-out test rows.
X_test, Y_test = X[n_labeled:], Y[n_labeled:]
n_test = len(X_test)

# Estimate the mean using NN2
# mean_est_2 is not defined in this notebook; it returns mean predictions for
# the quantile-fitting rows and for the test rows.
est_type = "NN2"
M_quantile, M_test = mean_est_2(est_type, X_lin, Y_lin, X_quantile, X_test)


# Estimate the quantile
alpha = 0.05

# Note: the keyword alpha=0 is QuantileRegressor's regularisation strength,
# unrelated to the miscoverage level alpha defined just above.
model_quantile = QuantileRegressor(quantile=1 - alpha / 2, alpha=0, solver='highs')
model_quantile.fit(X_quantile, np.ravel(Y_quantile - M_quantile))
Q_test = model_quantile.predict(X_test)

# Squared predicted quantile, stored as a column.
V_test = (Q_test**2).reshape(-1, 1)

# Covered when the squared residual does not exceed the band.
sq_resid = np.power(Y_test[:, 0] - M_test[:, 0], 2)
coverage = np.mean(sq_resid <= V_test[:, 0])
bandwidth = V_test[:, 0].mean()
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

##################### SPLIT CONFORMAL #####################
# Split-conformal baseline with an estimated mean: the first half of the
# non-test data is passed to mean_est_2, absolute residuals on the second
# (calibration) half give one global threshold, and its square is used as a
# constant band on the test split.

n_labeled = n_pre + n_opt + n_adj   # every row that is not a test row
n_half = n_labeled // 2

# First half: rows handed to the mean estimator.
X_lin = X[0:n_half,:]
Y_lin = Y[0:n_half,:]
x_lin = X_lin[:,0]
y_lin = Y_lin[:,0]
n_lin = X_lin.shape[0]

# Second half: calibration rows whose residuals define the threshold.
X_res = X[n_half:n_labeled,:]
Y_res = Y[n_half:n_labeled,:]
x_res = X_res[:,0]
y_res = Y_res[:,0]
n_res = X_res.shape[0]

# Held-out test rows.
X_test = X[n_labeled:,:]
Y_test = Y[n_labeled:,:]
x_test = X_test[:,0]
y_test = Y_test[:,0]
n_test = X_test.shape[0]

# Estimate the mean using NN2
# mean_est_2 is not defined in this notebook; it returns mean predictions for
# the calibration rows and for the test rows.
est_type = "NN2"
Y_res_pred, M_test = mean_est_2(est_type,X_lin,Y_lin,X_res,X_test)


# Calculate the conformity scores
y_res_pred = Y_res_pred[:,0]
residuals = np.abs(y_res - y_res_pred)

alpha = 0.05  # 95% confidence level
residuals_sorted = np.sort(residuals)
# Finite-sample conformal quantile: the ceil((1 - alpha) * (n_res + 1))-th
# smallest calibration residual. The rank is sized by the calibration residuals
# themselves (n_res), not by the training half, so it stays correct when the
# two halves differ in length.
rank = math.ceil((1 - alpha) * (n_res + 1))
k = rank - 1  # zero-based position of that order statistic
# With too few calibration points the rank exceeds n_res; the valid threshold
# is then +inf (an unbounded band) rather than an out-of-range index.
threshold = residuals_sorted[k] if rank <= n_res else np.inf

# Calculate the prediction interval
v_test = (threshold**2)*np.ones(n_test)
V_test = v_test.reshape(-1,1)


# Covered when the squared residual does not exceed the (squared) threshold.
coverage = (np.power(Y_test[:,0]-M_test[:,0], 2) <= V_test[:,0]).mean()
bandwidth = np.mean(V_test[:,0])
print("The overall coverage is", coverage)
print("The mean bandwidth for testing data is", bandwidth)

The overall coverage is 0.936
The mean bandwidth for testing data is 17.398213808090194
The overall coverage is 0.927
The mean bandwidth for testing data is 28.680294296891606
The overall coverage is 0.919
The mean bandwidth for testing data is 21.42858499345289
