In [1]:
from All_functions_simulation_univariate import * 
from sklearn.linear_model import QuantileRegressor

In [2]:
################################### Data Generating ###################################

def myData_S1(i, n=2000):
    """
    Generate the S1 sample: pure heteroscedastic noise with no mean shift.

    X is uniform on [-1, 1] and Y = sqrt(1 + 25 * X^4) * U, where U is an
    independent Uniform(-1, 1) draw.

    i: random seed (re-seeds NumPy's global legacy generator)
    n: number of samples

    Returns (X, Y), each reshaped to a single column.
    """
    np.random.seed(i)

    # Draw order is part of the contract: covariate first, then the noise,
    # so a given seed always reproduces the same sample.
    covariate = np.random.uniform(-1, 1, n).reshape(-1, 1)
    noise = np.random.uniform(-1, 1, n).reshape(-1, 1)

    # Noise scale grows with |X|.
    spread = np.sqrt(1 + 25 * np.power(covariate, 4))

    X, Y = covariate, spread * noise
    return X, Y

def myData_S2(i, n=2000):
    """
    Generate the S2 sample: cubic mean plus heteroscedastic noise.

    X is uniform on [-1, 1] and
    Y = 1 + 5 * X^3 + sqrt(1 + 25 * X^4) * U, with U an independent
    Uniform(-1, 1) draw.

    i: random seed (re-seeds NumPy's global legacy generator)
    n: number of samples

    Returns (X, Y), each reshaped to a single column.
    """
    np.random.seed(i)

    # Covariate is drawn before the noise so seeded output is reproducible.
    covariate = np.random.uniform(-1, 1, n).reshape(-1, 1)
    noise = np.random.uniform(-1, 1, n).reshape(-1, 1)

    trend = 1 + 5 * np.power(covariate, 3)
    spread = np.sqrt(1 + 25 * np.power(covariate, 4))

    X, Y = covariate, trend + spread * noise
    return X, Y


def myData_S3(i, n=2000):
    """
    Generate the S3 sample: Gamma responses truncated from above.

    X is uniform on [-1, 1]. Y is Gamma with shape 5 + X and scale
    1 + 0.5 * sin(X), redrawn until it does not exceed twice the Gamma mean
    (shape * scale).

    i: random seed (re-seeds NumPy's global legacy generator)
    n: number of samples

    Returns (X, Y), each with a single column.
    """
    np.random.seed(i)
    X = np.random.uniform(-1, 1, n).reshape(-1, 1)

    # Covariate-dependent Gamma parameters.
    Gamma_shape = 5 + np.power(X, 1)
    Gamma_scale = 1 + 0.5 * np.sin(X)

    # Truncation point: twice the untruncated mean.
    upper_bound = 2 * (Gamma_shape * Gamma_scale)

    Y = np.random.gamma(Gamma_shape, Gamma_scale)

    # Rejection step: redraw only the entries above the bound, repeating
    # until every entry is within it.
    too_large = Y > upper_bound
    while np.any(too_large):
        Y[too_large] = np.random.gamma(Gamma_shape[too_large], Gamma_scale[too_large])
        too_large = Y > upper_bound

    return X, Y

In [3]:
################################### UTOPIA ###################################

def UTOPIA(X,Y,alpha = 0.05,n_train=950,n_adj=50, n_t = 1000,known_mean = "False"):
    """
    Run one UTOPIA replication and report test-set coverage and band width.

    The rows of (X, Y) are split by position:
      * rows [0, n_train)               -> fit the base estimators AND solve the
                                           aggregation problem (same rows are
                                           used for both, i.e. no sample split),
      * rows [n_train, n_train + n_adj) -> compute the scaling factor delta,
      * all remaining rows              -> evaluation.

    X, Y: input data, 2-D arrays (indexed as X[rows, :]); expected to have a
        single column each
    alpha: level forwarded to interval_adj (presumably the target
        miscoverage, so nominal coverage is 1 - alpha -- confirm against
        interval_adj)
    n_train: number of leading rows used for fitting and optimisation
    n_adj: number of rows used for the interval adjustment
    n_t: nominal test-set size; kept for backward compatibility. The actual
        number of remaining rows is what sizes the test arrays.
    known_mean: the string "True" or a truthy non-string value (e.g. boolean
        True) treats the conditional mean as identically zero; any other
        value estimates it with mean_est.

    Returns (coverage, bandwidth): the fraction of test points satisfying
    (Y - M)^2 <= V, and the average of V over the test points, where
    V = delta * V100_t is the adjusted band.
    """
    X_pre = X[0:n_train,:].reshape(-1, 1)
    Y_pre = Y[0:n_train,:].reshape(-1, 1)

    # Same rows as the "pre" block on purpose: this variant does not split
    # the training data between estimation and optimisation.
    X_opt = X[0:n_train,:].reshape(-1, 1)
    Y_opt = Y[0:n_train,:].reshape(-1, 1)

    X_adj = X[n_train:n_train+n_adj,:].reshape(-1, 1)
    Y_adj = Y[n_train:n_train+n_adj,:].reshape(-1, 1)

    X_t = X[n_train+n_adj:,:].reshape(-1, 1)
    Y_t = Y[n_train+n_adj:,:].reshape(-1, 1)

    # Accept the legacy string flag as well as a real boolean; a bare
    # `== "True"` would silently send boolean True down the estimated-mean path.
    if isinstance(known_mean, str):
        mean_is_known = known_mean == "True"
    else:
        mean_is_known = bool(known_mean)

    # Obtain mean estimator
    if mean_is_known:
        # Size the zero means from the slices themselves so they always line
        # up with the data, whatever n_t or the length of X happens to be.
        M_pre = np.zeros((X_pre.shape[0], 1))
        M_opt = np.zeros((X_opt.shape[0], 1))
        M_adj = np.zeros((X_adj.shape[0], 1))
        M_t = np.zeros((X_t.shape[0], 1))
    else:
        est_type = "NN2"
        M_pre, M_opt, M_adj, M_t = mean_est(est_type,X_pre,Y_pre,X_opt,X_adj,X_t)

    # Obtain variance estimator
    var_opt, var_adj, var_t = var_est(X_pre,Y_pre,M_pre, X_opt,X_adj,X_t,est_type ="NN1")

    # Obtain quantile estimators: one base learner per quantile level.
    # The fitted models (first return value) are not needed afterwards.
    quantile = [0.8,0.85,0.9,0.95]
    _,Q1_opt,Q1_adj,Q1_t = est_quantile("NN1",quantile[0],X_pre,Y_pre,X_opt,X_adj,X_t)
    _,Q2_opt,Q2_adj,Q2_t = est_quantile("NN2",quantile[1],X_pre,Y_pre,X_opt,X_adj,X_t)
    _,Q3_opt,Q3_adj,Q3_t = est_quantile("qrf",quantile[2],X_pre,Y_pre,X_opt,X_adj,X_t)
    _,Q4_opt,Q4_adj,Q4_t = est_quantile("gb",quantile[3],X_pre,Y_pre,X_opt,X_adj,X_t)

    # construct estimator matrix: one row per candidate (four squared
    # quantile deviations from the mean, then the variance estimate),
    # one column per observation.
    E_opt = np.hstack(((Q1_opt-M_opt)**2, (Q2_opt-M_opt)**2, (Q3_opt-M_opt)**2, (Q4_opt-M_opt)**2, var_opt))
    E_opt = E_opt.T
    E_adj = np.hstack(((Q1_adj-M_adj)**2, (Q2_adj-M_adj)**2, (Q3_adj-M_adj)**2, (Q4_adj-M_adj)**2, var_adj))
    E_adj = E_adj.T
    E_t = np.hstack(((Q1_t-M_t)**2, (Q2_t-M_t)**2, (Q3_t-M_t)**2, (Q4_t-M_t)**2, var_t))
    E_t = E_t.T

    # solve optimization problem (the optimal weights themselves are unused here)
    _, V100_adj, V100_t = solve_opt(X_opt,Y_opt, M_opt, M_adj, M_t, X_adj, X_t, "aug", E_opt, E_adj, E_t)

    # adjust interval using the caller-supplied alpha (it used to be
    # overwritten with a hard-coded 0.05 here, making the argument a no-op)
    delta = interval_adj(X_adj,Y_adj,M_adj,V100_adj,alpha)
    V_alpha_t = delta*V100_t

    # Output bandwidth and coverage
    coverage = (np.power(Y_t[:,0]-M_t[:,0], 2) <= V_alpha_t[:,0]).mean()
    bandwidth = np.mean(V_alpha_t[:,0])

    return coverage, bandwidth


In [4]:
################################### Repeat Simulation 1 ###################################

# Number of Monte Carlo replications; the later simulation cells reuse it.
times = 200

# One slot per replication: test-set coverage and average band width.
UTOPIA_cover_S1 = np.zeros(times)
UTOPIA_wide_S1 = np.zeros_like(UTOPIA_cover_S1)

# The replication index doubles as the random seed of the data generator.
# known_mean="True" makes UTOPIA use an all-zero mean instead of estimating it.
for i in range(times):
    X, Y = myData_S1(i)
    UTOPIA_cover_S1[i], UTOPIA_wide_S1[i] = UTOPIA(X, Y, known_mean="True")

# Summary statistics across replications.
print("For UTOPIA:")
print("The mean of the coverage is", UTOPIA_cover_S1.mean())
print("The median of the coverage is", np.median(UTOPIA_cover_S1))
print("The SD of the coverage is", UTOPIA_cover_S1.std())
print("The mean of the bandwidth is", UTOPIA_wide_S1.mean())
print("The median of the bandwidth is", np.median(UTOPIA_wide_S1))
print("The SD of the bandwidth is", UTOPIA_wide_S1.std())

For UTOPIA:
The mean of the coverage is 0.9379949999999999
The median of the coverage is 0.9404999999999999
The SD of the coverage is 0.03184438686801803
The mean of the bandwidth is 5.065053261318008
The median of the bandwidth is 5.025081999849635
The SD of the bandwidth is 0.5975294029438064


In [5]:
################################### Repeat Simulation 2 ###################################

# One slot per replication: test-set coverage and average band width.
# `times` is the replication count set in the Simulation 1 cell.
UTOPIA_cover_S2 = np.zeros(times)
UTOPIA_wide_S2 = np.zeros_like(UTOPIA_cover_S2)

# The replication index doubles as the random seed of the data generator.
# known_mean="False" makes UTOPIA estimate the mean rather than assume zero.
for i in range(times):
    X, Y = myData_S2(i)
    UTOPIA_cover_S2[i], UTOPIA_wide_S2[i] = UTOPIA(X, Y, known_mean="False")

# Summary statistics across replications.
print("For UTOPIA:")
print("The mean of the coverage is", UTOPIA_cover_S2.mean())
print("The median of the coverage is", np.median(UTOPIA_cover_S2))
print("The SD of the coverage is", UTOPIA_cover_S2.std())
print("The mean of the bandwidth is", UTOPIA_wide_S2.mean())
print("The median of the bandwidth is", np.median(UTOPIA_wide_S2))
print("The SD of the bandwidth is", UTOPIA_wide_S2.std())

For UTOPIA:
The mean of the coverage is 0.933195
The median of the coverage is 0.9365000000000001
The SD of the coverage is 0.03130330613529502
The mean of the bandwidth is 5.074640063336197
The median of the bandwidth is 5.078139558436089
The SD of the bandwidth is 0.6801482625408364


In [6]:
################################### Repeat Simulation 3 ###################################

# One slot per replication: test-set coverage and average band width.
# `times` is the replication count set in the Simulation 1 cell.
UTOPIA_cover_S3 = np.zeros(times)
UTOPIA_wide_S3 = np.zeros_like(UTOPIA_cover_S3)

# The replication index doubles as the random seed of the data generator.
# known_mean="False" makes UTOPIA estimate the mean rather than assume zero.
for i in range(times):
    X, Y = myData_S3(i)
    UTOPIA_cover_S3[i], UTOPIA_wide_S3[i] = UTOPIA(X, Y, known_mean="False")

# Summary statistics across replications.
print("For UTOPIA:")
print("The mean of the coverage is", UTOPIA_cover_S3.mean())
print("The median of the coverage is", np.median(UTOPIA_cover_S3))
print("The SD of the coverage is", UTOPIA_cover_S3.std())
print("The mean of the bandwidth is", UTOPIA_wide_S3.mean())
print("The median of the bandwidth is", np.median(UTOPIA_wide_S3))
print("The SD of the bandwidth is", UTOPIA_wide_S3.std())

For UTOPIA:
The mean of the coverage is 0.91201
The median of the coverage is 0.917
The SD of the coverage is 0.032899542550011235
The mean of the bandwidth is 12.99041919490763
The median of the bandwidth is 12.819390936334095
The SD of the bandwidth is 2.3746154318532473


In [7]:
# Stack the per-replication results into a 6 x times table: rows alternate
# coverage / bandwidth for simulations 1, 2 and 3, in that order.
Output = np.vstack([
    UTOPIA_cover_S1,
    UTOPIA_wide_S1,
    UTOPIA_cover_S2,
    UTOPIA_wide_S2,
    UTOPIA_cover_S3,
    UTOPIA_wide_S3,
])

# Persist as comma-separated values in the current working directory.
np.savetxt("UTOPIA_nosplit_1000train.csv", Output, delimiter=",")