# Additional simulations results:

In [1]:
import numpy as np
import pandas as pd
import os
import scipy.stats as st

from nonconformist.cp import IcpRegressor
from nonconformist.nc import NcFactory
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
plt.style.use("seaborn-v0_8-white")

from clover.locart import LocalRegressionSplit, LocartSplit, MondrianRegressionSplit, RegressionSplit
from acpi import ACPI
from clover.scores import RegressionScore, LocalRegressionScore
import time

from clover.locart import LocartSplit, MondrianRegressionSplit
from clover.scores import RegressionScore
from clover.simulation import simulation, make_correlated_design
from clover.utils import compute_interval_length, split, real_coverage, smis

original_path = os.getcwd()

# figure path
images_dir = "figures"

## Defining function to run new simulated example:

In [2]:
def run_additional_example(
    n_train = 10000,
    kind="homoscedastic",
    p=1,
    d=20,
    t_degree=4,
    base_model=RandomForestRegressor,
    sig=0.1,
    hetero_value=0.25,
    asym_value=0.6,
    calib_size = 0.5,
    coef = 2,
    rate = 1,
    rho = 0.7,
    B_x = 5000,
    B_y = 1000, 
    random_seed_X = 850,
    random_seed = 1250,
    random_projections=False,
    split_seed = 125,
    h=20,
    m=300,
    split_calib=False,
    split_mondrian=False,
    nbins=30,
    criterion="squared_error",
    max_depth=None,
    max_leaf_nodes=None,
    min_samples_leaf=150,
    prune=True,
    **kwargs):
    noise = (p == 1)

    # setting simulation object
    sim_obj = simulation(
        dim = d, 
        coef = coef,
        signif_vars=p,
        hetero_value = hetero_value, 
        asym_value = asym_value, 
        noise = noise, 
        rate = rate,
        t_degree = t_degree,
        rho = rho,
        )
    r_kind = getattr(sim_obj, kind + "_r")
    sim_kind = getattr(sim_obj, kind)

    # generating testing samples
    np.random.seed(random_seed_X)
    
    if kind == "splitted_exp":
        X_test = np.random.uniform(low = 0, high = 1, size = (B_x, d))
    elif kind == "correlated_homoscedastic" or kind == "correlated_heteroscedastic":
        X_test = make_correlated_design(n_samples = B_x, n_features = d, rho = rho, random_state = random_seed_X)
    else:
        X_test = np.random.uniform(low = -1.5, high = 1.5, size=(B_x, d))
        
    if noise:
        X_grid = X_test[:, 0]
    else:
        X_grid = X_test
    
    print(np.max(X_grid))
    print(np.min(X_grid))

    # generating y_test
    y_test = r_kind(X_grid, B=1).flatten()
    sim_kind(2 * n_train, random_seed=random_seed)

    print(np.max(sim_obj.X[:, 0]))
    print(np.min(sim_obj.X[:, 0]))
    
    data = split(
        sim_obj.X,
        sim_obj.y,
        test_size=calib_size,
        calibrate=False,
        random_seed=split_seed,
    )

     # matrix of y's associated to each X in test set
    if noise:
        y_mat = r_kind(X_test[:, 0], B=B_y)
    else:
        y_mat = r_kind(X_test, B=B_y)

    # fitting model
    model = base_model(**kwargs).fit(data["X_train"], data["y_train"])
        

    # fitting locart
    print("Fitting locart to toy example:")
    start_loc = time.time()
    locart_obj = LocartSplit(
                    nc_score=RegressionScore,
                    cart_type="CART",
                    base_model=model,
                    is_fitted=True,
                    alpha=sig,
                    split_calib=split_calib,
                    **kwargs
                )
    locart_obj.fit(data["X_train"], data["y_train"])
    locart_obj.calib(
        data["X_test"],
        data["y_test"],
        max_depth=max_depth,
        max_leaf_nodes=max_leaf_nodes,
        min_samples_leaf=min_samples_leaf,
        criterion=criterion,
        prune_tree=prune,
        random_projections=random_projections,
        m=m,
        h=h,
    )
    
    end_loc = time.time() - start_loc
    print("Time Elapsed to fit Locart: ", end_loc)

    print("Computing metrics")
    start_loc = time.time()
    
    # predictions
    locart_pred = np.array(locart_obj.predict(X_test))
    cond_locart_real = real_coverage(locart_pred, y_mat)

    # average distance
    dif_locart = np.abs(cond_locart_real - (1 - sig))
    locart_ave_dist = np.mean(dif_locart)

    # marginal coverage
    marg_cover = (
                    np.logical_and(
                        y_test >= locart_pred[:, 0], y_test <= locart_pred[:, 1]
                    )
                    + 0
                )
    locart_ave_marginal_cov = np.mean(marg_cover)

    # smis
    locart_smis = smis(locart_pred, y_test, alpha=sig)

    # mean interval length
    locart_interval_len = np.mean(compute_interval_length(locart_pred))

     # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    locart_interval_len_cover = np.mean(compute_interval_length(locart_pred[cover_idx]))

    end_loc = time.time() - start_loc
    print("Time Elapsed to compute metrics: ", end_loc)

    print("Fitting loforest to example:")
    start_loc = time.time()
    loforest_obj = LocartSplit(nc_score = RegressionScore, base_model = model, cart_type = "RF", alpha = sig, split_calib = split_calib, is_fitted = True,
                             weighting = False, **kwargs)
    loforest_obj.fit(data["X_train"], data["y_train"])
    loforest_obj.calib(data["X_test"], data["y_test"], max_depth = max_depth, 
    max_leaf_nodes = max_leaf_nodes, min_samples_leaf = min_samples_leaf, criterion = criterion, prune_tree = prune)
    end_loc = time.time() - start_loc
    print("Time Elapsed to fit Loforest: ", end_loc)

    print("Computing metrics")
    start_loc = time.time()
    
    # predictions
    loforest_pred = np.array(loforest_obj.predict(X_test))
    cond_loforest_real = real_coverage(loforest_pred, y_mat)

    # average, median and max distance
    dif_loforest = np.abs(cond_loforest_real - (1 - sig))
    loforest_ave_dist = np.mean(dif_loforest)

    # smis
    loforest_smis = smis(loforest_pred, y_test, alpha = sig)

    # mean interval length
    loforest_interval_len = np.mean(compute_interval_length(loforest_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test >= loforest_pred[:, 0], 
        y_test <= loforest_pred[:, 1]) + 0
    loforest_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    loforest_interval_len_cover = np.mean(compute_interval_length(loforest_pred[cover_idx]))
    end_loc = time.time() - start_loc
    print("Time Elapsed to compute metrics for Loforest: ", end_loc)

    # fitting A-locart
    print("Fitting A-locart to toy example:")
    start_loc = time.time()
    alocart_obj = LocartSplit(
                    nc_score=RegressionScore,
                    cart_type="CART",
                    base_model=model,
                    weighting=True,
                    is_fitted=True,
                    alpha=sig,
                    split_calib=split_calib,
                    **kwargs
                )
    alocart_obj.fit(data["X_train"], data["y_train"])
    alocart_obj.calib(
        data["X_test"],
        data["y_test"],
        max_depth=max_depth,
        max_leaf_nodes=max_leaf_nodes,
        min_samples_leaf=min_samples_leaf,
        criterion=criterion,
        prune_tree=prune,
        random_projections=random_projections,
        m=m,
        h=h,
    )
    
    end_loc = time.time() - start_loc
    print("Time Elapsed to fit A-Locart: ", end_loc)

    print("Computing metrics")
    start_loc = time.time()
    
    # predictions
    alocart_pred = np.array(alocart_obj.predict(X_test))
    cond_alocart_real = real_coverage(alocart_pred, y_mat)

    # average distance
    dif_alocart = np.abs(cond_alocart_real - (1 - sig))
    alocart_ave_dist = np.mean(dif_alocart)

    # marginal coverage
    marg_cover = (
                    np.logical_and(
                        y_test >= alocart_pred[:, 0], y_test <= alocart_pred[:, 1]
                    )
                    + 0
                )
    alocart_ave_marginal_cov = np.mean(marg_cover)

    # smis
    alocart_smis = smis(alocart_pred, y_test, alpha=sig)

    # mean interval length
    alocart_interval_len = np.mean(compute_interval_length(alocart_pred))

     # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    alocart_interval_len_cover = np.mean(compute_interval_length(alocart_pred[cover_idx]))

    end_loc = time.time() - start_loc
    print("Time Elapsed to compute metrics: ", end_loc)

    print("Fitting A-loforest to example:")
    start_loc = time.time()
    aloforest_obj = LocartSplit(nc_score = RegressionScore, base_model = model, cart_type = "RF", alpha = sig, split_calib = split_calib, is_fitted = True,
                             weighting = True, **kwargs)
    aloforest_obj.fit(data["X_train"], data["y_train"])
    aloforest_obj.calib(data["X_test"], data["y_test"], max_depth = max_depth, 
    max_leaf_nodes = max_leaf_nodes, min_samples_leaf = min_samples_leaf, criterion = criterion, prune_tree = prune)
    end_loc = time.time() - start_loc
    print("Time Elapsed to fit A-loforest: ", end_loc)

    print("Computing metrics")
    start_loc = time.time()
    
    # predictions
    aloforest_pred = np.array(aloforest_obj.predict(X_test))
    cond_aloforest_real = real_coverage(aloforest_pred, y_mat)

    # average, median and max distance
    dif_aloforest = np.abs(cond_aloforest_real - (1 - sig))
    aloforest_ave_dist = np.mean(dif_aloforest)

    # smis
    aloforest_smis = smis(aloforest_pred, y_test, alpha = sig)

    # mean interval length
    aloforest_interval_len = np.mean(compute_interval_length(aloforest_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test >= aloforest_pred[:, 0], 
        y_test <= aloforest_pred[:, 1]) + 0
    aloforest_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    aloforest_interval_len_cover = np.mean(compute_interval_length(aloforest_pred[cover_idx]))
    end_loc = time.time() - start_loc
    print("Time Elapsed to compute metrics for A-loforest: ", end_loc)


    # fitting default regression split
    print("Fitting regression split")
    start_split = time.time()
    icp = RegressionSplit(base_model = model, alpha = sig, is_fitted = True, **kwargs)
    icp.fit(data["X_train"], data["y_train"])
    icp.calibrate(data["X_test"], data["y_test"])

    # nc = NcFactory.create_nc(model)
    # icp = IcpRegressor(nc)
    # icp.fit(data["X_train"], data["y_train"])
    # icp.calibrate(data["X_test"], data["y_test"])

    end_split = time.time() - start_split
    print("Time Elapsed to fit regression split: ", end_split)

    print("Computing metrics")
    start_split = time.time()

    # predictions
    # icp_pred = icp.predict(X_test, significance = sig)
    icp_pred = icp.predict(X_test)
    cond_icp_real = real_coverage(icp_pred, y_mat)
    
    # average, median and max distance
    dif_icp = np.abs(cond_icp_real - (1 - sig))
    icp_ave_dist = np.mean(dif_icp)

    # icp smis
    icp_smis = smis(icp_pred, y_test, alpha = sig)

    # ICP interval length
    icp_interval_len = np.mean(compute_interval_length(icp_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test >= icp_pred[:, 0], 
        y_test <= icp_pred[:, 1]) + 0
    icp_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    icp_interval_len_cover = np.mean(compute_interval_length(icp_pred[cover_idx]))

    end_split = time.time() - start_split
    print("Time Elapsed to compute statistics for regression split: ", end_split)

    # fitting wighted regression split
    print("Fitting weighted regression split")
    start_weighted_split = time.time()
    wicp = LocalRegressionSplit(model, alpha = sig, is_fitted = True, **kwargs)
    wicp.fit(data["X_train"], data["y_train"])
    wicp.calibrate(data["X_test"], data["y_test"])

    end_weighted_split = time.time() - start_weighted_split
    print("Time Elapsed to fit weighted regression split: ", end_weighted_split)

    print("Computing metrics")
    start_weighted_split = time.time()
    # predictions
    wicp_pred = wicp.predict(X_test)
    cond_wicp_real = real_coverage(wicp_pred, y_mat)
    
    wicp_dif = np.abs(cond_wicp_real - (1 - sig))
    wicp_ave_dist = np.mean(wicp_dif)

    # smis
    wicp_smis = smis(wicp_pred, y_test, alpha = sig)

    # ICP interval length
    wicp_interval_len = np.mean(compute_interval_length(wicp_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test >= wicp_pred[:, 0], 
        y_test <= wicp_pred[:, 1]) + 0
    wicp_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    wicp_interval_len_cover = np.mean(compute_interval_length(wicp_pred[cover_idx]))
    print("Time Elapsed to compute statistics for weighted regression split: ", end_weighted_split)

    # mondrian split
    print("Fitting mondrian regression split")
    start_weighted_split = time.time()
    micp = MondrianRegressionSplit(model, alpha = sig, k = nbins, is_fitted = True, **kwargs)
    micp.fit(data["X_train"], data["y_train"], split=split_mondrian)
    micp.calibrate(data["X_test"], data["y_test"])

    end_weighted_split = time.time() - start_weighted_split
    print("Time Elapsed to fit mondrian regression split: ", end_weighted_split)

    print("Computing metrics")
    start_weighted_split = time.time()
    # predictions
    micp_pred = micp.predict(X_test)
    cond_micp_real = real_coverage(micp_pred, y_mat)
    
    
    micp_dif = np.abs(cond_micp_real - (1 - sig))
    micp_ave_dist = np.mean(micp_dif)

    # smis
    micp_smis = smis(micp_pred, y_test, alpha = sig)

    # ICP interval length
    micp_interval_len = np.mean(compute_interval_length(micp_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test>= micp_pred[:, 0], 
        y_test <= micp_pred[:, 1]) + 0
    micp_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    micp_interval_len_cover = np.mean(compute_interval_length(micp_pred[cover_idx]))

    print("Fitting ACPI")
    start_weighted_split = time.time()
    acpi = ACPI(model_cali = model, n_estimators = 100)
    acpi.fit(data["X_test"], data["y_test"], nonconformity_func = None)
    acpi.fit_calibration(data["X_test"], data["y_test"], quantile = 1 - sig, only_qrf = True)

    end_weighted_split = time.time() - start_weighted_split
    print("Time Elapsed to fit ACPI: ", end_weighted_split)

    print("Computing metrics")
    start_weighted_split = time.time()
    # predictions
    acpi_pred = np.stack((acpi.predict_pi(X_test, method = "qrf")), axis = -1)
    cond_acpi_real = real_coverage(acpi_pred, y_mat)
    
    
    acpi_dif = np.abs(cond_acpi_real - (1 - sig))
    acpi_ave_dist = np.mean(acpi_dif)

    # smis
    acpi_smis = smis(acpi_pred, y_test, alpha = sig)

    # ICP interval length
    acpi_interval_len = np.mean(compute_interval_length(acpi_pred))

    # marginal coverage
    marg_cover = np.logical_and(y_test >= acpi_pred[:, 0], 
        y_test <= acpi_pred[:, 1]) + 0
    acpi_ave_marginal_cov = np.mean(
        marg_cover
    )

    # interval length | coveraqe
    cover_idx = np.where(marg_cover == 1)
    acpi_interval_len_cover = np.mean(compute_interval_length(acpi_pred[cover_idx]))


    all_results =  pd.DataFrame(data = {"Methods":["LOFOREST", 
                                                   "LOCART",  
                                                   "A-LOFOREST",
                                                   "A-LOCART", 
                                                   "Regresion split", 
                                                   "Weighted regression split", 
                                                   "Mondrian", 
                                                   "QRF-TC"],
        "smis":[loforest_smis, locart_smis, aloforest_smis, alocart_smis, icp_smis, wicp_smis, micp_smis,  acpi_smis],
        "Average marginal coverage":[loforest_ave_marginal_cov,locart_ave_marginal_cov, 
                                     aloforest_ave_marginal_cov,alocart_ave_marginal_cov,
                                     icp_ave_marginal_cov, wicp_ave_marginal_cov, micp_ave_marginal_cov, 
                                     acpi_ave_marginal_cov],
        "Average interval length":[loforest_interval_len,locart_interval_len, 
                                   aloforest_interval_len,alocart_interval_len,
                                   icp_interval_len, wicp_interval_len, micp_interval_len, acpi_interval_len],
        "Average interval length given coverage":[loforest_interval_len_cover, locart_interval_len_cover, 
                                                  aloforest_interval_len_cover, alocart_interval_len_cover,
                                                  icp_interval_len_cover, wicp_interval_len_cover, 
                                                  micp_interval_len_cover, acpi_interval_len_cover],
        "Average distance":[loforest_ave_dist, locart_ave_dist,
                            aloforest_ave_dist, alocart_ave_dist,
                            icp_ave_dist, wicp_ave_dist, micp_ave_dist, acpi_ave_dist]})

    return all_results

In [3]:
data_lists = []
p_array = np.array([1, 3, 5])
n = 10000

## Splitted exponential example:

In [15]:
for p in p_array:
    data_lists.append(run_additional_example(
        kind = "splitted_exp", 
        n_train=n, 
        p = p, 
        coef = 2, 
        rate = 1, 
        B_y = 10**3,
        min_samples_leaf = 150, 
        random_seed = 500, 
        random_state = 45,
        )
        )

Fitting locart to toy example:
Time Elapsed to fit Locart:  1.1365997791290283
Computing metrics
Time Elapsed to compute metrics:  0.11741757392883301
Fitting loforest to example:
Time Elapsed to fit Loforest:  6.939696788787842
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.17168807983398438
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.5893545150756836
Computing metrics
Time Elapsed to compute metrics:  0.20712828636169434
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  7.689961671829224
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.26305675506591797
Fitting regression split
Time Elapsed to fit regression split:  15.362308979034424
Computing metrics
Time Elapsed to compute statistics for regression split:  0.13394451141357422
Fitting weighted regression split
Time Elapsed to fit weighted regression split:  25.93237853050232
Computing metrics
Time Elapsed to compute statistics for weighted regression split:

100%|██████████| 50/50 [01:46<00:00,  2.12s/it]


Time Elapsed to fit ACPI:  111.61511301994324
Computing metrics
Fitting locart to toy example:
Time Elapsed to fit Locart:  0.9251563549041748
Computing metrics
Time Elapsed to compute metrics:  0.11625504493713379
Fitting loforest to example:
Time Elapsed to fit Loforest:  6.733759641647339
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.16889286041259766
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.3180341720581055
Computing metrics
Time Elapsed to compute metrics:  0.20449137687683105
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  7.5885093212127686
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.2624490261077881
Fitting regression split
Time Elapsed to fit regression split:  13.881280899047852
Computing metrics
Time Elapsed to compute statistics for regression split:  0.12343144416809082
Fitting weighted regression split
Time Elapsed to fit weighted regression split:  24.917664527893066
Computing metrics


100%|██████████| 50/50 [01:43<00:00,  2.08s/it]


Time Elapsed to fit ACPI:  109.19141602516174
Computing metrics
Fitting locart to toy example:
Time Elapsed to fit Locart:  0.9192376136779785
Computing metrics
Time Elapsed to compute metrics:  0.11646389961242676
Fitting loforest to example:
Time Elapsed to fit Loforest:  6.869836091995239
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.17013025283813477
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.224879264831543
Computing metrics
Time Elapsed to compute metrics:  0.2031857967376709
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  6.857691526412964
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.25653529167175293
Fitting regression split
Time Elapsed to fit regression split:  13.74904727935791
Computing metrics
Time Elapsed to compute statistics for regression split:  0.12161040306091309
Fitting weighted regression split
Time Elapsed to fit weighted regression split:  23.578124046325684
Computing metrics
Tim

100%|██████████| 50/50 [01:45<00:00,  2.10s/it]


Time Elapsed to fit ACPI:  110.79280543327332
Computing metrics


In [16]:
data_lists[0].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.9078,0.023773
1,LOCART,0.904,0.024974
2,A-LOFOREST,0.9074,0.023487
3,A-LOCART,0.9042,0.025593
4,Regresion split,0.904,0.024976
5,Weighted regression split,0.9066,0.039605
6,Mondrian,0.9056,0.028416
7,QRF-TC,0.9062,0.033429


In [17]:
data_lists[1].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.9034,0.023887
1,LOCART,0.8998,0.025333
2,A-LOFOREST,0.9034,0.02356
3,A-LOCART,0.8998,0.025333
4,Regresion split,0.8998,0.025337
5,Weighted regression split,0.8982,0.038796
6,Mondrian,0.8994,0.028023
7,QRF-TC,0.8968,0.035786


In [18]:
data_lists[2].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.9008,0.02335
1,LOCART,0.897,0.025532
2,A-LOFOREST,0.9022,0.022921
3,A-LOCART,0.897,0.025532
4,Regresion split,0.897,0.025534
5,Weighted regression split,0.897,0.037614
6,Mondrian,0.9008,0.02663
7,QRF-TC,0.907,0.033855


## Correlated homoscedastic example:

In [4]:
data_lists_corr = []
p_array = np.array([1])
n = 10000

In [25]:
for p in p_array:
    data_lists_corr.append(run_additional_example(
        kind = "correlated_homoscedastic", 
        n_train=n, 
        p = p, 
        coef = 2, 
        rate = 1, 
        B_y = 10**3, 
        min_samples_leaf = 150, 
        random_seed = 500, 
        random_state = 45,
        rho = 0.7
        )
        )

3.9375712487099315
-3.562584108066762
3.8238157666707946
-4.050973456353878
Fitting locart to toy example:
Time Elapsed to fit Locart:  0.8420150279998779
Computing metrics
Time Elapsed to compute metrics:  0.11944103240966797
Fitting loforest to example:
Time Elapsed to fit Loforest:  6.52992582321167
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.170271635055542
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.1445517539978027
Computing metrics
Time Elapsed to compute metrics:  0.20969057083129883
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  6.876238584518433
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.264087438583374
Fitting regression split
Time Elapsed to fit regression split:  0.13370132446289062
Computing metrics
Time Elapsed to compute statistics for regression split:  0.11855363845825195
Fitting weighted regression split
Time Elapsed to fit weighted regression split:  18.955026388168335
Computing 

100%|██████████| 50/50 [01:52<00:00,  2.24s/it]


Time Elapsed to fit ACPI:  117.44664645195007
Computing metrics


In [26]:
data_lists_corr[0].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.9032,0.013335
1,LOCART,0.9002,0.011062
2,A-LOFOREST,0.903,0.013184
3,A-LOCART,0.9002,0.011062
4,Regresion split,0.9002,0.011062
5,Weighted regression split,0.9072,0.035454
6,Mondrian,0.9036,0.018493
7,QRF-TC,0.8954,0.030523


In [26]:
data_lists_corr[1].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.899,0.013559
1,LOCART,0.8952,0.012026
2,A-LOFOREST,0.8982,0.013575
3,A-LOCART,0.8952,0.012026
4,Regresion split,0.8952,0.012025
5,Weighted regression split,0.9036,0.036673
6,Mondrian,0.895,0.016803
7,QRF-TC,0.9046,0.031222


In [27]:
data_lists_corr[2].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.8934,0.013288
1,LOCART,0.8878,0.011591
2,A-LOFOREST,0.8926,0.013229
3,A-LOCART,0.8878,0.011591
4,Regresion split,0.8878,0.011594
5,Weighted regression split,0.8962,0.036099
6,Mondrian,0.8928,0.018325
7,QRF-TC,0.8802,0.033872


## Correlated heteroscedastisc example:

In [3]:
data_lists_corr_het = []
p_array = np.array([1, 3, 5])
n = 10000

In [4]:
for p in p_array:
    data_lists_corr_het.append(run_additional_example(
        kind = "correlated_heteroscedastic", 
        n_train= n, 
        p = p, 
        coef = 2, 
        rate = 1, 
        B_y = 10**3,
        B_x = 5000,
        min_samples_leaf = 150, 
        random_seed = 125, 
        random_state = 45,
        split_mondrian = False,
        rho = 0.7
        )
        )

0.9843928121774829
-0.8906460270166905
1.0533061378949091
-1.0951797899883493
Fitting locart to toy example:
Time Elapsed to fit Locart:  0.9473249912261963
Computing metrics
Time Elapsed to compute metrics:  0.11813545227050781
Fitting loforest to example:
Time Elapsed to fit Loforest:  7.239319324493408
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.1719193458557129
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.2052392959594727
Computing metrics
Time Elapsed to compute metrics:  0.20970606803894043
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  7.749807357788086
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.2646007537841797
Fitting regression split
Time Elapsed to fit regression split:  0.1381092071533203
Computing metrics
Time Elapsed to compute statistics for regression split:  0.11913514137268066
Fitting weighted regression split
Time Elapsed to fit weighted regression split:  18.629920959472656
Comput

100%|██████████| 50/50 [01:52<00:00,  2.25s/it]


Time Elapsed to fit ACPI:  118.23903226852417
Computing metrics
1.3075505304720065
-1.0581827878123817
1.0533061378949091
-1.0951797899883493
Fitting locart to toy example:
Time Elapsed to fit Locart:  1.0955004692077637
Computing metrics
Time Elapsed to compute metrics:  0.11714863777160645
Fitting loforest to example:
Time Elapsed to fit Loforest:  7.519915580749512
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.16818690299987793
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.3087239265441895
Computing metrics
Time Elapsed to compute metrics:  0.2040235996246338
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  8.052265882492065
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.2549130916595459
Fitting regression split
Time Elapsed to fit regression split:  0.1335296630859375
Computing metrics
Time Elapsed to compute statistics for regression split:  0.11474752426147461
Fitting weighted regression split
Time Elap

100%|██████████| 50/50 [01:53<00:00,  2.27s/it]


Time Elapsed to fit ACPI:  119.06634879112244
Computing metrics
1.3075505304720065
-1.0581827878123817
1.0533061378949091
-1.0951797899883493
Fitting locart to toy example:
Time Elapsed to fit Locart:  1.2945520877838135
Computing metrics
Time Elapsed to compute metrics:  0.11956238746643066
Fitting loforest to example:
Time Elapsed to fit Loforest:  7.69606351852417
Computing metrics
Time Elapsed to compute metrics for Loforest:  0.169633150100708
Fitting A-locart to toy example:
Time Elapsed to fit A-Locart:  1.033369779586792
Computing metrics
Time Elapsed to compute metrics:  0.2069242000579834
Fitting A-loforest to example:
Time Elapsed to fit A-loforest:  8.155033349990845
Computing metrics
Time Elapsed to compute metrics for A-loforest:  0.2602260112762451
Fitting regression split
Time Elapsed to fit regression split:  0.1346726417541504
Computing metrics
Time Elapsed to compute statistics for regression split:  0.1209266185760498
Fitting weighted regression split
Time Elapsed t

100%|██████████| 50/50 [01:50<00:00,  2.20s/it]


Time Elapsed to fit ACPI:  115.52012848854065
Computing metrics


In [5]:
data_lists_corr_het[0].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.892,0.01329
1,LOCART,0.8708,0.020288
2,A-LOFOREST,0.8924,0.013276
3,A-LOCART,0.8708,0.020288
4,Regresion split,0.8704,0.020613
5,Weighted regression split,0.8852,0.036188
6,Mondrian,0.8736,0.022026
7,QRF-TC,0.8816,0.028062


In [6]:
data_lists_corr_het[1].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.888,0.014392
1,LOCART,0.8842,0.01728
2,A-LOFOREST,0.8872,0.014462
3,A-LOCART,0.8842,0.01728
4,Regresion split,0.8776,0.018523
5,Weighted regression split,0.8892,0.036783
6,Mondrian,0.8794,0.021786
7,QRF-TC,0.8832,0.029175


In [7]:
data_lists_corr_het[2].iloc[:, [0,2,5]]

Unnamed: 0,Methods,Average marginal coverage,Average distance
0,LOFOREST,0.889,0.013982
1,LOCART,0.8806,0.01744
2,A-LOFOREST,0.8896,0.013945
3,A-LOCART,0.8806,0.01744
4,Regresion split,0.8806,0.01744
5,Weighted regression split,0.8916,0.034546
6,Mondrian,0.8862,0.019812
7,QRF-TC,0.8662,0.036576
