# Testing conditional valid metrics for locart

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor

from lcv.locart import LocartSplit, MondrianRegressionSplit
from lcv.scores import RegressionScore
from lcv.simulation import simulation
from lcv.utils import compute_interval_length, HSIC_correlation, pearson_correlation, wsc_coverage, ILS_coverage, split

Generating homoscedastic data just for testing:

In [2]:
random_seed = 1250
d = 20
coef = 2
kind = "homoscedastic"
n = 10000
sim_obj = simulation(dim = d, coef = coef)
sim_kind = getattr(sim_obj, kind)
sim_kind(n, random_seed = random_seed)
split_conformal = split(sim_obj.X, sim_obj.y, test_size = 0.5, calibrate = True)

Fitting locart to data and testing:

In [3]:
locart = LocartSplit(nc_score = RegressionScore, base_model = RandomForestRegressor, 
alpha = 0.1, split_calib = True)
locart.fit(split_conformal["X_train"], split_conformal["y_train"])
locart.calib(split_conformal["X_calib"], split_conformal["y_calib"])

# predictions
pred_locart = locart.predict(split_conformal["X_test"])

Fitting Mondrian to comparisson:

In [4]:
micp = MondrianRegressionSplit(RandomForestRegressor, alpha = 0.1, k = 10)
micp.fit(split_conformal["X_train"], split_conformal["y_train"])
micp.calibrate(split_conformal["X_test"], split_conformal["y_test"])

# predictions
pred_micp = micp.predict(split_conformal["X_test"])

## Testing metrics:

Testing firstly for homoscedastic data:

### Homoscedastic data

Correlation:

In [5]:
# testing both correlations
pc_locart = pearson_correlation(pred_locart, split_conformal["y_test"])
hsic_locart = HSIC_correlation(pred_locart, split_conformal["y_test"])
print("Pearson correlation: {} \n HSIC correlation: {}".format(pc_locart, hsic_locart))

Pearson correlation: -0.003156230005196689 
 HSIC correlation: 9.543091031167827e-10


In [6]:
# testing for mondrian
pc_micp = pearson_correlation(pred_micp, split_conformal["y_test"])
hsic_micp = HSIC_correlation(pred_micp, split_conformal["y_test"])
print("Pearson correlation: {} \n HSIC correlation: {}".format(pc_micp, hsic_micp))

Pearson correlation: 0.023968710361934605 
 HSIC correlation: 1.2051528701519785e-06


Worst slab coverage:

In [6]:
wsc_locart = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_locart)
print("Delta wsc: {}".format(wsc_locart))

Delta wsc: 0.0012422535211267238


In [7]:
wsc_micp = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_micp)
print("Delta wsc: {}".format(wsc_micp))

Delta wsc: 0.0026422535211267917


ILS coverage:

In [5]:
ils_locart_micp = ILS_coverage(pred_locart, pred_micp, split_conformal["y_test"])
print("ILS coverage: {}".format(ils_locart_micp))

ILS coverage: -0.020822222222222297


### Heteroscedastic data

In [33]:
random_seed = 1250
d = 20
coef = 2
kind = "heteroscedastic"
n = 10000
sim_obj = simulation(dim = d, coef = coef, hetero_value = 0.25)
sim_kind = getattr(sim_obj, kind)
sim_kind(n, random_seed = random_seed)
split_conformal = split(sim_obj.X, sim_obj.y, test_size = 0.5, calibrate = True)

fitting models:

In [34]:
locart = LocartSplit(nc_score = RegressionScore, base_model = RandomForestRegressor, 
alpha = 0.1, split_calib = True)
locart.fit(split_conformal["X_train"], split_conformal["y_train"])
locart.calib(split_conformal["X_calib"], split_conformal["y_calib"])

# predictions
pred_locart = locart.predict(split_conformal["X_test"])

micp = MondrianRegressionSplit(RandomForestRegressor, alpha = 0.1, k = 10)
micp.fit(split_conformal["X_train"], split_conformal["y_train"])
micp.calibrate(split_conformal["X_test"], split_conformal["y_test"])

# predictions
pred_micp = micp.predict(split_conformal["X_test"])

Correlation:

In [35]:
pc_locart = pearson_correlation(pred_locart, split_conformal["y_test"])
hsic_locart = HSIC_correlation(pred_locart, split_conformal["y_test"])

pc_micp = pearson_correlation(pred_micp, split_conformal["y_test"])
hsic_micp = HSIC_correlation(pred_micp, split_conformal["y_test"])

print("Pearson correlation for locart: {} \n HSIC correlation for locart: {}".format(pc_locart, hsic_locart))
print("Pearson correlation for mondrian: {} \n HSIC correlation for mondrian: {}".format(pc_micp, hsic_micp))

Pearson correlation for locart: 0.004393116555865671 
 HSIC correlation for locart: 2.6730960642672893e-07
Pearson correlation for mondrian: 0.018724455998235096 
 HSIC correlation for mondrian: 5.40020467283947e-06


In [36]:
ils_locart_micp = ILS_coverage(pred_locart, pred_micp, split_conformal["y_test"])
print("ILS coverage: {}".format(ils_locart_micp))

ILS coverage: 0.03744606525911709


In [37]:
wsc_locart = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_locart)
print("Delta wsc: {}".format(wsc_locart))

wsc_micp = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_micp)
print("Delta wsc: {}".format(wsc_micp))

Delta wsc: 0.012092447129909423
Delta wsc: 0.0004568578553616476


### Asymmetric data

In [28]:
random_seed = 1250
d = 20
coef = 2
kind = "asymmetric"
asym_value = 0.6
n = 10000
sim_obj = simulation(dim = d, coef = coef, asym_value= asym_value)
sim_kind = getattr(sim_obj, kind)
sim_kind(n, random_seed = random_seed)
split_conformal = split(sim_obj.X, sim_obj.y, test_size = 0.5, calibrate = True)

fitting models:

In [29]:
locart = LocartSplit(nc_score = RegressionScore, base_model = RandomForestRegressor, 
alpha = 0.1, split_calib = True)
locart.fit(split_conformal["X_train"], split_conformal["y_train"])
locart.calib(split_conformal["X_calib"], split_conformal["y_calib"])

# predictions
pred_locart = locart.predict(split_conformal["X_test"])

micp = MondrianRegressionSplit(RandomForestRegressor, alpha = 0.1, k = 10)
micp.fit(split_conformal["X_train"], split_conformal["y_train"])
micp.calibrate(split_conformal["X_test"], split_conformal["y_test"])

# predictions
pred_micp = micp.predict(split_conformal["X_test"])

Metrics:

In [30]:
pc_locart = pearson_correlation(pred_locart, split_conformal["y_test"])
hsic_locart = HSIC_correlation(pred_locart, split_conformal["y_test"])

pc_micp = pearson_correlation(pred_micp, split_conformal["y_test"])
hsic_micp = HSIC_correlation(pred_micp, split_conformal["y_test"])

print("Pearson correlation for locart: {} \n HSIC correlation for locart: {}".format(pc_locart, hsic_locart))
print("Pearson correlation for mondrian: {} \n HSIC correlation for mondrian: {}".format(pc_micp, hsic_micp))

Pearson correlation for locart: 0.004768642314300733 
 HSIC correlation for locart: 1.1279158363082266e-08
Pearson correlation for mondrian: 0.011931114803949651 
 HSIC correlation for mondrian: 4.508715588638028e-06


In [31]:
ils_locart_micp = ILS_coverage(pred_locart, pred_micp, split_conformal["y_test"])
print("ILS coverage: {}".format(ils_locart_micp))

ILS coverage: 0.04042151394422311


In [32]:
wsc_locart = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_locart)
print("Delta wsc: {}".format(wsc_locart))

wsc_micp = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_micp)
print("Delta wsc: {}".format(wsc_micp))

Delta wsc: 0.030753233830845716
Delta wsc: 0.007800000000000029


### More asymmetric

In [38]:
random_seed = 1250
d = 20
coef = 2
kind = "asymmetric"
asym_value = 1.5
n = 10000
sim_obj = simulation(dim = d, coef = coef, asym_value= asym_value)
sim_kind = getattr(sim_obj, kind)
sim_kind(n, random_seed = random_seed)
split_conformal = split(sim_obj.X, sim_obj.y, test_size = 0.5, calibrate = True)

fitting models:

In [40]:
locart = LocartSplit(nc_score = RegressionScore, base_model = RandomForestRegressor, 
alpha = 0.1, split_calib = True)
locart.fit(split_conformal["X_train"], split_conformal["y_train"])
locart.calib(split_conformal["X_calib"], split_conformal["y_calib"])

# predictions
pred_locart = locart.predict(split_conformal["X_test"])

micp = MondrianRegressionSplit(RandomForestRegressor, alpha = 0.1, k = 10)
micp.fit(split_conformal["X_train"], split_conformal["y_train"])
micp.calibrate(split_conformal["X_test"], split_conformal["y_test"])

# predictions
pred_micp = micp.predict(split_conformal["X_test"])

Metrics:

In [41]:
pc_locart = pearson_correlation(pred_locart, split_conformal["y_test"])
hsic_locart = HSIC_correlation(pred_locart, split_conformal["y_test"])

pc_micp = pearson_correlation(pred_micp, split_conformal["y_test"])
hsic_micp = HSIC_correlation(pred_micp, split_conformal["y_test"])

print("Pearson correlation for locart: {} \n HSIC correlation for locart: {}".format(pc_locart, hsic_locart))
print("Pearson correlation for mondrian: {} \n HSIC correlation for mondrian: {}".format(pc_micp, hsic_micp))

Pearson correlation for locart: 0.010153643386167801 
 HSIC correlation for locart: 2.88854195812755e-06
Pearson correlation for mondrian: -8.723385916835798e-06 
 HSIC correlation for mondrian: 4.072958723393719e-06


In [42]:
ils_locart_micp = ILS_coverage(pred_locart, pred_micp, split_conformal["y_test"])
print("ILS coverage: {}".format(ils_locart_micp))

ILS coverage: 0.0697280632411067


In [43]:
wsc_locart = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_locart)
print("Delta wsc: {}".format(wsc_locart))

wsc_micp = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_micp)
print("Delta wsc: {}".format(wsc_micp))

Delta wsc: 0.001724050632911367
Delta wsc: 0.022047852760736175


### Non-corr heteroscedastic

In [44]:
random_seed = 1250
d = 20
coef = 2
kind = "non_cor_heteroscedastic"
hetero_value = 0.25
n = 10000
sim_obj = simulation(dim = d, coef = coef, hetero_value=hetero_value)
sim_kind = getattr(sim_obj, kind)
sim_kind(n, random_seed = random_seed)
split_conformal = split(sim_obj.X, sim_obj.y, test_size = 0.5, calibrate = True)

fitting models:

In [45]:
locart = LocartSplit(nc_score = RegressionScore, base_model = RandomForestRegressor, 
alpha = 0.1, split_calib = True)
locart.fit(split_conformal["X_train"], split_conformal["y_train"])
locart.calib(split_conformal["X_calib"], split_conformal["y_calib"])

# predictions
pred_locart = locart.predict(split_conformal["X_test"])

micp = MondrianRegressionSplit(RandomForestRegressor, alpha = 0.1, k = 10)
micp.fit(split_conformal["X_train"], split_conformal["y_train"])
micp.calibrate(split_conformal["X_test"], split_conformal["y_test"])

# predictions
pred_micp = micp.predict(split_conformal["X_test"])

Metrics:

In [46]:
pc_locart = pearson_correlation(pred_locart, split_conformal["y_test"])
hsic_locart = HSIC_correlation(pred_locart, split_conformal["y_test"])

pc_micp = pearson_correlation(pred_micp, split_conformal["y_test"])
hsic_micp = HSIC_correlation(pred_micp, split_conformal["y_test"])

print("Pearson correlation for locart: {} \n HSIC correlation for locart: {}".format(pc_locart, hsic_locart))
print("Pearson correlation for mondrian: {} \n HSIC correlation for mondrian: {}".format(pc_micp, hsic_micp))

Pearson correlation for locart: 0.020396862021520204 
 HSIC correlation for locart: 5.7421433532560255e-06
Pearson correlation for mondrian: 0.02689440646500909 
 HSIC correlation for mondrian: 8.639612502657903e-06


In [47]:
ils_locart_micp = ILS_coverage(pred_locart, pred_micp, split_conformal["y_test"])
print("ILS coverage: {}".format(ils_locart_micp))

ILS coverage: -0.03618446601941738


In [48]:
wsc_locart = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_locart)
print("Delta wsc: {}".format(wsc_locart))

wsc_micp = wsc_coverage(split_conformal["X_test"], split_conformal["y_test"], pred_micp)
print("Delta wsc: {}".format(wsc_micp))

Delta wsc: 0.015239702233250663
Delta wsc: 0.008424277456647422
