In [2]:
from sklearn.ensemble import HistGradientBoostingRegressor
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# loforest and locart functions
from CP2LFI.loforest import ConformalLoforest, tune_loforest_LFI
from CP2LFI.scores import LambdaScore
from CP2LFI.simulations import Simulations, naive, predict_naive_quantile


from scipy import stats
from clover import LocartSplit

from copy import deepcopy

from tqdm import tqdm
import itertools

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Testing LRT for every model of choice

Plotting and comparing the performance of each method:

In [None]:
def obtain_quantiles(
    kind_model,
    thetas,
    N,
    rng,
    B=1000,
    alpha=0.05,
    min_samples_leaf=100,
    n_estimators=100,
    K=50,
    naive_n=500,
):
    """Compute LRT cutoff estimates at ``thetas`` for every method being compared.

    A single sample ``(theta, lambda)`` is drawn with
    ``Simulations.LRT_sample`` and used to calibrate LOCART, calibrate
    LOFOREST and fit a quantile gradient-boosting regressor. The naive
    baseline is fitted separately through ``naive``. Every fitted method is
    then evaluated at ``thetas``.

    Parameters
    ----------
    kind_model :
        Model identifier, forwarded unchanged to ``naive``,
        ``predict_naive_quantile`` and ``Simulations``.
    thetas : numpy.ndarray
        Parameter values at which the cutoffs are evaluated. A 1-D array is
        reshaped into a single column before being passed to the fitted
        models; the naive predictor receives it as given.
    N :
        Forwarded to ``naive`` and ``LRT_sample``
        (presumably the size of each simulated sample -- TODO confirm).
    rng :
        Random generator shared by ``naive`` and ``Simulations``.
    B : int, default 1000
        Forwarded to ``naive`` and ``LRT_sample``
        (presumably the number of simulated pairs -- TODO confirm).
    alpha : float, default 0.05
        Level; the boosting model and the KS reference target the
        ``1 - alpha`` quantile.
    min_samples_leaf : int, default 100
        Forwarded to the LOCART and LOFOREST calibration calls.
    n_estimators : int, default 100
        Forwarded to the LOFOREST calibration call.
    K : int, default 50
        Forwarded to the LOFOREST calibration call.
    naive_n : int, default 500
        Forwarded to ``naive``.

    Returns
    -------
    dict
        Keys ``"naive"``, ``"locart"``, ``"loforest"``, ``"tuned_loforest"``,
        ``"boosting"`` and ``"ks"``, each holding that method's cutoffs for
        ``thetas``.

    Notes
    -----
    Prints the selected LOFOREST ``K`` to stdout as a side effect.
    """

    def _as_column(values):
        # 1-D input becomes an (n, 1) matrix; anything else passes through.
        return values.reshape(-1, 1) if values.ndim == 1 else values

    # Naive baseline: fit first, then read off the cutoffs for `thetas`.
    # It runs before the simulation below so `rng` is consumed in this order.
    naive_fit = naive(
        stat="lrt",
        kind_model=kind_model,
        alpha=alpha,
        rng=rng,
        B=B,
        N=N,
        naive_n=naive_n,
    )
    naive_cutoffs = predict_naive_quantile(kind_model, thetas, naive_fit)

    # Sample shared by LOCART, LOFOREST and boosting.
    simulator = Simulations(rng=rng, kind_model=kind_model)
    sim_thetas, train_lambdas = simulator.LRT_sample(B=B, N=N)
    train_thetas = _as_column(sim_thetas)

    # LOCART calibration; `calib` returns the per-leaf cutoffs used below.
    locart = LocartSplit(
        LambdaScore, None, alpha=alpha, is_fitted=True, split_calib=False
    )
    leaf_cutoffs = locart.calib(
        train_thetas, train_lambdas, min_samples_leaf=min_samples_leaf
    )

    # LOFOREST calibration.
    loforest = ConformalLoforest(
        LambdaScore, None, alpha=alpha, is_fitted=True, split_calib=False
    )
    loforest.calibrate(
        train_thetas,
        train_lambdas,
        min_samples_leaf=min_samples_leaf,
        n_estimators=n_estimators,
        K=K,
    )

    # Quantile gradient boosting aimed directly at the (1 - alpha) quantile.
    booster = HistGradientBoostingRegressor(
        loss="quantile",
        max_iter=100,
        max_depth=3,
        quantile=1 - alpha,
        random_state=105,
        n_iter_no_change=15,
        early_stopping=True,
    )
    booster.fit(train_thetas, train_lambdas)

    eval_thetas = _as_column(thetas)

    # LOCART: map each evaluation point to its leaf, then look up that
    # leaf's cutoff.
    leaf_ids = locart.cart.apply(eval_thetas)
    locart_cutoffs = [leaf_cutoffs[leaf] for leaf in leaf_ids]

    # LOFOREST with the K given at calibration time.
    loforest_cutoffs = loforest.compute_cutoffs(eval_thetas)

    # Boosting predictions.
    boosting_cutoffs = booster.predict(eval_thetas)

    # LOFOREST again, with K chosen from the grid {0, 30, 35, ..., 90}.
    # NOTE(review): the tuner receives `eval_thetas` together with the
    # lambdas simulated for `train_thetas`; confirm that this pairing is
    # what `tune_loforest_LFI` expects.
    candidate_K = np.concatenate(([0], np.arange(30, 95, 5)))
    K_loforest = tune_loforest_LFI(
        loforest, eval_thetas, train_lambdas, K_grid=candidate_K
    )
    tuned_cutoffs = loforest.compute_cutoffs(eval_thetas, K=K_loforest)

    print("Tuned K: ", K_loforest)

    # Constant reference cutoff, repeated once per evaluation point.
    # NOTE(review): this is the Kolmogorov-Smirnov limiting quantile although
    # the statistic here is the LRT -- confirm it is the intended reference.
    ks_level = stats.kstwobign.ppf(1 - alpha)
    ks_cutoffs = np.tile(ks_level, thetas.shape[0])

    return {
        "naive": naive_cutoffs,
        "locart": locart_cutoffs,
        "loforest": loforest_cutoffs,
        "tuned_loforest": tuned_cutoffs,
        "boosting": boosting_cutoffs,
        "ks": ks_cutoffs,
    }