In [1]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
err = {}

# First simulated example.

Let $\theta$ be the parameter of interest and $\lambda(\theta) = F(\theta) - \hat{F}(\theta)$, $F$ being the cummulative distribution of an $\text{exp}(1)$ and $\hat{F}$ the empirical cumulative distribution the test statistic.

In [3]:
# o df_melted que retorna é só para printar as distribuicoes dos 
# lambdas com histplots e ver que elas mudam conforme theta

def generate_parameters(thetas, N, iters):
    values = pd.DataFrame()
    ## Parametros para arvore
    lambdas = []
    thetas_ = []
    ##
    for theta in thetas:
        diff = []
        theoretical = np.e**(-theta)
        for i in range(iters):
            exp = np.random.exponential(1/theta, N)
            empirical = (len([i for i in exp if i > 1])/len(exp))
            diff.append(abs(theoretical - empirical))
            lambdas.append(abs(theoretical - empirical))
            thetas_.append(theta)
        values[f"{theta}"] = diff
    df_melted = values.melt(var_name='theta')
    
    return lambdas, thetas_, df_melted

In [4]:
def eval_coverage(quantiles):
    # theta = parametro
    # lambda = estatistica de teste
    # Lambda: |P_teorica(exp > 1) - P_empirica(exp > 1)|
    j = 0
    err = 0
    for theta in thetas:
        theoretical = np.e**(-theta)
        lambdas_ = []
        for i in range(iters):
            distr = np.random.exponential(1/theta, N)
            empirical = len([i for i in distr if i > 1])/len(distr)
            diff = abs(theoretical - empirical)
            lambdas_.append(diff)
        alpha = len([i for i in lambdas_ if i >= quantiles[j]])/len(lambdas_)
        print(f"alpha for theta={theta} : {alpha} --- quantile: {quantiles[j]}")
        err += abs(alpha - 0.05)
        j += 1
    print(f"mean absolute error: {err/j}")
    return err/j

In [5]:
# isso foi usado em um experimento
def params_with_random_thetas(thetas, N):
    chosen_thetas = np.random.choice(thetas, N)
    lambdas = []
    thetas_ = []
    ##
    for theta in chosen_thetas:
        theoretical = np.e**(-theta)
        exp = np.random.exponential(1/theta, 10000)
        empirical = len([i for i in exp if i > 1])/len(exp)
        lambdas.append(abs(theoretical - empirical))
        thetas_.append(theta)
        
    return lambdas, thetas_

In [74]:
# Fica a vontade pra mudar isso aqui se quiser brincar
thetas = np.arange(0.5, 7, 0.5)
N = 10000
iters = 10000

np.random.seed(1250)
lambdas, thetas_, df_melted = generate_parameters(thetas, N, iters)

*df_melted* is the dataset with simulated $\lambda$ values for each value of $\theta$:

In [75]:
df_melted.head(10)

Unnamed: 0,theta,value
0,0.5,0.000369
1,0.5,0.003531
2,0.5,0.008069
3,0.5,0.003431
4,0.5,0.004531
5,0.5,0.004569
6,0.5,0.000669
7,0.5,0.009131
8,0.5,0.002969
9,0.5,0.005231


We want to obtain, for each $\theta$ a cutoff $C_{\theta}$ to build:
$$R(\mathcal{D}) = \{\theta \in \Theta: \lambda(\theta, \mathcal{D}) \geq C_{\theta} \} ,$$
such that:
$$\mathbb{P}_{\mathcal{D}|\theta}(\theta \in R(\mathcal{D})) \geq 1 - \alpha \quad \forall \theta \in \Theta.$$

# Naive estimator

Naive estimation of each cutoff

In [76]:
# isso aqui eh o que eu to chamando de naive. Basicamente pega o
# parametro e tira o quantil dele. usei o df_melted pq era mais facil
# por algum motivo que nao consigo lembrar de cabeça agora
naive = [
    np.quantile(list(df_melted[df_melted["theta"] == str(theta)].value), 0.95)
    for theta in thetas
]

In [77]:
err["err_naive"] = eval_coverage(naive)

alpha for theta=0.5 : 0.055 --- quantile: 0.009530659712633449
alpha for theta=1.0 : 0.0488 --- quantile: 0.009479441171442338
alpha for theta=1.5 : 0.0497 --- quantile: 0.008230160148429838
alpha for theta=2.0 : 0.0532 --- quantile: 0.0066352832366126935
alpha for theta=2.5 : 0.0576 --- quantile: 0.005284998623898807
alpha for theta=3.0 : 0.0517 --- quantile: 0.004312931632136051
alpha for theta=3.5 : 0.0536 --- quantile: 0.003302616577681494
alpha for theta=4.0 : 0.047 --- quantile: 0.0026843611112658157
alpha for theta=4.5 : 0.0532 --- quantile: 0.002008996538242309
alpha for theta=5.0 : 0.0512 --- quantile: 0.0016379469990854684
alpha for theta=5.5 : 0.0596 --- quantile: 0.0012132285615359317
alpha for theta=6.0 : 0.059 --- quantile: 0.0009787521766663593
alpha for theta=6.5 : 0.0741 --- quantile: 0.000703439192977573
mean absolute error: 0.00559230769230769


# Pinball oriented Regression Tree
Fitting a regression tree that uses the pinball loss for partitioning the data:

In [78]:
# se colocar o boosting com 1 iteracao, ele vira uma arvore
# e o quantile loss eh o pinball. ref: 
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingRegressor.html
model = GradientBoostingRegressor(
    loss = "huber", alpha = .95, learning_rate = 0.2, min_samples_leaf = 100, n_estimators = 1
)

model_thetas = np.array(thetas_).reshape(-1, 1)
model_lambdas = np.array(lambdas).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


testing first a locart way of obtaining the cutoffs (pinball-LOCART):

In [79]:
model.apply(thetas.reshape(-1, 1))

array([[ 3.],
       [ 3.],
       [ 4.],
       [ 6.],
       [ 7.],
       [10.],
       [11.],
       [11.],
       [13.],
       [13.],
       [14.],
       [14.],
       [14.]])

Obtaining quantiles in the LOCART manner:

In [80]:
leaves = np.unique(model.apply(thetas.reshape(-1, 1)))
quantiles = {}
for leaf in leaves:
    indices = model.apply(model_thetas)
    selected_lambdas = model_lambdas[indices == leaf]
    n = selected_lambdas.shape[0]
    quantiles[leaf]  = np.quantile(selected_lambdas, q=np.ceil((n + 1) * (1 - 0.05)) / n)
idxs = model.apply(thetas.reshape(-1, 1))
list_gb_quantiles = [quantiles[idx] for idx in idxs.reshape(-1)]

In [81]:
list_gb_quantiles

[0.009479441171442338,
 0.009479441171442338,
 0.008230160148429838,
 0.0066352832366126935,
 0.005284998623898807,
 0.004312931632136051,
 0.0030026165776814925,
 0.0030026165776814925,
 0.0018620530009145313,
 0.0018620530009145313,
 0.0010212478233336407,
 0.0010212478233336407,
 0.0010212478233336407]

Obtaining errors for these quantiles:

In [82]:
err["err_pinball_locart"] = eval_coverage(list_gb_quantiles)

alpha for theta=0.5 : 0.0505 --- quantile: 0.009479441171442338
alpha for theta=1.0 : 0.0503 --- quantile: 0.009479441171442338
alpha for theta=1.5 : 0.0479 --- quantile: 0.008230160148429838
alpha for theta=2.0 : 0.0557 --- quantile: 0.0066352832366126935
alpha for theta=2.5 : 0.0564 --- quantile: 0.005284998623898807
alpha for theta=3.0 : 0.0476 --- quantile: 0.004312931632136051
alpha for theta=3.5 : 0.0783 --- quantile: 0.0030026165776814925
alpha for theta=4.0 : 0.0228 --- quantile: 0.0030026165776814925
alpha for theta=4.5 : 0.0768 --- quantile: 0.0018620530009145313
alpha for theta=5.0 : 0.0247 --- quantile: 0.0018620530009145313
alpha for theta=5.5 : 0.0974 --- quantile: 0.0010212478233336407
alpha for theta=6.0 : 0.0458 --- quantile: 0.0010212478233336407
alpha for theta=6.5 : 0.0069 --- quantile: 0.0010212478233336407
mean absolute error: 0.016900000000000002


testing now cutoffs obtained from direct predictions of the model:

In [83]:
model = HistGradientBoostingRegressor(
    loss = "quantile", quantile = .95, min_samples_leaf = 100, max_iter=1
)

model_thetas = np.array(thetas_).reshape(-1, 1)
model_lambdas = np.array(lambdas).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

quantiles = model.predict(thetas.reshape(-1, 1))
quantiles

  y = column_or_1d(y, warn=True)


array([0.00650547, 0.00650364, 0.00637939, 0.00621593, 0.00608091,
       0.0059837 , 0.00588267, 0.00581397, 0.00569861, 0.00569861,
       0.00569861, 0.00569861, 0.00569861])

In [84]:
err["err_reg"] = eval_coverage(quantiles)

alpha for theta=0.5 : 0.1709 --- quantile: 0.006505472229893283
alpha for theta=1.0 : 0.1772 --- quantile: 0.006503639788343396
alpha for theta=1.5 : 0.1161 --- quantile: 0.006379390243786953
alpha for theta=2.0 : 0.0712 --- quantile: 0.006215934582291208
alpha for theta=2.5 : 0.0296 --- quantile: 0.006080906121019819
alpha for theta=3.0 : 0.0064 --- quantile: 0.005983699421843543
alpha for theta=3.5 : 0.0007 --- quantile: 0.005882667916398088
alpha for theta=4.0 : 0.0 --- quantile: 0.005813970147503357
alpha for theta=4.5 : 0.0 --- quantile: 0.005698611558721391
alpha for theta=5.0 : 0.0 --- quantile: 0.005698611558721391
alpha for theta=5.5 : 0.0 --- quantile: 0.005698611558721391
alpha for theta=6.0 : 0.0 --- quantile: 0.005698611558721391
alpha for theta=6.5 : 0.0 --- quantile: 0.005698611558721391
mean absolute error: 0.057592307692307707


## Testing MSE-LOCART

In [85]:
# defining new score using the Scores class, specifically for the df_melted dataset
from clover import Scores
from clover import LocartSplit

class LambdaScore(Scores):
    def fit(self, X, y):
        return self

    def compute(self, thetas, lambdas):
        return lambdas

    def predict(self, thetas, cutoff):
        pred = np.vstack((thetas - cutoff, thetas + cutoff)).T
        return pred

# fitting locart to the new synthetic class
locart_object = LocartSplit(LambdaScore, None, alpha = 0.05, is_fitted = True, split_calib = False)
locart_quantiles = locart_object.calib(model_thetas, model_lambdas)
idxs = locart_object.cart.apply(thetas.reshape(-1, 1))
list_locart_quantiles = [locart_quantiles[idx] for idx in idxs]

In [86]:
err["err_locart"] = eval_coverage(list_locart_quantiles)

alpha for theta=0.5 : 0.0484 --- quantile: 0.009530659712633449
alpha for theta=1.0 : 0.0516 --- quantile: 0.009479441171442338
alpha for theta=1.5 : 0.05 --- quantile: 0.008230160148429838
alpha for theta=2.0 : 0.0547 --- quantile: 0.0066352832366126935
alpha for theta=2.5 : 0.0503 --- quantile: 0.005284998623898807
alpha for theta=3.0 : 0.0445 --- quantile: 0.004312931632136051
alpha for theta=3.5 : 0.0523 --- quantile: 0.003302616577681494
alpha for theta=4.0 : 0.0475 --- quantile: 0.0026843611112658157
alpha for theta=4.5 : 0.053 --- quantile: 0.002008996538242309
alpha for theta=5.0 : 0.0514 --- quantile: 0.0016379469990854684
alpha for theta=5.5 : 0.063 --- quantile: 0.0012132285615359317
alpha for theta=6.0 : 0.0548 --- quantile: 0.0009787521766663593
alpha for theta=6.5 : 0.0725 --- quantile: 0.000703439192977573
mean absolute error: 0.00486153846153846


## Regressor - random

Generating 1000 random thetas from the grid to compute the metrics of both the gradient boosting tree and the MSE-locart:

In [87]:
# aqui, a gente gera N (=1000) thetas aleatoriamente entre
# os possiveis valores do grid e computa as metricas
lambdas_r, thetas_r = params_with_random_thetas(thetas, 1000)

Obtaining metrics first for the gradient boosting unique tree:

In [88]:
model = HistGradientBoostingRegressor(
    loss="quantile", quantile=.95, max_iter=1
)

model_thetas = np.array(thetas_r).reshape(-1, 1)
model_lambdas = np.array(lambdas_r).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


In [89]:
quantiles = model.predict(thetas.reshape(-1, 1))
quantiles

array([0.00633479, 0.00631502, 0.0062397 , 0.00604401, 0.00606857,
       0.0057016 , 0.0057016 , 0.0057016 , 0.0057016 , 0.0057016 ,
       0.0057016 , 0.0057016 , 0.0057016 ])

In [90]:
err["err_reg_random"] = eval_coverage(quantiles)

alpha for theta=0.5 : 0.1925 --- quantile: 0.006334786563000806
alpha for theta=1.0 : 0.1847 --- quantile: 0.006315015003451892
alpha for theta=1.5 : 0.1302 --- quantile: 0.006239704576894481
alpha for theta=2.0 : 0.0765 --- quantile: 0.0060440149270052125
alpha for theta=2.5 : 0.0256 --- quantile: 0.0060685705779764555
alpha for theta=3.0 : 0.0098 --- quantile: 0.00570160115277289
alpha for theta=3.5 : 0.0005 --- quantile: 0.00570160115277289
alpha for theta=4.0 : 0.0 --- quantile: 0.00570160115277289
alpha for theta=4.5 : 0.0 --- quantile: 0.00570160115277289
alpha for theta=5.0 : 0.0 --- quantile: 0.00570160115277289
alpha for theta=5.5 : 0.0 --- quantile: 0.00570160115277289
alpha for theta=6.0 : 0.0 --- quantile: 0.00570160115277289
alpha for theta=6.5 : 0.0 --- quantile: 0.00570160115277289
mean absolute error: 0.061384615384615406


Obtaining the metrics for locart:

In [91]:
# fitting locart to the new synthetic class
locart_object = LocartSplit(LambdaScore, None, alpha = 0.05, is_fitted = True, split_calib = False)
locart_quantiles = locart_object.calib(model_thetas, model_lambdas)
idxs = locart_object.cart.apply(thetas.reshape(-1, 1))
list_locart_quantiles = [locart_quantiles[idx] for idx in idxs]

In [92]:
err["err_locart_random"] = eval_coverage(list_locart_quantiles)

alpha for theta=0.5 : 0.0637 --- quantile: 0.009139509270155511
alpha for theta=1.0 : 0.0568 --- quantile: 0.009139509270155511
alpha for theta=1.5 : 0.0283 --- quantile: 0.009139509270155511
alpha for theta=2.0 : 0.082 --- quantile: 0.005926099837758356
alpha for theta=2.5 : 0.0287 --- quantile: 0.005926099837758356
alpha for theta=3.0 : 0.0068 --- quantile: 0.005926099837758356
alpha for theta=3.5 : 0.0825 --- quantile: 0.0029886714560933983
alpha for theta=4.0 : 0.0287 --- quantile: 0.0029886714560933983
alpha for theta=4.5 : 0.004 --- quantile: 0.0029886714560933983
alpha for theta=5.0 : 0.0777 --- quantile: 0.001438911239158631
alpha for theta=5.5 : 0.0252 --- quantile: 0.001438911239158631
alpha for theta=6.0 : 0.1062 --- quantile: 0.0008076645450902484
alpha for theta=6.5 : 0.0281 --- quantile: 0.0008076645450902484
mean absolute error: 0.0283923076923077


## 20 iters in each theta

Making less iterations for each theta and testing the gradient boosting, naive and so on:

In [93]:
# Aqui foi uma tentativa de fazer poucas iteracoes em cima de cada theta,
# que é algo mais proximo do que acontece na realidade
lambdas_few, thetas_few, _ = generate_parameters(thetas, N, 20)

In [66]:
model_thetas

array([[0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [0.5],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1. ],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [1.5],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
       [2. ],
      

In [94]:
model = HistGradientBoostingRegressor(
    loss="quantile", quantile=.95, max_iter=1
)

model_thetas = np.array(thetas_few).reshape(-1, 1)
model_lambdas = np.array(lambdas_few).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


In [95]:
quantiles = model.predict(thetas.reshape(-1, 1))
quantiles

array([0.00595127, 0.00627218, 0.00591232, 0.00561421, 0.00559368,
       0.00528434, 0.00528434, 0.00528434, 0.00528434, 0.00528434,
       0.00528434, 0.00528434, 0.00528434])

In [96]:
err["err_reg_few_iters"] = eval_coverage(quantiles)

alpha for theta=0.5 : 0.2121 --- quantile: 0.005951271655975811
alpha for theta=1.0 : 0.1923 --- quantile: 0.006272180735542627
alpha for theta=1.5 : 0.1538 --- quantile: 0.005912316616754129
alpha for theta=2.0 : 0.104 --- quantile: 0.005614206521407959
alpha for theta=2.5 : 0.0409 --- quantile: 0.0055936809062637075
alpha for theta=3.0 : 0.0154 --- quantile: 0.005284337729609411
alpha for theta=3.5 : 0.0025 --- quantile: 0.005284337729609411
alpha for theta=4.0 : 0.0002 --- quantile: 0.005284337729609411
alpha for theta=4.5 : 0.0 --- quantile: 0.005284337729609411
alpha for theta=5.0 : 0.0 --- quantile: 0.005284337729609411
alpha for theta=5.5 : 0.0 --- quantile: 0.005284337729609411
alpha for theta=6.0 : 0.0 --- quantile: 0.005284337729609411
alpha for theta=6.5 : 0.0 --- quantile: 0.005284337729609411
mean absolute error: 0.06563076923076924


Testing also for MSE-locart:

In [97]:
# fitting locart to the new synthetic class
# changing minimal sample leaves to a new strategic value
locart_object = LocartSplit(LambdaScore, None, alpha = 0.05, is_fitted = True, split_calib = False)
locart_quantiles = locart_object.calib(model_thetas, model_lambdas, min_samples_leaf = 5)
idxs = locart_object.cart.apply(thetas.reshape(-1, 1))
list_locart_quantiles = [locart_quantiles[idx] for idx in idxs]

In [98]:
err["err_locart_few"] = eval_coverage(list_locart_quantiles)

alpha for theta=0.5 : 0.0214 --- quantile: 0.011365619094593767
alpha for theta=1.0 : 0.0184 --- quantile: 0.011365619094593767
alpha for theta=1.5 : 0.0083 --- quantile: 0.011365619094593767
alpha for theta=2.0 : 0.0735 --- quantile: 0.006046244260783349
alpha for theta=2.5 : 0.0262 --- quantile: 0.006046244260783349
alpha for theta=3.0 : 0.1138 --- quantile: 0.0034275448175940036
alpha for theta=3.5 : 0.0446 --- quantile: 0.0034275448175940036
alpha for theta=4.0 : 0.0094 --- quantile: 0.0034275448175940036
alpha for theta=4.5 : 0.0015 --- quantile: 0.0034275448175940036
alpha for theta=5.0 : 0.0886 --- quantile: 0.0013695530009145325
alpha for theta=5.5 : 0.0328 --- quantile: 0.0013695530009145325
alpha for theta=6.0 : 0.0063 --- quantile: 0.0013695530009145325
alpha for theta=6.5 : 0.0008 --- quantile: 0.0013695530009145325
mean absolute error: 0.035092307692307693


# Boosting

Making use of the boosting algorithm to obtain the cutoff via a quantile regression

In [99]:
model = HistGradientBoostingRegressor(
    loss="quantile", quantile=.95
)

model_thetas = np.array(thetas_).reshape(-1, 1)
model_lambdas = np.array(lambdas).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


In [100]:
model.n_iter_

51

In [101]:
boosting_quantiles = model.predict(thetas.reshape(-1, 1))
boosting_quantiles

array([0.00951491, 0.00950486, 0.0081604 , 0.00663296, 0.00528894,
       0.00432138, 0.00331576, 0.00270037, 0.00202814, 0.00165534,
       0.00123567, 0.00100505, 0.00073426])

In [102]:
err["err_boosting"] = eval_coverage(boosting_quantiles)

alpha for theta=0.5 : 0.0522 --- quantile: 0.009514910606241976
alpha for theta=1.0 : 0.0504 --- quantile: 0.009504856574083537
alpha for theta=1.5 : 0.0521 --- quantile: 0.008160402768874109
alpha for theta=2.0 : 0.0531 --- quantile: 0.006632964037769399
alpha for theta=2.5 : 0.0536 --- quantile: 0.00528894258207936
alpha for theta=3.0 : 0.0496 --- quantile: 0.004321384423602406
alpha for theta=3.5 : 0.0512 --- quantile: 0.0033157556121591555
alpha for theta=4.0 : 0.0442 --- quantile: 0.0027003678604686204
alpha for theta=4.5 : 0.0515 --- quantile: 0.0020281358969182293
alpha for theta=5.0 : 0.042 --- quantile: 0.0016553416743011425
alpha for theta=5.5 : 0.0491 --- quantile: 0.001235674120558699
alpha for theta=6.0 : 0.0435 --- quantile: 0.0010050485890194464
alpha for theta=6.5 : 0.044 --- quantile: 0.0007342571264241723
mean absolute error: 0.003207692307692308


## Random - Boosting

Repeating the random thetas experiment:

In [103]:
# boosting com os parametros gerados aleatoriamente
model_r = HistGradientBoostingRegressor(
    loss="quantile", quantile=.95
)

In [104]:
model_thetas = np.array(thetas_r).reshape(-1, 1)
model_lambdas = np.array(lambdas_r).reshape(-1, 1)
model_r.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


In [105]:
model_r.n_iter_

100

In [106]:
boosting_quantiles_r = model_r.predict(thetas.reshape(-1, 1))
boosting_quantiles_r

array([0.00914058, 0.00894287, 0.00818978, 0.00623294, 0.00647849,
       0.00461665, 0.00340006, 0.0030394 , 0.00180907, 0.00145617,
       0.00117344, 0.00094448, 0.00056116])

In [107]:
err["err_boosting_random"] = eval_coverage(boosting_quantiles_r)

alpha for theta=0.5 : 0.057 --- quantile: 0.00914057690383788
alpha for theta=1.0 : 0.06 --- quantile: 0.008942866559951539
alpha for theta=1.5 : 0.0501 --- quantile: 0.008189782297880236
alpha for theta=2.0 : 0.0673 --- quantile: 0.006232937776896031
alpha for theta=2.5 : 0.0193 --- quantile: 0.00647848776428406
alpha for theta=3.0 : 0.0374 --- quantile: 0.004616650655146988
alpha for theta=3.5 : 0.0493 --- quantile: 0.0034000614403316563
alpha for theta=4.0 : 0.0231 --- quantile: 0.0030393998005053646
alpha for theta=4.5 : 0.0759 --- quantile: 0.0018090736622344364
alpha for theta=5.0 : 0.0758 --- quantile: 0.0014561710521714614
alpha for theta=5.5 : 0.0752 --- quantile: 0.0011734427000124885
alpha for theta=6.0 : 0.0563 --- quantile: 0.000944478226890295
alpha for theta=6.5 : 0.1592 --- quantile: 0.0005611568324452675
mean absolute error: 0.022899999999999997


## 20 iters in each theta - Boosting

In [108]:
# boosting com poucas observacoes sobre cada theta
model = HistGradientBoostingRegressor(
    loss="quantile", quantile=.95,
)

model_thetas = np.array(thetas_few).reshape(-1, 1)
model_lambdas = np.array(lambdas_few).reshape(-1, 1)
model.fit(model_thetas, model_lambdas)

  y = column_or_1d(y, warn=True)


In [109]:
model.n_iter_

100

In [110]:
quantiles = model.predict(thetas.reshape(-1, 1))
quantiles

array([0.00919342, 0.01239099, 0.00882378, 0.00577641, 0.00558608,
       0.00492352, 0.00278027, 0.00265441, 0.00274259, 0.0017383 ,
       0.00150412, 0.00130084, 0.00072386])

In [111]:
err["err_boosting_few_iters"] = eval_coverage(quantiles)

alpha for theta=0.5 : 0.0622 --- quantile: 0.00919342425185533
alpha for theta=1.0 : 0.0112 --- quantile: 0.012390993913237138
alpha for theta=1.5 : 0.0342 --- quantile: 0.00882378180140067
alpha for theta=2.0 : 0.0897 --- quantile: 0.005776413735249767
alpha for theta=2.5 : 0.0421 --- quantile: 0.005586083816467919
alpha for theta=3.0 : 0.0237 --- quantile: 0.004923517746358238
alpha for theta=3.5 : 0.11 --- quantile: 0.0027802676585627034
alpha for theta=4.0 : 0.0482 --- quantile: 0.002654408301393961
alpha for theta=4.5 : 0.0095 --- quantile: 0.002742594277357665
alpha for theta=5.0 : 0.0315 --- quantile: 0.001738301331363586
alpha for theta=5.5 : 0.0203 --- quantile: 0.0015041194997289955
alpha for theta=6.0 : 0.0086 --- quantile: 0.0013008423245348634
alpha for theta=6.5 : 0.0549 --- quantile: 0.0007238562075916346
mean absolute error: 0.025961538461538463


In [112]:
for key, value in err.items():
    print(f"{key} = {value}")

err_naive = 0.00559230769230769
err_pinball_locart = 0.016900000000000002
err_reg = 0.057592307692307707
err_locart = 0.00486153846153846
err_reg_random = 0.061384615384615406
err_locart_random = 0.0283923076923077
err_reg_few_iters = 0.06563076923076924
err_locart_few = 0.035092307692307693
err_boosting = 0.003207692307692308
err_boosting_random = 0.022899999999999997
err_boosting_few_iters = 0.025961538461538463
