In [1]:
import numpy as np
from sklearn.base import RegressorMixin, clone

from sklearn.linear_model import QuantileRegressor
from mapie.quantile_regression import MapieQuantileRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from inspect import signature
from sklearn.model_selection import train_test_split
from mapie.metrics import regression_coverage_score
from sklearn.datasets import make_regression

# Single seed reused for data generation and train/calibration splitting so
# that every cell below is reproducible.
random_state = 1

In [2]:
# Synthetic regression problem shared by the experiments in this notebook.
X, y = make_regression(
    n_samples=500,
    n_features=10,
    noise=1.0,
    random_state=random_state,
)
n_samples = len(X)
# Constant matrix of ones with one column per feature.
k = np.ones(shape=(5, X.shape[1]))

# Analysis Gradient Boosting Regressor

We can observe a difference between the training scores even though the models share the same specification, or at least should share it, since the sample weights are constant.

In [4]:
# NOTE(review): this whole cell is commented out, so the "all good" output
# displayed for it comes from an earlier execution. Re-enable the code or
# clear the stale output before sharing the notebook.
#
# Disabled check: fit three clones of a quantile-loss GradientBoostingRegressor
# with sample_weight None, all ones and all fives, then compare their
# train_score_ arrays.
# gb = GradientBoostingRegressor(loss="quantile", random_state=1)

# gb1 = clone(gb)
# gb2 = clone(gb)
# gb3 = clone(gb)
# gb1.fit(X, y, sample_weight=None)
# gb2.fit(X, y, sample_weight=np.ones(shape=n_samples))
# gb3.fit(X, y, sample_weight=np.ones(shape=n_samples)*5)

# np.testing.assert_allclose(
#     gb1.__dict__["train_score_"],
#     gb2.__dict__["train_score_"]
# )

# np.testing.assert_allclose(
#     gb1.__dict__["train_score_"],
#     gb3.__dict__["train_score_"]
# )

# print("all good")

all good


# Quantile regression

This seems to work correctly: we do not see any difference between different constant values for the sample weights.

In [4]:
# Fit the same QuantileRegressor three times, changing only the constant
# sample weights (none, all ones, all fives), then compare the results.
qt = QuantileRegressor(solver='highs')

constant_weights = (
    None,
    np.ones(shape=n_samples),
    np.ones(shape=n_samples)*5,
)
# fit() returns the estimator itself, so each clone can be fitted inline.
qt1, qt2, qt3 = (
    clone(qt).fit(X, y, sample_weight=weights) for weights in constant_weights
)

# Dump the fitted state of each model so the three can be compared by eye.
for fitted in (qt1, qt2, qt3):
    print(fitted.__dict__)

X_test, y_test = make_regression(
    n_samples=2,
    n_features=10,
    noise=1.0,
    random_state=random_state,
)

y_pred1, y_pred2, y_pred3 = (
    model.predict(X_test) for model in (qt1, qt2, qt3)
)

# Predictions must not depend on the constant used for the weights.
for candidate in (y_pred2, y_pred3):
    np.testing.assert_allclose(y_pred1, candidate)


{'quantile': 0.5, 'alpha': 1.0, 'fit_intercept': True, 'solver': 'highs', 'solver_options': None, 'n_features_in_': 10, 'n_iter_': 500, 'coef_': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'intercept_': 9.279029358836635}
{'quantile': 0.5, 'alpha': 1.0, 'fit_intercept': True, 'solver': 'highs', 'solver_options': None, 'n_features_in_': 10, 'n_iter_': 500, 'coef_': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'intercept_': 9.279029358836635}
{'quantile': 0.5, 'alpha': 1.0, 'fit_intercept': True, 'solver': 'highs', 'solver_options': None, 'n_features_in_': 10, 'n_iter_': 500, 'coef_': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'intercept_': 9.279029358836635}


# Checking MapieQuantileRegressor with QuantileRegressor 

For some reason, only the prediction with quantile=0.5 seems to go wrong with mapie3; the cause is not yet known and is still to be investigated.

In [5]:
# Fit three clones of the same MapieQuantileRegressor on one train/calibration
# split and check that they agree on every result.
qt = QuantileRegressor(solver='highs')

mapie = MapieQuantileRegressor(
    estimator=qt,
    alpha=0.2
)
mapie1 = clone(mapie)
mapie2 = clone(mapie)
mapie3 = clone(mapie)

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# The held-out half is used as the calibration set, so the names below follow
# that order instead of relying on a compensating swap in the fit() call.
X_train, X_calib, y_train, y_calib = train_test_split(
    X, y, test_size=0.5, random_state=random_state
)
X_test, y_test = make_regression(
    n_samples=2, n_features=10, noise=1.0, random_state=random_state
)

# NOTE(review): every clone is fitted with sample_weight=None, so this cell
# does not vary the sample weights between the three models.
mapie_outputs = []
for tag, model in (("pred1", mapie1), ("pred2", mapie2), ("pred3", mapie3)):
    print(tag)
    model.fit(X_train, y_train, X_calib, y_calib, sample_weight=None)
    point_pred, pis = model.predict(X_test)
    print(point_pred)
    mapie_outputs.append((point_pred, pis))
(y_pred1, y_pis1), (y_pred2, y_pis2), (y_pred3, y_pis3) = mapie_outputs

# Compare everything against the first clone, in the same order as before:
# calibration predictions, conformity scores, then the two predict() outputs.
for other in (mapie2, mapie3):
    np.testing.assert_allclose(mapie1.y_calib_pred, other.y_calib_pred)

for other in (mapie2, mapie3):
    np.testing.assert_allclose(mapie1.conformity_scores_, other.conformity_scores_)

for other_pis in (y_pis2, y_pis3):
    np.testing.assert_allclose(y_pis1, other_pis)

for other_pred in (y_pred2, y_pred3):
    np.testing.assert_allclose(y_pred1, other_pred)

pred1
[12.47840885 12.47840885]
pred2
[12.47840885 12.47840885]
pred3
[12.47840885 12.47840885]


In [6]:
# Minimal reproduction: a single MapieQuantileRegressor fitted without sample
# weights, then used to predict on two fresh samples.
qt = QuantileRegressor(solver='highs')

mapie = MapieQuantileRegressor(
    estimator=qt,
    alpha=0.2
)
mapie1 = clone(mapie)

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# The held-out half is used as the calibration set, so the names below follow
# that order instead of relying on a compensating swap in the fit() call.
X_train, X_calib, y_train, y_calib = train_test_split(X, y, test_size=0.5, random_state=random_state)
mapie1.fit(X_train, y_train, X_calib, y_calib, sample_weight=None)


X_test, y_test = make_regression(n_samples=2, n_features=10, noise=1.0, random_state=random_state)

y_pred1, y_pis1 = mapie1.predict(X_test)
print(y_pred1)

[12.47840885 12.47840885]
