# Regression

In [13]:
from CoveringStackModels import CoveringStackRegressor, CoveringStackClassifier, _BaseCoveringStack
import numpy as np

In [14]:
from sklearn.model_selection import KFold
from sklearn.base import RegressorMixin

class CustomBlocksCoveringStackRegressor(_BaseCoveringStack, RegressorMixin):
    """Covering-stack regressor whose ``blocks`` are supplied by the caller.

    Every constructor argument is forwarded unchanged to
    ``_BaseCoveringStack``. This subclass only defines how an estimator's
    output is read (``est.predict``) and exposes ``predict``.
    """

    def __init__(
        self,
        *,
        blocks,
        l1_estimator,
        l2_estimator,
        splitter_cls=KFold,
        splitter_kwargs=None,
    ):
        """Pass all keyword-only settings straight to the base constructor."""
        base_settings = {
            "blocks": blocks,
            "l1_estimator": l1_estimator,
            "l2_estimator": l2_estimator,
            "splitter_cls": splitter_cls,
            "splitter_kwargs": splitter_kwargs,
        }
        super().__init__(**base_settings)

    def _model_output(self, est, X):
        """Return ``est.predict(X)``.

        Presumably the hook the base class uses to read a fitted estimator's
        output -- confirm against ``_BaseCoveringStack``.
        """
        predictions = est.predict(X)
        return predictions

    def predict(self, X):
        """Return whatever ``self._predict_raw(X)`` produces for ``X``."""
        raw_prediction = self._predict_raw(X)
        return raw_prediction

In [16]:
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Toy problem: four targets and a single constant (all-zero) feature column.
y = np.arange(1, 5)
X = np.zeros((y.shape[0], 1))
cv = KFold(n_splits=len(y))

# With a constant X column a linear regression can only learn an intercept,
# so it behaves like a dummy (mean-predicting) regressor.

# ------------------------------------------------------------------
# 1) Standard KFold stack: L1 OOF predictions become the L2 feature
# ------------------------------------------------------------------
oof_l1_kf = cross_val_predict(estimator=LinearRegression(), X=X, y=y, cv=cv)

# The L2 model sees the L1 out-of-fold predictions as its only column.
oof_l2_kf = cross_val_predict(
    estimator=LinearRegression(),
    X=oof_l1_kf[:, np.newaxis],
    y=y,
    cv=cv,
)

print("KFold R²:", r2_score(y_true=y, y_pred=oof_l2_kf))


# ------------------------------------------------------------------
# 2) CustomBlocksCoveringStackRegressor with explicitly chosen blocks
# ------------------------------------------------------------------
# Three caller-chosen blocks over the ids 1..4.
# NOTE(review): how these ids are interpreted is defined by
# _BaseCoveringStack -- confirm there.
blocks = [[1, 2, 3], [1, 2, 4], [1, 3, 4]]

cov_model = CustomBlocksCoveringStackRegressor(
    blocks=blocks,
    l1_estimator=LinearRegression(),
    l2_estimator=LinearRegression(),
)
cov_model = cov_model.fit(X, y)

oof_cov = cov_model.oof_

# Average slice 0 of the last axis of l1_oof_ over axis 1, skipping NaNs.
l1_oof_pred = np.nanmean(cov_model.l1_oof_[:, :, 0], axis=1)

print("Covering R² (L1):", r2_score(y_true=y, y_pred=l1_oof_pred))
print("Covering R² (L2):", r2_score(y_true=y, y_pred=oof_cov))

KFold R²: 1.0
Covering R² (L1): -0.7
Covering R² (L2): -1.1500000000000008


That's good! 

The L2 model didn't overfit, but the quasi-L3 model (the correlation calculation) did.

That is exactly what we wanted.

In [18]:
import numpy as np
from sklearn.model_selection import GroupKFold, cross_val_predict
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# ------------------------------------------------------------
# Synthetic data: the target is pure noise
# ------------------------------------------------------------
rng = np.random.default_rng(0)

K = 5       # number of folds; must be >= 3
M = 1000    # number of categories

# Each category appears exactly once in each fold.
cats = np.repeat(np.arange(M), K)
groups = np.tile(np.arange(K), M)   # fold id of every row
y = rng.normal(size=K * M)          # pure noise target

# One-hot encode the category and drop the first column to avoid collinearity.
X_cat = np.eye(M)[cats][:, 1:]      # shape (K * M, M - 1)

cv = GroupKFold(n_splits=K)

# ------------------------------------------------------------
# L1: per-category mean, obtained as linear regression on the one-hot
# ------------------------------------------------------------
oof_l1 = cross_val_predict(
    estimator=LinearRegression(),
    X=X_cat,
    y=y,
    groups=groups,
    cv=cv,
)

# ------------------------------------------------------------
# L2: linear regression on [one-hot, L1 OOF prediction]
# ------------------------------------------------------------
X_l2 = np.column_stack((X_cat, oof_l1))

oof_l2 = cross_val_predict(
    estimator=LinearRegression(),
    X=X_l2,
    y=y,
    groups=groups,
    cv=cv,
)

print("Regular L2 R²:", r2_score(y_true=y, y_pred=oof_l2))


# ------------------------------------------------------------
# Covering stack on the same features
# ------------------------------------------------------------
cov_model = CoveringStackRegressor(
    l1_estimator=LinearRegression(),
    l2_estimator=LinearRegression(),
    max_l1_fits=10,
)
cov_model = cov_model.fit(X_cat, y)

# Average slice 0 of the last axis of l1_oof_ over axis 1, skipping NaNs.
l1_oof_pred = np.nanmean(cov_model.l1_oof_[:, :, 0], axis=1)

print("Covering R² (L1):", r2_score(y_true=y, y_pred=l1_oof_pred))
print("Covering R² (L2):", r2_score(y_true=y, y_pred=cov_model.oof_))

Regular L2 R²: 1.0
Covering R² (L1): -0.0038187170357346645
Covering R² (L2): -0.011134883860827172


Nice. Again, the L3 model would have overfit, but this time it needed to be a grouped linear regression rather than just a straight-line fit on the OOF predictions.

Now for a test on real data. 

# Classification

In [7]:
import numpy as np
from sklearn.model_selection import GroupKFold, cross_val_predict
from sklearn.linear_model import LogisticRegression
from CoveringStackModels import CoveringStackClassifier
from sklearn.metrics import roc_auc_score

# ------------------------------------------------------------
# Synthetic data: the binary target is pure noise
# ------------------------------------------------------------
rng = np.random.default_rng(0)

K = 5       # number of folds
M = 1000    # number of categories

# Each category appears exactly once in each fold.
cats = np.repeat(np.arange(M), K)
groups = np.tile(np.arange(K), M)     # fold id of every row
y = rng.integers(0, 2, size=K * M)    # binary noise labels

# One-hot encode the category, dropping the first column.
X_cat = np.eye(M)[cats][:, 1:]

cv = GroupKFold(n_splits=K)

# ------------------------------------------------------------
# L1: out-of-fold probability of class 1
# ------------------------------------------------------------
oof_l1 = cross_val_predict(
    estimator=LogisticRegression(),
    X=X_cat,
    y=y,
    groups=groups,
    cv=cv,
    method="predict_proba",
)[:, 1]

# ------------------------------------------------------------
# L2: logistic regression on [one-hot, L1 OOF probability]
# ------------------------------------------------------------
X_l2 = np.column_stack((X_cat, oof_l1))

oof_l2 = cross_val_predict(
    estimator=LogisticRegression(),
    X=X_l2,
    y=y,
    groups=groups,
    cv=cv,
    method="predict_proba",
)[:, 1]

print("Regular L2 AUC:", roc_auc_score(y_true=y, y_score=oof_l2))


# ------------------------------------------------------------
# Covering stack (classification) on the same features
# ------------------------------------------------------------
cov_model = CoveringStackClassifier(
    l1_estimator=LogisticRegression(),
    l2_estimator=LogisticRegression(),
    max_l1_fits=10,
)
cov_model = cov_model.fit(X_cat, y)

# Average slice 0 of the last axis of l1_oof_ over axis 1, skipping NaNs.
l1_oof_pred = np.nanmean(cov_model.l1_oof_[:, :, 0], axis=1)

print("Covering AUC L1:", roc_auc_score(y_true=y, y_score=l1_oof_pred))
print("Covering AUC L2:", roc_auc_score(y_true=y, y_score=cov_model.oof_))

Regular L2 AUC: 0.9529698094955118
Covering AUC L1: 0.3355316958936325
Covering AUC L2: 0.34604997952760363


# Time for some real data baby

In [1]:
from autogluon.tabular import TabularDataset
from autogluon.features.generators import AutoMLPipelineFeatureGenerator

# Location of the airlines train/test CSV files.
path_prefix = 'https://autogluon.s3.amazonaws.com/datasets/leakage/airlines/'
path_train = f"{path_prefix}train_data.csv"
path_test = f"{path_prefix}test_data.csv"

label = 'Delay'

train_data = TabularDataset(path_train)
test_data = TabularDataset(path_test)

# Targets are simply the label column of each split.
y_train = train_data[label]
y_test = test_data[label]

# ------------------------------------------------------------
# Feature-generator output (what most Tabular models train on)
# ------------------------------------------------------------
fg = AutoMLPipelineFeatureGenerator()

# Fit the generator on the training features only ...
X_train = fg.fit_transform(train_data.drop(columns=[label]))

# ... and reuse the *same* fitted generator to transform the test features.
X_test = fg.transform(test_data.drop(columns=[label]))

print(X_train.shape, X_test.shape)
X_train.head()

(485444, 7) (53939, 7)


Unnamed: 0,Flight,DayOfWeek,Time,Length,Airline,AirportFrom,AirportTo
0,2774.0,5,1235.0,80.0,10,79,68
1,2313.0,1,1296.0,141.0,5,15,129
2,6948.0,4,360.0,146.0,12,64,208
3,1247.0,3,1170.0,143.0,3,34,60
4,31.0,6,1410.0,344.0,14,202,217


In [2]:
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# Requires X_train, y_train, X_test and y_test to exist already
# (the AutoMLPipelineFeatureGenerator output and label columns).

K = 8
n_estimators = 50

# Every forest in this cell is built with the same settings.
rf_params = dict(n_estimators=n_estimators, random_state=0, n_jobs=-1)

cv = StratifiedKFold(n_splits=K, shuffle=True, random_state=0)

# ------------------------------------------------------------
# L1 (RF): out-of-fold probability of class 1
# ------------------------------------------------------------
oof_l1 = cross_val_predict(
    estimator=RandomForestClassifier(**rf_params),
    X=X_train,
    y=y_train,
    cv=cv,
    method="predict_proba",
)[:, 1]

print("Regular L1 AUC (train OOF):", roc_auc_score(y_true=y_train, y_score=oof_l1))

# ------------------------------------------------------------
# L2 (RF): trained on [X, oof_l1]
# ------------------------------------------------------------
X_l2 = np.column_stack((X_train.to_numpy(), oof_l1))

oof_l2 = cross_val_predict(
    estimator=RandomForestClassifier(**rf_params),
    X=X_l2,
    y=y_train,
    cv=cv,
    method="predict_proba",
)[:, 1]

print("Regular L2 AUC (train OOF):", roc_auc_score(y_true=y_train, y_score=oof_l2))

# ------------------------------------------------------------
# Optional: evaluate on test (both levels refit on the full train set)
# ------------------------------------------------------------
l1 = RandomForestClassifier(**rf_params).fit(X_train, y_train)
p1_test = l1.predict_proba(X_test)[:, 1]

# X_l2 is exactly [X_train, oof_l1], the matrix the final L2 model is fit on.
l2 = RandomForestClassifier(**rf_params).fit(X_l2, y_train)

# At test time the L2 feature is the full-fit L1 probability.
X_l2_test = np.column_stack((X_test.to_numpy(), p1_test))
p2_test = l2.predict_proba(X_l2_test)[:, 1]

print("Regular L2 AUC (test):", roc_auc_score(y_true=y_test, y_score=p2_test))

Regular L1 AUC (train OOF): 0.6592209023322024
Regular L2 AUC (train OOF): 0.8275332365679232
Regular L2 AUC (test): 0.6847744908489939


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from CoveringStackModels import CoveringStackClassifier
import numpy as np

n_estimators = 100

# NOTE(review): neither forest is given a random_state here, so the AUCs
# printed below may shift slightly between runs unless the stack seeds them.
cov_model = CoveringStackClassifier(
    l1_estimator=RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1),
    l2_estimator=RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1),
    max_l1_fits=10,
).fit(np.asarray(X_train), y_train)

# Predict on the same container type the model was fitted on (a plain
# ndarray). Passing the DataFrame to a model fitted without feature names
# can trigger scikit-learn's feature-name mismatch warning and depends on the
# predict path tolerating DataFrames; the values themselves are unchanged.
uniform_p, routed_p, l1_p = cov_model.predict_uniform_and_routed_mean(
    np.asarray(X_test)
)

# Average slice 0 of the last axis of l1_oof_ over axis 1, skipping NaNs.
l1_oof_pred = np.nanmean(cov_model.l1_oof_[:, :, 0], axis=1)

print("Covering AUC (train L1):", roc_auc_score(y_train, l1_oof_pred))
print("Covering AUC (train L2):", roc_auc_score(y_train, cov_model.oof_))

print("\n")

print("Covering AUC (test L1):", roc_auc_score(y_test, l1_p))
print("Covering AUC (test Routed):", roc_auc_score(y_test, routed_p))
print("Covering AUC (test Uniform):", roc_auc_score(y_test, uniform_p))

Covering AUC (train L1): 0.6748260310864647
Covering AUC (train L2): 0.6624364742676505


Covering AUC (test L1): 0.6776727953195385
Covering AUC (test Routed): 0.6966933271601498
Covering AUC (test Uniform): 0.7085701888175125


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from CoveringStackModels import CoveringStackClassifier
import numpy as np

n_estimators = 50

# NOTE(review): neither forest is given a random_state here, so the AUCs
# printed below may shift slightly between runs unless the stack seeds them.
cov_model = CoveringStackClassifier(
    l1_estimator=RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1),
    l2_estimator=RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1),
    max_l1_fits=10,
).fit(np.asarray(X_train), y_train)

# Predict on the same container type the model was fitted on (a plain
# ndarray). Passing the DataFrame to a model fitted without feature names
# can trigger scikit-learn's feature-name mismatch warning and depends on the
# predict path tolerating DataFrames; the values themselves are unchanged.
uniform_p, routed_p, l1_p = cov_model.predict_uniform_and_routed_mean(
    np.asarray(X_test)
)

# Average slice 0 of the last axis of l1_oof_ over axis 1, skipping NaNs.
l1_oof_pred = np.nanmean(cov_model.l1_oof_[:, :, 0], axis=1)

print("Covering AUC (train L1):", roc_auc_score(y_train, l1_oof_pred))
print("Covering AUC (train L2):", roc_auc_score(y_train, cov_model.oof_))

print("\n")

print("Covering AUC (test L1):", roc_auc_score(y_test, l1_p))
print("Covering AUC (test Routed):", roc_auc_score(y_test, routed_p))
print("Covering AUC (test Uniform):", roc_auc_score(y_test, uniform_p))

Covering AUC (train L1): 0.6743964186671376
Covering AUC (train L2): 0.6605682476971614


Covering AUC (test L1): 0.6777335880926416
Covering AUC (test Routed): 0.6964275644455573
Covering AUC (test Uniform): 0.7083303584995808


No leak!