In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# Fetch the California housing data, then pull out the feature matrix (X)
# and the regression target (y) from the returned container.
data = fetch_california_housing()
X = data.data
y = data.target

In [3]:
# Display the features and the target together as a tuple.
X, y

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

In [4]:
# Hold out 20% of the samples as the final test set. The remaining 80%
# ("main train") is what the later train/validation split works from.
# A fixed random_state keeps the partition reproducible.
X_main_train, X_test, y_main_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Carve a validation subset (20%) out of the main training data.
# X_train/y_train are used to fit the base models; X_val/y_val are used
# to fit the meta-model on the base models' predictions.
X_train, X_val, y_train, y_val = train_test_split(
    X_main_train,
    y_main_train,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Base (level-0) regressors for the blend. Both are seeded with
# random_state=1 so repeated runs build the same models.
base_model_1, base_model_2 = (
    RandomForestRegressor(random_state=1),
    GradientBoostingRegressor(random_state=1),
)

In [9]:
# Train base models on the training data.
# Only the inner training split (X_train / y_train) is used here; the
# validation split is kept unseen so the base models' predictions on it
# can serve as training input for the meta-model.

base_model_1.fit(X_train, y_train)
base_model_2.fit(X_train, y_train)

In [10]:
# Score the validation split with each fitted base model. These two
# prediction vectors become the input features of the meta-model.
val_preds_1, val_preds_2 = (
    base_model_1.predict(X_val),
    base_model_2.predict(X_val),
)

In [13]:
# Sanity check: both prediction vectors should have the same shape.
val_preds_1.shape, val_preds_2.shape

((3303,), (3303,))

In [15]:
# Build the meta-model's training matrix: one row per validation sample,
# one column per base model's prediction (base_model_1 first).
meta_X = np.column_stack([val_preds_1, val_preds_2])

In [16]:
# Inspect the stacked validation predictions
# (column 0: base_model_1, column 1: base_model_2).
meta_X

array([[0.96464   , 1.14109779],
       [0.88053   , 0.95859223],
       [2.15226   , 2.67809685],
       ...,
       [2.29828   , 2.1078462 ],
       [2.0768401 , 2.12005055],
       [1.97914   , 1.83686723]])

In [17]:
# Meta-model: a plain linear regression that learns how to combine the
# base models' predictions into a single blended prediction.
meta_model = LinearRegression()

In [18]:
# Fit the meta-model using the base models' validation-set predictions as
# features and the true validation targets as labels.
meta_model.fit(X=meta_X, y=y_val)

In [19]:
# Peek at the held-out test features before scoring them.
X_test

array([[   1.6812    ,   25.        ,    4.19220056, ...,    3.87743733,
          36.06      , -119.01      ],
       [   2.5313    ,   30.        ,    5.03938356, ...,    2.67979452,
          35.14      , -119.46      ],
       [   3.4801    ,   52.        ,    3.97715472, ...,    1.36033229,
          37.8       , -122.44      ],
       ...,
       [   9.2298    ,   25.        ,    7.23767606, ...,    2.79049296,
          37.31      , -122.05      ],
       [   2.785     ,   36.        ,    5.28902954, ...,    2.58860759,
          36.77      , -119.76      ],
       [   3.5521    ,   17.        ,    3.98883929, ...,    3.72991071,
          34.22      , -118.37      ]])

In [20]:
# Final holdout evaluation, step 1: get each base model's predictions
# for the test set.
test_preds_1, test_preds_2 = (
    base_model_1.predict(X_test),
    base_model_2.predict(X_test),
)


In [21]:
# Arrange the test-set predictions in the same column layout the
# meta-model was trained on (base_model_1 first, then base_model_2).
meta_test_X = np.column_stack([test_preds_1, test_preds_2])


In [22]:
# Inspect the stacked test predictions
# (column 0: base_model_1, column 1: base_model_2).
meta_test_X

array([[0.51282   , 0.52089841],
       [0.75446   , 0.95410974],
       [4.8469871 , 4.2667676 ],
       ...,
       [4.5364074 , 4.75828635],
       [0.70672   , 0.88874344],
       [1.62128   , 1.90686373]])

In [23]:
# Blend: feed the stacked base-model test predictions through the
# meta-model to obtain the final prediction for each test sample.
final_test_preds = meta_model.predict(X=meta_test_X)


In [24]:
# Show the meta-model's blended predictions for the test set.
final_test_preds

array([0.45098778, 0.76401081, 4.7305963 , ..., 4.67600286, 0.70887558,
       1.6874517 ])

In [27]:
# Report how the blended predictions do on the held-out test set:
# mean squared error (lower is better) and R^2 (closer to 1 is better).
print(
    "MSE of Blending Regressor on test data:",
    mean_squared_error(y_true=y_test, y_pred=final_test_preds),
)
print(
    "R2_Score of Blending Regressor:",
    r2_score(y_true=y_test, y_pred=final_test_preds),
)

MSE of Blending Regressor on test data: 0.2582739794600457
R2_Score of Blending Regressor: 0.8029058227250899
