In [1]:
! pip install scikit-learn



In [2]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Load the Boston Housing dataset from OpenML.
# The name 'boston' matches more than one active OpenML version, so the
# version is pinned explicitly. Without it, fetch_openml falls back to the
# oldest active version (version 1) and warns about the ambiguity.
boston = fetch_openml(name='boston', version=1)

# Split the data into training and validation sets: 20% of the rows are held
# out for validation, and the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    boston.data, boston.target, test_size=0.2, random_state=42)

- version 1, status: active
  url: https://www.openml.org/search?type=data&id=531
- version 2, status: active
  url: https://www.openml.org/search?type=data&id=853



In [3]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Build the three base regressors, each seeded with the same random_state
dt, rf, gb = (
    DecisionTreeRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
)

# Fit every base model on the training split (tree, forest, then boosting)
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
gb.fit(X_train, y_train)

In [4]:
# Predict the validation split with each fitted base model, in the order
# decision tree, random forest, gradient boosting.
dt_pred, rf_pred, gb_pred = (
    fitted.predict(X_val) for fitted in (dt, rf, gb)
)

In [8]:
# Peek at the first five gradient-boosting predictions on the validation split
gb_pred[:5]

array([23.4497613 , 31.46136029, 17.70531345, 24.02257316, 17.6811439 ])

In [6]:
import numpy as np
# One column per base model (tree, forest, boosting); each row holds the three
# predictions made for the same validation sample.
X_val_meta = np.column_stack([dt_pred, rf_pred, gb_pred])
X_val_meta

array([[28.1       , 22.839     , 23.4497613 ],
       [33.1       , 30.69      , 31.46136029],
       [17.3       , 16.278     , 17.70531345],
       [22.        , 23.51      , 24.02257316],
       [23.2       , 16.819     , 17.6811439 ],
       [18.5       , 21.374     , 22.03112796],
       [16.6       , 19.363     , 18.14958762],
       [16.6       , 15.62      , 13.83045241],
       [22.7       , 21.093     , 20.61619342],
       [22.        , 21.073     , 21.04285745],
       [20.5       , 20.022     , 19.93787694],
       [27.1       , 19.297     , 17.84771967],
       [ 8.4       ,  8.611     ,  9.00542602],
       [21.4       , 21.394     , 21.75820976],
       [18.5       , 19.386     , 19.79454516],
       [23.9       , 25.408     , 25.71887535],
       [18.8       , 19.233     , 20.04177142],
       [10.5       ,  8.538     ,  8.83726437],
       [46.        , 46.132     , 45.07118382],
       [13.        , 14.536     , 15.96280089],
       [23.1       , 24.729     , 24.197

In [9]:
from sklearn.linear_model import LinearRegression

# Stack the base-model validation predictions side by side so that each
# base model contributes one input column to the meta-model.
X_val_meta = np.column_stack([
    dt_pred,
    rf_pred,
    gb_pred,
])

# Fit a linear meta-model that maps the stacked predictions to the
# validation targets.
meta_model = LinearRegression()
meta_model.fit(X=X_val_meta, y=y_val)

In [10]:
# A single hand-written sample: one row with 13 feature values
X_new = np.array([[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3]])

# Run the sample through every base model (tree, forest, boosting)
dt_pred_new, rf_pred_new, gb_pred_new = (
    fitted.predict(X_new) for fitted in (dt, rf, gb)
)

# Arrange the base predictions as the meta-model's input columns
X_new_meta = np.column_stack([dt_pred_new, rf_pred_new, gb_pred_new])

# The meta-model turns the base predictions into the final estimate
y_new_pred = meta_model.predict(X_new_meta)

print("Predicted median value of owner-occupied homes: ${:.2f} thousand".format(y_new_pred[0]))


Predicted median value of owner-occupied homes: $49.72 thousand


