In [22]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.ensemble import StackingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.model_selection import train_test_split, RandomizedSearchCV, learning_curve, GridSearchCV
import ssl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
housing = fetch_california_housing()

In [3]:
data = pd.DataFrame(data=housing.data, columns=housing.feature_names)
data

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [4]:
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

In [5]:
scaled_data = pd.DataFrame(data=scaled_data, columns=housing.feature_names)
scaled_data

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,2.344766,0.982143,0.628559,-0.153758,-0.974429,-0.049597,1.052548,-1.327835
1,2.332238,-0.607019,0.327041,-0.263336,0.861439,-0.092512,1.043185,-1.322844
2,1.782699,1.856182,1.155620,-0.049016,-0.820777,-0.025843,1.038503,-1.332827
3,0.932968,1.856182,0.156966,-0.049833,-0.766028,-0.050329,1.038503,-1.337818
4,-0.012881,1.856182,0.344711,-0.032906,-0.759847,-0.085616,1.038503,-1.337818
...,...,...,...,...,...,...,...,...
20635,-1.216128,-0.289187,-0.155023,0.077354,-0.512592,-0.049110,1.801647,-0.758826
20636,-0.691593,-0.845393,0.276881,0.462365,-0.944405,0.005021,1.806329,-0.818722
20637,-1.142593,-0.924851,-0.090318,0.049414,-0.369537,-0.071735,1.778237,-0.823713
20638,-1.054583,-0.845393,-0.040211,0.158778,-0.604429,-0.091225,1.778237,-0.873626


In [6]:
X = scaled_data.values
y = housing.target

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [8]:
estimators = [
    ('blr', BayesianRidge()),
    ('rtr', RandomForestRegressor(max_depth=10, n_estimators=10, random_state=42))   
]

In [9]:
reg = StackingRegressor(estimators=estimators,
                       final_estimator=DecisionTreeRegressor())

In [10]:
reg.fit(X_train, y_train)

StackingRegressor(estimators=[('blr', BayesianRidge()),
                              ('rtr',
                               RandomForestRegressor(max_depth=10,
                                                     n_estimators=10,
                                                     random_state=42))],
                  final_estimator=DecisionTreeRegressor())

In [11]:
reg.score(X_train, y_train)

0.6085402229960182

In [12]:
reg.score(X_test, y_test)

0.50895179786952

In [13]:
reg.get_params()

{'cv': None,
 'estimators': [('blr', BayesianRidge()),
  ('rtr',
   RandomForestRegressor(max_depth=10, n_estimators=10, random_state=42))],
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__criterion': 'mse',
 'final_estimator__max_depth': None,
 'final_estimator__max_features': None,
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_impurity_split': None,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__presort': 'deprecated',
 'final_estimator__random_state': None,
 'final_estimator__splitter': 'best',
 'final_estimator': DecisionTreeRegressor(),
 'n_jobs': None,
 'passthrough': False,
 'verbose': 0,
 'blr': BayesianRidge(),
 'rtr': RandomForestRegressor(max_depth=10, n_estimators=10, random_state=42),
 'blr__alpha_1': 1e-06,
 'blr__alpha_2': 1e-06,
 'blr__alpha_init': None,
 'blr__compute_score': False,
 'blr__co

In [14]:
only_rtr = RandomForestRegressor(max_depth=10, n_estimators=10, random_state=42)

In [15]:
only_rtr.fit(X_train, y_train)

RandomForestRegressor(max_depth=10, n_estimators=10, random_state=42)

In [16]:
only_rtr.score(X_train, y_train)

0.863314544465887

In [17]:
only_rtr.score(X_test, y_test)

0.7499748811199882

In [18]:
only_bayesian = BayesianRidge()

In [19]:
only_bayesian.fit(X_train, y_train)

BayesianRidge()

In [20]:
only_bayesian.score(X_train, y_train)

0.6112938796829026

In [21]:
only_bayesian.score(X_test, y_test)

0.5925980771743911