In [1]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [2]:
# Load the California housing data as plain arrays: X holds the feature
# matrix, y the regression target.
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [3]:
# Sanity-check dimensions: the recorded run shows (20640, 8) features and
# (20640,) targets.
(X.shape, y.shape)

((20640, 8), (20640,))

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score

In [5]:
# Base learners for the voting ensemble: one linear model and two
# unconstrained decision trees.
lr = LinearRegression()
# DecisionTreeRegressor randomly permutes candidate features at every split,
# so unseeded trees give slightly different scores on each run. Fix the seeds
# so "Restart & Run All" reproduces the numbers; use different seeds so that
# dt and dt2 remain two distinct models rather than exact duplicates.
dt = DecisionTreeRegressor(random_state=0)
dt2 = DecisionTreeRegressor(random_state=1)

In [6]:
# (name, model) pairs in the form VotingRegressor expects.
estimators = list(zip(('lr', 'dt', 'dt2'), (lr, dt, dt2)))

In [7]:
# Baseline: 10-fold cross-validated R^2 of each base model on its own.
for label, model in estimators:
    scores = cross_val_score(model, X, y, cv=10, scoring='r2')
    mean_r2 = np.mean(scores)
    print(label, np.round(mean_r2, 2))

lr 0.51
dt 0.24
dt2 0.23


In [8]:
from sklearn.ensemble import VotingRegressor

In [9]:
# Unweighted ensemble: VotingRegressor averages the base models' predictions.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, cv=10, scoring='r2')
print("Voting Regressor", scores.mean().round(2))

Voting Regressor 0.48


In [10]:
## weighted voting
# Try every weight triple in {1, 2, 3}^3; weights are matched positionally to
# the entries of `estimators` (in notebook order: lr, dt, dt2).
# This is 27 ensembles x 10 folds each, so the cell is slow.
weight_grid = [
    (i, j, k)
    for i in range(1, 4)
    for j in range(1, 4)
    for k in range(1, 4)
]
for weights in weight_grid:
    vr = VotingRegressor(estimators, weights=list(weights))
    scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
    print("For i={},j={},k={}".format(*weights), np.round(np.mean(scores), 2))

For i=1,j=1,k=1 0.48
For i=1,j=1,k=2 0.44
For i=1,j=1,k=3 0.41
For i=1,j=2,k=1 0.44
For i=1,j=2,k=2 0.41
For i=1,j=2,k=3 0.39
For i=1,j=3,k=1 0.41
For i=1,j=3,k=2 0.4
For i=1,j=3,k=3 0.38
For i=2,j=1,k=1 0.55
For i=2,j=1,k=2 0.51
For i=2,j=1,k=3 0.48
For i=2,j=2,k=1 0.51
For i=2,j=2,k=2 0.49
For i=2,j=2,k=3 0.46
For i=2,j=3,k=1 0.48
For i=2,j=3,k=2 0.46
For i=2,j=3,k=3 0.44
For i=3,j=1,k=1 0.56
For i=3,j=1,k=2 0.54
For i=3,j=1,k=3 0.52
For i=3,j=2,k=1 0.55
For i=3,j=2,k=2 0.52
For i=3,j=2,k=3 0.5
For i=3,j=3,k=1 0.52
For i=3,j=3,k=2 0.5
For i=3,j=3,k=3 0.48


In [11]:
# using the same algorithm
#
# Five decision trees that differ only in their depth limit
# (max_depth=None leaves the depth unconstrained).
# random_state is fixed because DecisionTreeRegressor permutes candidate
# features at each split; without a seed the scores drift between runs.
# NOTE: this rebinds `dt2`, which an earlier cell used for an unconstrained
# tree; the earlier `estimators` list keeps the old object until it is rebuilt.
dt1 = DecisionTreeRegressor(max_depth=1, random_state=42)
dt2 = DecisionTreeRegressor(max_depth=3, random_state=42)
dt3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dt4 = DecisionTreeRegressor(max_depth=7, random_state=42)
dt5 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [12]:
# (name, model) pairs for the depth-varied trees; dict insertion order keeps
# them in dt1..dt5 order.
estimators = list({
    'dt1': dt1,
    'dt2': dt2,
    'dt3': dt3,
    'dt4': dt4,
    'dt5': dt5,
}.items())

In [13]:
# 10-fold cross-validated R^2 of each entry in `estimators`, scored on its own.
for label, model in estimators:
    scores = cross_val_score(model, X, y, cv=10, scoring='r2')
    mean_r2 = np.mean(scores)
    print(label, np.round(mean_r2, 2))

dt1 0.13
dt2 0.36
dt3 0.43
dt4 0.47
dt5 0.25


In [14]:
# Ensemble built from the current `estimators` list, with equal weights.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, cv=10, scoring='r2')
print("Voting Regressor", scores.mean().round(2))

Voting Regressor 0.5
