# **Voting:** Scikit (Regression)
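A type of ensemble learning where the final prediction combines the predictions of multiple models: by majority vote for classification and, as in this notebook, by (optionally weighted) averaging for regression.<br>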

### **Rules for it to work:**
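	- All base models should be independent of one another, so that their errors are not strongly correlated (randomness among them)
	- No base model should have an accuracy score below 0.5 (50%); this threshold applies to classification — for regression, the analogous expectation is that each base model performs better than a trivial baseline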

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Read the Boston housing table from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="boston.csv")

# Preview the first five rows (five is also the default for head()).
df.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [9]:
# Split the frame by position: the first 13 columns become the feature
# matrix and the last column becomes the regression target.
# NOTE(review): positional slicing assumes a fixed column order in
# boston.csv — confirm the file has exactly 13 feature columns before MEDV.
X, y = df.iloc[:, :13], df.iloc[:, -1]

# Quick sanity check on the resulting shapes.
print(X.shape, y.shape)

(506, 13) (506,)


In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score

In [18]:
# Three different regressors that are later combined in a voting ensemble.
lr = LinearRegression()
# Seed the tree: scikit-learn permutes features at every split, so without a
# fixed random_state ties are broken differently on each run and the
# cross-validation scores in this notebook are not reproducible.
# (Recorded outputs below were produced before seeding and may differ slightly.)
dt = DecisionTreeRegressor(random_state=42)
svr = SVR()

# (name, estimator) pairs in the format VotingRegressor expects; the names
# also serve as the printed labels in the loop below.
estimators = [('Linear Regression: ',lr),('Decision Tree: ',dt),('SVR: ',svr)]

# Baseline: mean R^2 over 10 cross-validation folds for each model on its own.
for label, model in estimators:
  scores = cross_val_score(model,X,y,scoring='r2',cv=10)
  print(label,np.round(np.mean(scores),2))

Linear Regression:  0.2
Decision Tree:  -0.19
SVR:  -0.41


In [19]:
from sklearn.ensemble import VotingRegressor

# Unweighted ensemble of the base models listed in `estimators`, scored with
# the same 10-fold R^2 protocol used for the individual models.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(estimator=vr, X=X, y=y, scoring='r2', cv=10)
print("Voting Regressor: ", np.mean(scores).round(2))

Voting Regressor:  0.4


In [16]:
# Exhaustive search over integer weights 1..3 for each of the three base
# models; i, j and k weight the first, second and third entry of
# `estimators` respectively. The triples are generated in the same order as
# three nested loops (k varies fastest).
for i, j, k in (
    (a, b, c)
    for a in range(1, 4)
    for b in range(1, 4)
    for c in range(1, 4)
):
  vr = VotingRegressor(estimators, weights=[i, j, k])
  scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
  mean_r2 = np.round(np.mean(scores), 2)
  print("For i={},j={},k={}".format(i, j, k), mean_r2)


For i=1,j=1,k=1 0.44
For i=1,j=1,k=2 0.36
For i=1,j=1,k=3 0.27
For i=1,j=2,k=1 0.41
For i=1,j=2,k=2 0.4
For i=1,j=2,k=3 0.32
For i=1,j=3,k=1 0.36
For i=1,j=3,k=2 0.39
For i=1,j=3,k=3 0.38
For i=2,j=1,k=1 0.46
For i=2,j=1,k=2 0.38
For i=2,j=1,k=3 0.35
For i=2,j=2,k=1 0.45
For i=2,j=2,k=2 0.45
For i=2,j=2,k=3 0.41
For i=2,j=3,k=1 0.37
For i=2,j=3,k=2 0.38
For i=2,j=3,k=3 0.43
For i=3,j=1,k=1 0.45
For i=3,j=1,k=2 0.43
For i=3,j=1,k=3 0.4
For i=3,j=2,k=1 0.42
For i=3,j=2,k=2 0.46
For i=3,j=2,k=3 0.39
For i=3,j=3,k=1 0.34
For i=3,j=3,k=2 0.45
For i=3,j=3,k=3 0.44


In [20]:
# Voting with a single algorithm: four decision trees that differ only in
# their maximum depth (None lets the tree grow until its leaves are pure).
# Each tree gets a fixed random_state because scikit-learn permutes features
# at every split; without a seed, tied splits are resolved differently on
# each run and the cross-validation scores are not reproducible.
# (Outputs recorded in this notebook predate the seeding and may differ slightly.)

dt1 = DecisionTreeRegressor(max_depth=1, random_state=42)
dt2 = DecisionTreeRegressor(max_depth=3, random_state=42)
dt3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dt4 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [23]:
# Label the four trees 'Decision Tree 1: ' .. 'Decision Tree 4: ' and pair
# each label with its estimator (this rebinds `estimators` from the earlier
# mixed-model list).
estimators = [
    ('Decision Tree {}: '.format(position), tree)
    for position, tree in enumerate((dt1, dt2, dt3, dt4), start=1)
]

In [24]:
# Score each tree on its own: mean R^2 across 10 cross-validation folds.
for label, model in estimators:
  fold_r2 = cross_val_score(model, X, y, scoring='r2', cv=10)
  print(label, np.round(fold_r2.mean(), 2))

Decision Tree 1:  -0.85
Decision Tree 2:  -0.11
Decision Tree 3:  -0.01
Decision Tree 4:  -0.38


In [25]:
# Equal-weight ensemble of the four decision trees, evaluated with the same
# 10-fold R^2 protocol as the individual trees.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(estimator=vr, X=X, y=y, scoring='r2', cv=10)
mean_r2 = np.round(scores.mean(), 2)
print("Voting Regressor: ", mean_r2)

Voting Regressor:  0.13
