In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor

In [2]:
X,y = load_diabetes(return_X_y=True)

In [3]:
lr = LinearRegression()
dt = DecisionTreeRegressor()
knnr = DecisionTreeRegressor()
rfr = RandomForestRegressor()

In [4]:
estimators = [('Linear Regressor',lr),('Decision tree',dt),('KNN Regressor',knnr), ('Random Forest Regressor', rfr)]

In [5]:
for estimator in estimators:
  scores = cross_val_score(estimator[1],X,y,scoring='r2',cv=10)
  print(estimator[0],np.round(np.mean(scores),2))

Linear Regressor 0.46
Decision tree -0.25
KNN Regressor -0.29
Random Forest Regressor 0.39


In [6]:
vr = VotingRegressor(estimators)
scores = cross_val_score(vr,X,y,scoring='r2',cv=10)
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.27


In [7]:
for i in range(1,3):
    for j in range(1,3):
        for k in range(1,3):
            for l in range(1,3):
                vc = VotingRegressor(estimators=estimators, weights=[i,j,k,l])
                x = cross_val_score(vc,X,y,cv=10,scoring='r2')
                print("for i={}, j={}, k={}, l={}".format(i,j,k,l), np.round(np.mean(x),2))            

for i=1, j=1, k=1, l=1 0.24
for i=1, j=1, k=1, l=2 0.3
for i=1, j=1, k=2, l=1 0.2
for i=1, j=1, k=2, l=2 0.26
for i=1, j=2, k=1, l=1 0.18
for i=1, j=2, k=1, l=2 0.23
for i=1, j=2, k=2, l=1 0.13
for i=1, j=2, k=2, l=2 0.18
for i=2, j=1, k=1, l=1 0.33
for i=2, j=1, k=1, l=2 0.35
for i=2, j=1, k=2, l=1 0.27
for i=2, j=1, k=2, l=2 0.31
for i=2, j=2, k=1, l=1 0.28
for i=2, j=2, k=1, l=2 0.29
for i=2, j=2, k=2, l=1 0.21
for i=2, j=2, k=2, l=2 0.26


## Voting Regression for Same Model

In [8]:
dt1 = DecisionTreeRegressor(max_depth=1)
dt2 = DecisionTreeRegressor(max_depth=3)
dt3 = DecisionTreeRegressor(max_depth=5)
dt4 = DecisionTreeRegressor(max_depth=7)
dt5 = DecisionTreeRegressor(max_depth=None)

In [9]:
estimators = [('dt1',dt1),('dt2',dt2),('dt3',dt3),('dt4',dt4),('dt5',dt5)]

In [10]:
for estimator in estimators:
  scores = cross_val_score(estimator[1],X,y,scoring='r2',cv=10)
  print(estimator[0],np.round(np.mean(scores),2))

dt1 0.15
dt2 0.28
dt3 0.16
dt4 -0.05
dt5 -0.23


In [11]:
vr = VotingRegressor(estimators)
scores = cross_val_score(vr,X,y,scoring='r2',cv=10)
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.23


`Note`

This notebook focuses on understanding the implementation of voting ensembles for classification and regression.
For voting ensembles to work effectively, two key assumptions should be met:

1. The base models should be independent (make different kinds of errors).

2. Each model should perform better than random guessing (accuracy > 50% for classification).

Since these conditions were not fully met here, the ensemble results may not show much improvement â€” which is expected for this demonstration.