In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn import metrics  
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier 


- Ensemble learning combines several models to achieve better predictive performance than any single model could on its own.

In [5]:
# Read the Iris CSV and discard its 'Id' column in a single chained expression.
df = pd.read_csv('Iris.csv').drop(columns=['Id'])
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


<b> Max Voting / Voting Classifier </b>
<br>
- The max voting method is generally used for classification problems. In this technique, multiple models are used to make predictions for each data point. The predictions by each model are considered as a ‘vote’. The predictions which we get from the majority of the models are used as the final prediction.

- A Voting Classifier is a machine learning model that trains an ensemble of several models and predicts the output class by aggregating their individual predictions. With hard voting it returns the class chosen by the majority of the classifiers; with soft voting it returns the class with the highest average predicted probability. The idea is that, instead of creating separate dedicated models and finding the accuracy of each of them, we create a single model that trains all of these models and predicts the output based on their combined vote for each output class.

In [7]:
from sklearn.datasets import load_iris

# ---- CSV-based data set (lower-case x / y) ----
# Replace the string species names in df with integer codes.
le = LabelEncoder()
df['Species'] = le.fit_transform(df['Species'])

feature_frame = df.iloc[:, :4]  # first four columns of df
y = df['Species']

# Standardise the features.
# NOTE(review): the scaler is fitted on every row here, i.e. before the
# train/test split done in a later cell, so the held-out rows contribute to
# the scaling statistics.
scaler = StandardScaler()
x = scaler.fit_transform(feature_frame)

# ---- scikit-learn's bundled Iris data (upper-case X / Y, left unscaled) ----
iris = load_iris()
X = iris.data[:, :4]
Y = iris.target
X.shape, Y.shape

((150, 4), (150,))

In [8]:
# Hold out 20% of the scaled CSV features for testing; the fixed seed keeps
# the split identical across reruns.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=5,
)

In [9]:
# Three different base learners for the voting ensemble.
# Both tree-based estimators are randomised, so each gets a fixed seed;
# without one on the random forest the scores below change on every rerun.
model1 = tree.DecisionTreeClassifier(random_state=1)
model2 = RandomForestClassifier(n_estimators=200, random_state=1)
model3 = KNeighborsClassifier(n_neighbors=7)

# Hard voting: the ensemble predicts the class label chosen by most members.
model = VotingClassifier(
    estimators=[('dt', model1), ('rf', model2), ('kn', model3)],
    voting='hard',
)

<b> Hard Voting</b>: In hard voting, the predicted output class is the class that receives the majority of the votes, i.e. the class label predicted by the largest number of classifiers. Suppose three classifiers predicted the output classes (A, A, B); the majority predicted A, so A will be the final prediction.

In [10]:
# Train the voting ensemble, then report its mean accuracy on the held-out rows
# (fit() hands back the fitted estimator, so the two calls can be chained).
model.fit(x_train, y_train).score(x_test, y_test)

0.9333333333333333

In [11]:
# Decision tree on its own: fit on the training rows, score on the held-out rows.
model1.fit(x_train, y_train).score(x_test, y_test)

0.9

In [12]:
# Random forest on its own: fit on the training rows, score on the held-out rows.
model2.fit(x_train, y_train).score(x_test, y_test)

0.9

In [13]:
# k-nearest-neighbours on its own: fit on the training rows, score on the held-out rows.
model3.fit(x_train, y_train).score(x_test, y_test)

0.9666666666666667

- Trying hard voting with other model combos

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

# Split scikit-learn's bundled Iris arrays (X, Y), holding out 20% for testing.
# NOTE(review): this rebinds y_train / y_test, which an earlier cell created
# from the CSV-based split; after this cell they no longer line up with
# x_train / x_test.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

# Ensemble members as (name, estimator) pairs.
# - LogisticRegression: `multi_class` is left at its default. The lbfgs solver
#   already fits a multinomial model for a multi-class target, and the explicit
#   argument is deprecated in recent scikit-learn releases.
# - SVC(probability=True) and DecisionTreeClassifier are randomised, so both
#   get a fixed seed to make the ensemble scores repeatable.
estimator = [
    ('LR', LogisticRegression(solver='lbfgs', max_iter=200)),
    ('SVC', SVC(gamma='auto', probability=True, random_state=42)),
    ('DTC', DecisionTreeClassifier(random_state=42)),
]

In [17]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Majority-vote ensemble over the (name, estimator) pairs held in `estimator`.
hard_voting = VotingClassifier(voting='hard', estimators=estimator)

# Train on the training split, then predict labels for the test split.
y_pred = hard_voting.fit(X_train, y_train).predict(X_test)

In [19]:
# Accuracy of the hard-voting predictions on the test split.
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so print it with a float conversion;
# an integer conversion ("%d") would truncate every value below 1.0 to 0.
print("Hard Voting Score %.4f" % score)

Hard Voting Score  1


<b>Soft Voting</b>: In soft voting, the output class is the prediction based on the average of probability given to that class. Suppose given some input to three models, the prediction probability for class A = (0.30, 0.47, 0.53) and B = (0.20, 0.32, 0.40). So the average for class A is 0.4333 and B is 0.3067, the winner is clearly class A because it had the highest probability averaged by each classifier.

In [20]:
# Soft-voting ensemble over the same (name, estimator) pairs: the predicted
# class is the one with the highest averaged class probability.
soft_voting = VotingClassifier(voting='soft', estimators=estimator)

# Train on the training split, then predict labels for the test split.
y_pred = soft_voting.fit(X_train, y_train).predict(X_test)

In [21]:
# Accuracy of the soft-voting predictions on the test split.
score = accuracy_score(y_test, y_pred)
# Accuracy is a fraction between 0 and 1, so print it with a float conversion;
# an integer conversion ("%d") would truncate every value below 1.0 to 0.
print("Soft Voting Score %.4f" % score)

Soft Voting Score  1


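In practice, soft voting often gives higher accuracy than hard voting because it averages the predicted probabilities of all the estimators instead of counting only their final labels. On our simple Iris dataset, however, both ensembles already classify the test set perfectly (the models may even be overfitting), so there is no visible difference in the output.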

### Maxvoting result


In [96]:
# Labels predicted by the hard-voting ensemble `model` for the CSV-based test rows.
y_pred = model.predict(x_test)
y_pred

array([1, 2, 2, 0, 2, 1, 0, 2, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2,
       0, 1, 1, 2, 1, 1, 1, 2])

In [97]:

# Per-class probabilities from the decision tree for each test row.
y_pred1 = model1.predict_proba(x_test)
y_pred1

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [98]:

# Per-class probabilities from the random forest for each test row.
y_pred2 = model2.predict_proba(x_test)
y_pred2

array([[0.   , 1.   , 0.   ],
       [0.   , 0.53 , 0.47 ],
       [0.   , 0.005, 0.995],
       [1.   , 0.   , 0.   ],
       [0.   , 0.   , 1.   ],
       [0.005, 0.77 , 0.225],
       [1.   , 0.   , 0.   ],
       [0.   , 0.04 , 0.96 ],
       [0.995, 0.005, 0.   ],
       [0.02 , 0.935, 0.045],
       [0.   , 0.995, 0.005],
       [0.   , 0.675, 0.325],
       [0.   , 0.005, 0.995],
       [0.   , 0.   , 1.   ],
       [1.   , 0.   , 0.   ],
       [0.995, 0.005, 0.   ],
       [0.   , 0.05 , 0.95 ],
       [0.   , 0.   , 1.   ],
       [1.   , 0.   , 0.   ],
       [1.   , 0.   , 0.   ],
       [0.   , 1.   , 0.   ],
       [0.   , 0.   , 1.   ],
       [1.   , 0.   , 0.   ],
       [0.   , 0.76 , 0.24 ],
       [0.   , 1.   , 0.   ],
       [0.   , 0.   , 1.   ],
       [0.   , 0.99 , 0.01 ],
       [0.   , 1.   , 0.   ],
       [0.   , 0.985, 0.015],
       [0.   , 0.   , 1.   ]])

In [99]:

# Per-class probabilities from the k-nearest-neighbours model for each test row.
y_pred3 = model3.predict_proba(x_test)
y_pred3

array([[0.        , 1.        , 0.        ],
       [0.        , 0.28571429, 0.71428571],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.57142857, 0.42857143],
       [1.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.71428571, 0.28571429],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.28571429, 0.71428571],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [1.

<b>_Averaging_ </b><br>
Multiple predictions are made for each data point in averaging. In this method, we take an average of predictions from all the models and use it to make the final prediction. 
- The simplest way to develop a model averaging ensemble in Keras is to train multiple models on the same dataset then combine the predictions from each of the trained models.
- The scikit-learn library provides the `make_blobs()` function, which can be used to create a multi-class classification problem with a prescribed number of samples, input variables, classes, and variance of samples within a class.

We use this problem with 500 examples, with input variables to represent the x and y coordinates of the points and a standard deviation of 2.0 for points within each group. We will use the same random state to ensure that we always get the same 500 points.


Averaging the predicted class probabilities directly:

In [100]:
# Simple averaging: element-wise mean of the three models' probability arrays.
member_probas = (y_pred1, y_pred2, y_pred3)
y_pred_avg = sum(member_probas) / 3
y_pred_avg

array([[0.        , 1.        , 0.        ],
       [0.        , 0.27190476, 0.72809524],
       [0.        , 0.00166667, 0.99833333],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.00166667, 0.59      , 0.40833333],
       [1.        , 0.        , 0.        ],
       [0.        , 0.20380952, 0.79619048],
       [0.99833333, 0.00166667, 0.        ],
       [0.00666667, 0.97833333, 0.015     ],
       [0.        , 0.99833333, 0.00166667],
       [0.        , 0.79642857, 0.20357143],
       [0.        , 0.00166667, 0.99833333],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [0.99833333, 0.00166667, 0.        ],
       [0.        , 0.11190476, 0.88809524],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [1.

In [101]:
# Weighted averaging: each model's probabilities are scaled by its weight
# (0.5, 0.3, 0.2 for model1, model2, model3) and the results are added up.
y_pred_wavg = sum(
    weight * proba
    for weight, proba in zip((0.5, 0.3, 0.2), (y_pred1, y_pred2, y_pred3))
)
y_pred_wavg

array([[0.        , 1.        , 0.        ],
       [0.        , 0.21614286, 0.78385714],
       [0.        , 0.0015    , 0.9985    ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.0015    , 0.431     , 0.5675    ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.12628571, 0.87371429],
       [0.9985    , 0.0015    , 0.        ],
       [0.006     , 0.9805    , 0.0135    ],
       [0.        , 0.9985    , 0.0015    ],
       [0.        , 0.84535714, 0.15464286],
       [0.        , 0.0015    , 0.9985    ],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [0.9985    , 0.0015    , 0.        ],
       [0.        , 0.07214286, 0.92785714],
       [0.        , 0.        , 1.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [1.

Averaging the predicted class labels directly:

In [102]:
# Hard label predictions from each base model for the test rows.
Y_pred1, Y_pred2, Y_pred3 = (
    fitted.predict(x_test) for fitted in (model1, model2, model3)
)

# Element-wise mean of the three label arrays.
# NOTE(review): the labels are integer codes for species names, so a
# fractional mean (e.g. 1.67) is not itself a class label.
Y_AVG = sum((Y_pred1, Y_pred2, Y_pred3)) / 3
Y_AVG

array([1.        , 1.66666667, 2.        , 0.        , 2.        ,
       1.33333333, 0.        , 1.66666667, 0.        , 1.        ,
       1.        , 1.        , 2.        , 2.        , 0.        ,
       0.        , 2.        , 2.        , 0.        , 0.        ,
       1.        , 2.        , 0.        , 1.        , 1.        ,
       2.        , 1.        , 1.        , 1.        , 2.        ])

In [103]:
# Weighted combination of the three label arrays, using weights 0.5, 0.3 and
# 0.2 for model1, model2 and model3 respectively.
Y_WAVG = sum(
    weight * labels
    for weight, labels in zip((0.5, 0.3, 0.2), (Y_pred1, Y_pred2, Y_pred3))
)
Y_WAVG

array([1. , 1.7, 2. , 0. , 2. , 1.5, 0. , 1.8, 0. , 1. , 1. , 1. , 2. ,
       2. , 0. , 0. , 2. , 2. , 0. , 0. , 1. , 2. , 0. , 1. , 1. , 2. ,
       1. , 1. , 1. , 2. ])