You can use the `VotingClassifier` class from scikit-learn's `sklearn.ensemble` module as follows:

In [1]:
from sklearn.ensemble import VotingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# Load the Iris dataset and pull out the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and test sets: 20% held out, fixed seed for repeatability
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [2]:

# Two base classifiers, each with a fixed seed so the run is repeatable
model1 = LogisticRegression(random_state=1)
model2 = DecisionTreeClassifier(random_state=1)
# Combine them under the names 'lr' and 'dt'; voting='hard' selects label voting
model = VotingClassifier(estimators=[('lr', model1), ('dt', model2)], voting='hard')
model.fit(x_train,y_train)
# Score on the held-out test split (shown as the cell output)
model.score(x_test,y_test)

1.0

Similar to the max voting technique, multiple predictions are made for each data point in averaging. In this method, we take an average of predictions from all the models and use it to make the final prediction.

In [3]:
# Three different base learners, all with default hyper-parameters
model1 = DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3 = LogisticRegression()

# Train every learner on the same training split
for base_model in (model1, model2, model3):
    base_model.fit(x_train, y_train)

# Per-class probability estimates on the test split, one array per learner
pred1, pred2, pred3 = (
    fitted.predict_proba(x_test) for fitted in (model1, model2, model3)
)

# Unweighted mean of the three probability arrays
finalpred = sum((pred1, pred2, pred3)) / 3
finalpred

array([[1.26797583e-03, 9.42579888e-01, 5.61521363e-02],
       [9.82324338e-01, 1.76755953e-02, 6.62553491e-08],
       [2.95387167e-09, 5.16184400e-04, 9.99483813e-01],
       [2.16196874e-03, 9.30742779e-01, 6.70952525e-02],
       [4.85938853e-04, 9.24709422e-01, 7.48046388e-02],
       [9.85300385e-01, 1.46995560e-02, 5.86807397e-08],
       [2.60062897e-02, 9.69232705e-01, 4.76100505e-03],
       [5.46298417e-05, 5.15184806e-02, 9.48426890e-01],
       [7.41347624e-04, 7.87617299e-01, 2.11641353e-01],
       [9.48022205e-03, 9.81911540e-01, 8.60823821e-03],
       [1.48510453e-04, 1.47519137e-01, 8.52332352e-01],
       [9.89374892e-01, 1.06250824e-02, 2.59849037e-08],
       [9.91002140e-01, 8.99784938e-03, 1.10604416e-08],
       [9.87302031e-01, 1.26979319e-02, 3.69004893e-08],
       [9.93095417e-01, 6.90456199e-03, 2.14844622e-08],
       [1.53176240e-03, 9.04124069e-01, 9.43441689e-02],
       [2.43912299e-06, 8.04585545e-03, 9.91951705e-01],
       [9.12084405e-03, 9.82588

Weighted Average:


This is an extension of the averaging method. All models are assigned different weights defining the importance of each model for prediction. 

In [4]:
# Build and train the three base learners (fit() hands back the fitted estimator)
model1 = DecisionTreeClassifier().fit(x_train, y_train)
model2 = KNeighborsClassifier().fit(x_train, y_train)
model3 = LogisticRegression().fit(x_train, y_train)

# Per-class probability estimates on the test split, one array per learner
pred1 = model1.predict_proba(x_test)
pred2 = model2.predict_proba(x_test)
pred3 = model3.predict_proba(x_test)

# Weighted blend: 0.3 for the tree, 0.3 for kNN, 0.4 for logistic regression
finalpred = 0.3 * pred1 + 0.3 * pred2 + 0.4 * pred3
finalpred

array([[1.52157100e-03, 9.31095865e-01, 6.73825635e-02],
       [9.78789206e-01, 2.12107143e-02, 7.95064189e-08],
       [3.54464600e-09, 6.19421280e-04, 9.99380575e-01],
       [2.59436248e-03, 9.16891335e-01, 8.05143030e-02],
       [5.83126624e-04, 9.09651307e-01, 8.97655665e-02],
       [9.82360462e-01, 1.76394672e-02, 7.04168876e-08],
       [3.12075476e-02, 9.63079246e-01, 5.71320606e-03],
       [6.55558100e-05, 6.18221767e-02, 9.38112267e-01],
       [8.89617149e-04, 7.85140759e-01, 2.13969624e-01],
       [1.13762665e-02, 9.78293848e-01, 1.03298859e-02],
       [1.78212544e-04, 1.57022964e-01, 8.42798823e-01],
       [9.87249870e-01, 1.27500989e-02, 3.11818845e-08],
       [9.89202567e-01, 1.07974193e-02, 1.32725299e-08],
       [9.84762437e-01, 1.52375183e-02, 4.42805872e-08],
       [9.91714500e-01, 8.28547439e-03, 2.57813546e-08],
       [1.83811488e-03, 8.84948882e-01, 1.13213003e-01],
       [2.92694759e-06, 9.65502654e-03, 9.90342047e-01],
       [1.09450129e-02, 9.79106

Stacking is an ensemble learning technique that uses predictions from multiple models (for example decision tree, knn or svm) to build a new model. 

We first define a function that trains a model on n folds of the training set and makes predictions for both the training set and the test set. The function returns the test-set predictions and the training-set predictions for the model it is given.

In [12]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
import pandas as pd

In [11]:
def _take_rows(data, indices):
    """Select rows by position from a pandas object or an array-like."""
    if hasattr(data, "iloc"):
        return data.iloc[indices]
    return np.asarray(data)[indices]


def Stacking(model,train,y,test,n_fold):
    """Generate stacking features from ``model`` for the train and test sets.

    The training rows are split into ``n_fold`` stratified folds. For each
    fold the model is fitted on the remaining folds and used to predict the
    held-out rows, so every training row receives a prediction from a model
    that never saw it. The model is then refitted on the whole training set
    and used once to predict ``test``.

    Parameters
    ----------
    model : estimator exposing ``fit(X=..., y=...)`` and ``predict(X)``.
        It is refitted in place; on return it is fitted on all of ``train``.
    train : pandas DataFrame or array-like of training features.
    y : pandas Series or array-like of training labels, one per row of
        ``train``.
    test : pandas DataFrame or array-like of test features.
    n_fold : int, number of stratified folds.

    Returns
    -------
    (test_pred, train_pred)
        ``test_pred`` is a column array of shape ``(n_test, 1)``;
        ``train_pred`` is a 1-D array with one out-of-fold prediction per
        training row, in the same row order as ``train`` / ``y``.
    """
    # random_state is only legal together with shuffle=True; recent
    # scikit-learn versions raise ValueError otherwise.
    folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=1)
    labels = np.asarray(y)

    fold_positions = []
    fold_predictions = []
    for train_indices, val_indices in folds.split(train, labels):
        fold_x = _take_rows(train, train_indices)
        fold_val = _take_rows(train, val_indices)
        model.fit(X=fold_x, y=labels[train_indices])
        fold_positions.append(val_indices)
        fold_predictions.append(np.asarray(model.predict(fold_val)))

    # Folds visit the rows out of order; undo that so train_pred[i] is the
    # prediction for training row i and lines up with y.
    original_order = np.argsort(np.concatenate(fold_positions))
    train_pred = np.concatenate(fold_predictions)[original_order]

    # One prediction per test row, from a model trained on all training data
    # (previously every fold's test predictions were appended after an
    # uninitialised buffer, giving (n_fold + 1) * n_test rows).
    model.fit(X=train, y=labels)
    test_pred = np.asarray(model.predict(test))

    return test_pred.reshape(-1, 1), train_pred


Now we’ll create two base models – decision tree and knn.

In [None]:
# Base model 1: a decision tree with a fixed seed.
# DecisionTreeClassifier is used via its direct import from sklearn.tree;
# the `tree` module alias is not imported until a later cell.
model1 = DecisionTreeClassifier(random_state=1)

# Stacking returns (test-set predictions, training-set predictions)
test_pred1 ,train_pred1=Stacking(model=model1,n_fold=10, train=x_train,test=x_test,y=y_train)

# Wrap both prediction arrays in DataFrames so they can be concatenated column-wise
train_pred1=pd.DataFrame(train_pred1)
test_pred1=pd.DataFrame(test_pred1)

In [None]:
# Base model 2: k-nearest-neighbours classifier with default settings
model2 = KNeighborsClassifier()

# Stacking returns (test-set predictions, training-set predictions) using 10 folds
test_pred2 ,train_pred2=Stacking(model=model2,n_fold=10,train=x_train,test=x_test,y=y_train)

# Wrap both prediction arrays in DataFrames so they can be concatenated column-wise
train_pred2=pd.DataFrame(train_pred2)
test_pred2=pd.DataFrame(test_pred2)

Create a third model, logistic regression, on the predictions of the decision tree and knn models.

In [None]:
# Meta-features: place the two base models' predictions side by side (one column each)
df = pd.concat([train_pred1, train_pred2], axis=1)
df_test = pd.concat([test_pred1, test_pred2], axis=1)

# Meta-model: logistic regression trained on the base-model predictions
# NOTE(review): assumes each row of df corresponds to the same row of y_train and
# that df_test has exactly one row per entry of y_test -- confirm against Stacking().
model = LogisticRegression(random_state=1)
model.fit(df,y_train)
# Score of the stacked model on the test set (shown as the cell output)
model.score(df_test, y_test)

Bagging

The idea behind bagging is combining the results of multiple models (for instance, all decision trees) to get a generalized result.

Loan Prediction

In [None]:
#importing important packages
import pandas as pd
import numpy as np

#reading the dataset
df=pd.read_csv("....../train.csv")

#filling missing values
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# df['Gender']: that chained in-place call operates on a temporary object and
# leaves df unchanged when pandas Copy-on-Write is active.
df['Gender'] = df['Gender'].fillna('Male')

In [None]:
#split dataset into train and test

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as a test set (fixed seed for repeatability)
train, test = train_test_split(df, test_size=0.3, random_state=0)

# Features are every column except the target 'Loan_Status'
x_train=train.drop('Loan_Status',axis=1)
y_train=train['Loan_Status']

x_test=test.drop('Loan_Status',axis=1)
y_test=test['Loan_Status']

#create dummies
x_train=pd.get_dummies(x_train)
x_test=pd.get_dummies(x_test)

# get_dummies only creates columns for the categories present in the frame it
# is given, so train and test can end up with different columns. Force the test
# frame onto the training columns: categories unseen in test become all-zero
# columns and categories unseen in train are dropped.
x_test = x_test.reindex(columns=x_train.columns, fill_value=0)

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn import tree
# Bagging ensemble built around a decision-tree classifier with a fixed seed
model = BaggingClassifier(tree.DecisionTreeClassifier(random_state=1))
model.fit(x_train, y_train)
# Score on the held-out test split; the value recorded below is from the original run
model.score(x_test,y_test)
#0.75135135135135134

In [None]:
from sklearn.ensemble import BaggingRegressor
# Bagging ensemble built around a decision-tree regressor with a fixed seed
# NOTE(review): y_train here is the 'Loan_Status' column taken straight from the
# CSV; a regressor needs a numeric target -- confirm the column is numerically
# encoded before running this cell.
model = BaggingRegressor(tree.DecisionTreeRegressor(random_state=1))
model.fit(x_train, y_train)
# Score on the held-out test split (shown as the cell output)
model.score(x_test,y_test)

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with its default base estimator and a fixed seed
model = AdaBoostClassifier(random_state=1)
model.fit(x_train, y_train)
# Score on the held-out test split; the value recorded below is from the original run
model.score(x_test,y_test)
#0.81081081081081086

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with a learning rate of 0.01 and a fixed seed
model= GradientBoostingClassifier(learning_rate=0.01,random_state=1)
model.fit(x_train, y_train)
# Score on the held-out test split; the value recorded below is from the original run
model.score(x_test,y_test)
#0.81621621621621621

In [None]:
import xgboost as xgb
# XGBoost classifier with a learning rate of 0.01 and a fixed seed
# NOTE(review): y_train is the raw 'Loan_Status' column; some xgboost releases
# expect integer-encoded class labels -- confirm the encoding before running.
model=xgb.XGBClassifier(random_state=1,learning_rate=0.01)
model.fit(x_train, y_train)
# Score on the held-out test split; the value recorded below is from the original run
model.score(x_test,y_test)
#0.82702702702702702