# Custom Ensemble Techniques

In [3]:
# Making our own synthetic dataset.
from sklearn.datasets import make_classification
X,y = make_classification(n_samples = 1000,n_features=20,n_informative=15, n_redundant=5, random_state=1)

In [7]:
X.shape , y.shape

((1000, 20), (1000,))

In [8]:
from collections import Counter
Counter(y)

Counter({0: 501, 1: 499})

In [13]:
# Loading all the libraries
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score , RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline

In [19]:
models = list() # creating a list where we will be storing all the models

decision_tree = Pipeline([('m',DecisionTreeClassifier())])
models.append(('decision',decision_tree))

random_forest = Pipeline([('m',RandomForestClassifier())])
models.append(('randomforest',random_forest))

svc = Pipeline([('m',SVC())])
models.append(('svc',svc))

# Defining the voting ensemble
ensemble = VotingClassifier(estimators=models, voting='hard')


In [20]:
models

[('decision', Pipeline(steps=[('m', DecisionTreeClassifier())])),
 ('randomforest', Pipeline(steps=[('m', RandomForestClassifier())])),
 ('svc', Pipeline(steps=[('m', SVC())]))]

In [21]:
ensemble

VotingClassifier(estimators=[('decision',
                              Pipeline(steps=[('m',
                                               DecisionTreeClassifier())])),
                             ('randomforest',
                              Pipeline(steps=[('m',
                                               RandomForestClassifier())])),
                             ('svc', Pipeline(steps=[('m', SVC())]))])

In [22]:
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(ensemble, X, y, scoring='accuracy' , cv = cv , n_jobs=-1)

In [23]:
n_scores

array([0.93, 0.97, 0.94, 0.94, 0.9 , 0.96, 0.92, 0.92, 0.96, 0.95, 0.96,
       0.91, 0.91, 0.94, 0.94, 0.94, 0.95, 0.92, 0.96, 0.97, 0.95, 0.98,
       0.95, 0.97, 0.92, 0.87, 0.97, 0.94, 0.94, 0.91])

In [24]:
n_scores.mean()

0.9396666666666669