In [1]:
from sklearnex import patch_sklearn
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Ensemble Algorithm
## Voting Ensemble Classifier SKLearn

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


# Data
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
# import the data set and perform the split
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# set up three different classifier
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

# set up the voting classifier
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

In [6]:
# train the model
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

In [7]:
# evaluate the different models
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.816
SVC 0.896
VotingClassifier 0.88


## Bagging

In [8]:
# a bagging application on DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500, # n_estimators: the number of estimators to create
    max_samples=100, bootstrap=True, n_jobs=-1) # max_samples: 100 training instances, boostrap: replacement, n_jobs: CPU cores
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [9]:
# check the oob evaluation
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500, # n_estimators: the number of estimators to create
    max_samples=100, bootstrap=True, n_jobs=-1, # max_samples: 100 training instances, boostrap: replacement, n_jobs: CPU cores
    oob_score = True) 
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.9226666666666666

In [10]:
# verify the evaluation on the test set
from sklearn.metrics import accuracy_score
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.912

## Random Forest
A model utilize bagging!

In [11]:
# following is the application of random forest
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators = 500, max_leaf_nodes = 16, n_jobs = -1)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)

In [14]:
# the following classifier is the same as the above Random Forest Classifier
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features='auto', max_leaf_nodes = 16),
    n_estimators = 500, max_samples=1.0, bootstrap=True, n_jobs = -1)

In [16]:
# application to check the random forest feature importance
from sklearn.datasets import load_iris
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris['data'], iris['target'])
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.06843481038210267
sepal width (cm) 0.021099092972385457
petal length (cm) 0.36599466156313115
petal width (cm) 0.5444714350823807


## AdaBoost

In [17]:
# application of AdaBoost
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm="SAMME.R", learning_rate=0.5)
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)