<h1>Example Ensemble Learning with 3 classifiers</h1>

Code adapted from "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems" by Aurélien Géron.


In [51]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

Use the [sklearn Moons dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html) to test this classifier approach.

In [64]:
from sklearn.datasets import make_moons

data_size = 100 # change to get more instances in the training / test sets
test_set_ratio = 0.2 # proportion of the data to allocate to the test set
random_seed = 42 # fixed seed so the dataset and the split are the same on every run

# Generate the two-moons samples; seeding make_moons keeps X and y stable
# across kernel restarts.
X,y = make_moons(n_samples=data_size, noise=0.1, random_state=random_seed)

#generate training and test datasets
# Shuffle the row indices with a dedicated seeded generator (rather than the
# global NumPy state), take the first test_set_size shuffled indices as the
# test set and the remainder as the training set.
rng = np.random.RandomState(random_seed)
random_indices = rng.permutation(len(X))
test_set_size = int(len(X) * test_set_ratio)
train_indices = random_indices[test_set_size:]
test_indices = random_indices[:test_set_size]
X_train = X[train_indices]
X_test = X[test_indices]
y_train = y[train_indices]
y_test = y[test_indices]

Set up the classifiers.

In [65]:
# Three different base learners for the ensemble. The random forest is seeded
# so that re-running the notebook builds the same forest and reports the same
# accuracy instead of a slightly different one each time.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

# voting='hard': the ensemble's prediction is decided by a majority vote over
# the class labels predicted by the individual estimators.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

# Kept as the final expression so the fitted ensemble's repr is displayed.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)), ('rf', RandomF...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         n_jobs=1, voting='hard', weights=None)

Check the Ensemble's accuracy score.

In [66]:
from sklearn.metrics import accuracy_score

# Fit each base learner and the ensemble on the training split, then print
# one line per model: its class name followed by its test-set accuracy.
candidates = (log_clf, rnd_clf, svm_clf, voting_clf)
for clf in candidates:
    # fit() hands back the estimator itself, so predict() can be chained on it.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    model_name = type(clf).__name__
    print(model_name, accuracy_score(y_test, y_pred))

LogisticRegression 0.8
RandomForestClassifier 0.85
SVC 0.95
VotingClassifier 0.9
