<a href="https://colab.research.google.com/github/Jo-chana/CoLab_ML_codes/blob/master/ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
import numpy as np


In [0]:
mnist = datasets.fetch_openml('mnist_784', version=1)
X = mnist['data']
y = mnist['target']

In [0]:
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [0]:
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train)

In [10]:
randomforest_clf = RandomForestClassifier()
randomforest_clf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [0]:
def accuracy(X_test,y_test,model):
  y_pred = model.predict(X_test)
  accuracy = np.mean(y_pred == y_test)
  return accuracy

In [12]:
accuracy(X_val, y_val, randomforest_clf)

0.9660190476190477

In [13]:
extra_tree_clf = ExtraTreesClassifier()
extra_tree_clf.fit(X_train,y_train)

ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100,
                     n_jobs=None, oob_score=False, random_state=None, verbose=0,
                     warm_start=False)

In [14]:
accuracy(X_val,y_val,extra_tree_clf)

0.9687619047619047

In [22]:
svm_clf = SVC(probability=True)
svm_clf.fit(X_train,y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [16]:
accuracy(X_val,y_val,svm_clf)

0.9744

In [17]:
# Hard voting ensemble

hard_voting_clf = VotingClassifier(
    estimators=[('randomforest',randomforest_clf),('extratree',extra_tree_clf),('svc',svm_clf)],
    voting='hard'
)
hard_voting_clf.fit(X_train,y_train)

VotingClassifier(estimators=[('randomforest',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0

In [18]:
accuracy(X_val, y_val, hard_voting_clf)

0.9706666666666667

In [23]:
# soft voting ensemble

soft_voting_clf = VotingClassifier(
    estimators=[('randomforest',randomforest_clf),('extratree',extra_tree_clf),('svc',svm_clf)],
    voting='soft'
)
soft_voting_clf.fit(X_train,y_train)

VotingClassifier(estimators=[('randomforest',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0

In [24]:
accuracy(X_val,y_val,soft_voting_clf)

0.9756190476190476

In [0]:
# blender training

random_forest_blender = RandomForestClassifier(n_estimators=200,random_state=42)
estimators = [randomforest_clf,extra_tree_clf,svm_clf]
X_predictions = np.empty((len(X_val), len(estimators)),dtype=np.float32)

for index, estimator in enumerate(estimators):
  X_predictions[:,index] = estimator.predict(X_val)

In [29]:
random_forest_blender.fit(X_predictions, y_val)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [0]:
# testing

X_test_pred = np.empty((len(X_test),len(estimators)),dtype=np.float32)
for index, estimator in enumerate(estimators):
  X_test_pred[:,index] = estimator.predict(X_test)



In [39]:
accuracy(X_test_pred,y_test,random_forest_blender)

0.9729714285714286