## Voting and Stack Exploration
  
Here we experiment with voting and stacking ensemble methods on the MNIST dataset.  
  
We begin by building a voting classifier from Random Forest, SVM, and Logistic Regression models.

In [3]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler

# Get MNIST data
#
# fetch_mldata has been removed from scikit-learn (and the mldata.org service it
# relied on is offline), so the dataset is fetched from OpenML instead.
# np.float has likewise been removed from NumPy; np.float64 is the explicit
# equivalent of what it used to alias.

mnist = fetch_openml('mnist_784', version=1)

# np.asarray keeps this working whether fetch_openml returns NumPy arrays or
# pandas objects (the return type depends on the scikit-learn version).
# NOTE(review): the scaler is fit on the full dataset before it is split, so
# validation/test statistics leak into preprocessing. Consider fitting it on
# the training split only.
X = StandardScaler().fit_transform(np.asarray(mnist['data'], dtype=np.float64))

# OpenML serves the digit labels as strings; convert them to integer classes.
y = np.asarray(mnist['target']).astype(np.int64)

In [4]:
from sklearn.model_selection import train_test_split

# Split into train, validation, and test sections:
# first hold out 10,000 samples for testing, then 10,000 of the remainder for
# validation; whatever is left is the training set.
# A fixed random_state makes the split (and every score derived from it)
# reproducible across kernel restarts.

X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=10000, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=10000, random_state=42)

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

# Fit the three base models separately on the training split

for_clf = RandomForestClassifier(n_estimators=15)
for_clf.fit(X_train, y_train)

svm_clf = LinearSVC()
svm_clf.fit(X_train, y_train)

log_clf = LogisticRegression()
log_clf.fit(X_train, y_train)

# Kept together so the models can be scored in one pass
estimators = [for_clf, svm_clf, log_clf]



In [6]:
# Validation accuracy of each base model, in the order: forest, SVM, logistic regression

[clf.score(X_val, y_val) for clf in estimators]

[0.9546, 0.912, 0.9203]

In [10]:
from sklearn.ensemble import VotingClassifier

# Combine the three base models into a single voting ensemble and train it
# (the voting mode is left at its default)

vot_clf = VotingClassifier(
    estimators=[
        ('for', for_clf),
        ('svm', svm_clf),
        ('log', log_clf),
    ]
)
vot_clf.fit(X_train, y_train)



VotingClassifier(estimators=[('for', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_w...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))],
         flatten_transform=None, n_jobs=None, voting='hard', weights=None)

In [11]:
# Validation accuracy of the voting ensemble

vot_clf.score(X_val, y_val)

0.926

In [14]:
# Drop the weakest classifier (the SVM, by the validation scores above) and
# switch to soft voting.
#
# Soft voting averages predicted class probabilities, and LinearSVC does not
# provide predict_proba, so only the forest and the logistic regression can
# take part.
#
# A fresh ensemble is built instead of deleting from vot_clf.estimators_ in
# place: that deletion is not idempotent (re-running the cell would remove a
# second model) and leaves the fitted list out of sync with the declared
# estimators. The name vot_clf is rebound so that any later cell still sees
# the soft-voting ensemble, as before.

vot_clf = VotingClassifier(
    estimators=[('for', for_clf), ('log', log_clf)],
    voting='soft',
)
vot_clf.fit(X_train, y_train)
vot_clf.score(X_val, y_val)

0.946