In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

In [2]:
mnist = load_digits()

In [3]:
xtrain, xtest, ytrain, ytest = train_test_split(mnist.data, mnist.target)

In [4]:
print(xtrain.shape, ytrain.shape)
print(xtest.shape, ytest.shape)

(1347, 64) (1347,)
(450, 64) (450,)


In [5]:
from sklearn.tree import DecisionTreeClassifier

In [6]:
dt = DecisionTreeClassifier()

In [7]:
dt.fit(xtrain, ytrain)

DecisionTreeClassifier()

In [8]:
dt.score(xtest, ytest)

0.88

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [17]:
rf = RandomForestClassifier(n_estimators=20)

In [18]:
rf.fit(xtrain, ytrain)

RandomForestClassifier(n_estimators=20)

In [19]:
rf.score(xtest, ytest)

0.9866666666666667

In [20]:
from sklearn.ensemble import AdaBoostClassifier

In [21]:
ab = AdaBoostClassifier(dt, n_estimators=5, learning_rate=1)

In [22]:
ab.fit(xtrain, ytrain)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), learning_rate=1,
                   n_estimators=5)

In [23]:
ab.score(xtest, ytest)

0.8666666666666667

In [24]:
from sklearn.ensemble import VotingClassifier

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [26]:
lr = LogisticRegression()
svm = SVC(kernel='poly', degree=2)

In [27]:
vc = VotingClassifier(estimators=[('lr', lr), ('dt', dt), ('svm', svm)], 
                     voting='hard')

In [30]:
# vc.fit(xtrain, ytrain)

In [29]:
vc.score(xtest, ytest)

0.9977777777777778

In [31]:
from sklearn.model_selection import GridSearchCV

In [38]:
param_grid = [
    
    {'n_estimators': [5, 10, 20], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [5, 10], 'max_features': [3, 4, 5]}
    
]

In [39]:
grid_rf = RandomForestClassifier(random_state=42)

In [41]:
grid_search = GridSearchCV(grid_rf, param_grid, scoring='neg_mean_squared_error', return_train_score=True)

In [42]:
grid_search.fit(xtrain, ytrain)

GridSearchCV(estimator=RandomForestClassifier(random_state=42),
             param_grid=[{'max_features': [2, 4, 6, 8],
                          'n_estimators': [5, 10, 20]},
                         {'bootstrap': [False], 'max_features': [3, 4, 5],
                          'n_estimators': [5, 10]}],
             return_train_score=True, scoring='neg_mean_squared_error')

In [43]:
grid_search.best_params_

{'max_features': 8, 'n_estimators': 20}

In [44]:
grid_search.best_estimator_

RandomForestClassifier(max_features=8, n_estimators=20, random_state=42)

In [45]:
rf  = RandomForestClassifier(max_features=8, n_estimators=20, random_state=42)

In [46]:
rf.fit(xtrain, ytrain)

RandomForestClassifier(max_features=8, n_estimators=20, random_state=42)

In [47]:
rf.score(xtest, ytest)

0.9777777777777777