In [1]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, cross_val_score, cross_val_predict
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix

In [2]:
digit = load_digits()

score_board = {'Classifier': [], 'Accuracy': []}

In [3]:
X = digit.data
y = digit.target

spliter = StratifiedShuffleSplit(n_splits = 3, test_size = 0.2)

In [4]:
for train_index, test_index in spliter.split(X, y):
    strat_train_x = X[train_index]
    strat_train_y = y[train_index]
    strat_test_x = X[test_index]
    strat_test_y = y[test_index]

In [5]:
log_reg = LogisticRegression()
grid_model = GridSearchCV(log_reg, param_grid = {'max_iter': [4000, 5000, 6000]}, cv = 3)
grid_model.fit(strat_train_x, strat_train_y)

GridSearchCV(cv=3, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'max_iter': [4000, 5000, 6000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [6]:
grid_model.best_params_

{'max_iter': 4000}

In [7]:
clf1 = LogisticRegression(**grid_model.best_params_)

In [8]:
score_board['Classifier'].append(clf1.__class__.__name__)
score_board['Accuracy'].append(cross_val_score(clf1, strat_train_x, strat_train_y, cv = 3))

In [9]:
tree_clf = DecisionTreeClassifier()
grid_model = GridSearchCV(tree_clf, param_grid = {'max_depth': [5, 6, 7, 8], 'max_leaf_nodes': [20, 22, 24, 25]}, cv = 3)
grid_model.fit(strat_train_x, strat_train_y)

GridSearchCV(cv=3, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=None,
                                              splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'max_depth': [5, 6, 7, 8],
                         '

In [10]:
grid_model.best_params_

{'max_depth': 8, 'max_leaf_nodes': 25}

In [11]:
clf2 = DecisionTreeClassifier(**grid_model.best_params_)

In [12]:
score_board['Classifier'].append(clf2.__class__.__name__)
score_board['Accuracy'].append(cross_val_score(clf2, strat_train_x, strat_train_y, cv = 3))

In [13]:
svm_clf = SVC(probability = True)
grid_model = GridSearchCV(svm_clf, param_grid = {'C': [3, 5, 7], 'degree': [2, 3, 4]})
grid_model.fit(strat_train_x, strat_train_y)

GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=True, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [3, 5, 7], 'degree': [2, 3, 4]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [14]:
grid_model.best_params_

{'C': 5, 'degree': 2}

In [15]:
clf3 = SVC(**grid_model.best_params_, probability = True)

In [16]:
score_board['Classifier'].append(clf3.__class__.__name__)
score_board['Accuracy'].append(cross_val_score(clf3, strat_train_x, strat_train_y, cv = 3))

In [17]:
forest_clf = RandomForestClassifier()
grid_model = GridSearchCV(forest_clf, param_grid = {'max_depth': [6, 7, 8, 9, 10], 'max_leaf_nodes': [22, 23, 24, 25, 26], 'n_estimators': [20, 30, 50, 70]})
grid_model.fit(strat_train_x, strat_train_y)

GridSearchCV(cv=None, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              ra

In [18]:
grid_model.best_params_

{'max_depth': 10, 'max_leaf_nodes': 25, 'n_estimators': 50}

In [19]:
clf4 = RandomForestClassifier(**grid_model.best_params_)

In [20]:
score_board['Classifier'].append(clf4.__class__.__name__)
score_board['Accuracy'].append(cross_val_score(clf4, strat_train_x, strat_train_y, cv = 3))

In [21]:
clf5 = VotingClassifier([('c1', clf1), ('c2', clf2), ('c3', clf3), ('c4', clf4)], voting = 'soft')

In [22]:
score_board['Classifier'].append(clf5.__class__.__name__)
score_board['Accuracy'].append(cross_val_score(clf5, strat_train_x, strat_train_y, cv = 3))

In [23]:
score_board_df = pd.DataFrame(score_board)
score_board_df

Unnamed: 0,Classifier,Accuracy
0,LogisticRegression,"[0.9603340292275574, 0.954070981210856, 0.9519..."
1,DecisionTreeClassifier,"[0.7724425887265136, 0.8162839248434238, 0.774..."
2,SVC,"[0.9853862212943633, 0.9853862212943633, 0.995..."
3,RandomForestClassifier,"[0.9102296450939458, 0.9269311064718163, 0.926..."
4,VotingClassifier,"[0.964509394572025, 0.9749478079331941, 0.9686..."
