In [17]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits

In [18]:
digits = load_digits()

In [19]:
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [20]:
df = pd.DataFrame(digits.data, columns=digits.feature_names)
df

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,4.0,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0


## Hyper Parameter Turning

In [21]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [22]:
model_params = {
    'svm' : {
        'model': svm.SVC(gamma='auto'),
        'params': {
            "C": [1,10,20]
        }
    },
    'random_forest':{
        'model': RandomForestClassifier(),
        'params':{
            'n_estimators' : [1,5,10]
        }
    },
    'gaussian': {
        'model': GaussianNB(),
        'params': {
            'var_smoothing': np.logspace(0, -9)

        }
    },
    'multinomial': {
        'model': MultinomialNB(),
        'params' : {'alpha': [0.01, 0.1, 0.5, 1.0, 10.0, ],
          'fit_prior': [True, False],
         }
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'max_depth': [3,6,9]
        }
    }
}

In [26]:
from sklearn.model_selection import GridSearchCV


scores = []

for model_name,mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=5,
    return_train_score=False)
    clf.fit(digits.data, digits.target)
    scores.append({
        'models': model_name,
        'best_score' :clf.best_score_,
        'best_params': clf.best_params_
    })

In [28]:
df = pd.DataFrame(scores, columns=['models', 'best_score', 'best_params'])
df

Unnamed: 0,models,best_score,best_params
0,svm,0.476366,{'C': 10}
1,random_forest,0.908193,{'n_estimators': 10}
2,gaussian,0.88983,{'var_smoothing': 0.07906043210907697}
3,multinomial,0.874246,"{'alpha': 10.0, 'fit_prior': True}"
4,decision_tree,0.786908,{'max_depth': 9}


In [29]:
from sklearn.model_selection import train_test_split

In [31]:
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.3, random_state=10)

In [32]:
model = RandomForestClassifier(n_estimators=10)

In [33]:
model.fit(X_train, y_train)

RandomForestClassifier(n_estimators=10)

In [35]:
model.score(X_test, y_test)

0.9333333333333333