In [1]:
# selecting best algorithm and hyper parameter tuning for digits dataset
from sklearn.datasets import load_digits
digits = load_digits()
dir(digits)

['DESCR', 'data', 'images', 'target', 'target_names']

In [2]:
# importing all the models that need to be tested
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [5]:
model_params = {'svm':{
               'model':SVC(gamma = 'auto'),
                'params':{
                          'C':[1,10,20],
                         'kernel':['linear', 'rbf', 'sigmoid']}
},
               'random_forest':{
               'model':RandomForestClassifier(),
               'params':{
                       'n_estimators':[10,20,30],
                       'criterion':['gini','entropy']}
               },
               'logistic_regression':{
                   'model':LogisticRegression(),
                   'params':{
                     'C':[1,10,20]}
               },
               'GaussianNB':{
                   'model':GaussianNB(),
                   'params':{
                     'var_smoothing':[1*(10**-9), 1*(10**-10)]}
               },
               'MultinomialNB':{
                   'model':MultinomialNB(),
                   'params': {
                       'alpha':[1,2,3]}
               },
               'DecisionTree':{
                   'model':DecisionTreeClassifier(),
                   'params':{
                       'criterion':['gini','entropy']}
               }}

In [9]:
# importing GridSearchCV for hyper parameter tuning
from sklearn.model_selection import GridSearchCV
scores = []
for model,mod_param in model_params.items():
    clf = GridSearchCV(mod_param['model'], mod_param['params'], cv = 5, return_train_score = False)
    clf.fit(digits.data, digits.target)
    scores.append({
        'model':model,
        'best_parameter':clf.best_params_,
        'best_score':clf.best_score_
    })
scores

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#log

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[{'model': 'svm',
  'best_parameter': {'C': 1, 'kernel': 'linear'},
  'best_score': 0.9476973073351903},
 {'model': 'random_forest',
  'best_parameter': {'criterion': 'entropy', 'n_estimators': 30},
  'best_score': 0.93491643454039},
 {'model': 'logistic_regression',
  'best_parameter': {'C': 1},
  'best_score': 0.9137650882079852},
 {'model': 'GaussianNB',
  'best_parameter': {'var_smoothing': 1e-09},
  'best_score': 0.8069281956050759},
 {'model': 'MultinomialNB',
  'best_parameter': {'alpha': 3},
  'best_score': 0.8720210461157537},
 {'model': 'DecisionTree',
  'best_parameter': {'criterion': 'entropy'},
  'best_score': 0.8169405756731661}]

In [11]:
import pandas as pd
pd.DataFrame(data = scores)

Unnamed: 0,model,best_parameter,best_score
0,svm,"{'C': 1, 'kernel': 'linear'}",0.947697
1,random_forest,"{'criterion': 'entropy', 'n_estimators': 30}",0.934916
2,logistic_regression,{'C': 1},0.913765
3,GaussianNB,{'var_smoothing': 1e-09},0.806928
4,MultinomialNB,{'alpha': 3},0.872021
5,DecisionTree,{'criterion': 'entropy'},0.816941


## So we can say that SVM with C = 1 and kernel = linear is the best algorith and hyper parameters for our digits dataset