For digits dataset in sklearn.dataset, please try following classifiers and find out the one that gives best performance. Also find the optimal parameters for that classifier.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import load_digits

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [2]:
digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [3]:
df = pd.DataFrame(data=digits.data)
df['target'] = digits.target
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


In [34]:
model_params = {
    'svm': {
        'model': SVC(),
        'params' : {
            'C': [1, 10, 20, 30, 50, 80, 100],
            'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
            'gamma': ['auto', 'scale'],
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1, 5, 10, 20, 30, 40, 50, 60, 80, 100],
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(max_iter=1000),
        'params': {
            'C': [1, 5, 10, 20, 50, 80, 100],
            'solver': ['liblinear'],
        }
    },
    'gaussian_naive_bayes' : {
        'model': GaussianNB(),
        'params': {}
    },
    'multinomial_naive_bayes': {
        'model': MultinomialNB(),
        'params' : {}
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params' : {
            'criterion': ['gini', 'entropy', 'log_loss'],
            'splitter': ['best', 'random'],
        }
    },
}

In [35]:
scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(digits.data, digits.target)
    
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df_results = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df_results

Unnamed: 0,model,best_score,best_params
0,svm,0.97385,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}"
1,random_forest,0.941031,{'n_estimators': 60}
2,logistic_regression,0.922114,"{'C': 1, 'solver': 'liblinear'}"
3,gaussian_naive_bayes,0.806928,{}
4,multinomial_naive_bayes,0.87035,{}
5,decision_tree,0.826391,"{'criterion': 'log_loss', 'splitter': 'best'}"


SVC -> C=10, gamma=scale, kernel=rbf -> Score = 0.9738

In [38]:
scores = []

for model_name, mp in model_params.items():
    clf =  RandomizedSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False, n_iter=1)
    clf.fit(digits.data, digits.target)
    
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df_results = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df_results

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'kernel': 'linear', 'gamma': 'auto', 'C': 100}"
1,random_forest,0.859253,{'n_estimators': 5}
2,logistic_regression,0.913771,"{'solver': 'liblinear', 'C': 50}"
3,gaussian_naive_bayes,0.806928,{}
4,multinomial_naive_bayes,0.87035,{}
5,decision_tree,0.788024,"{'splitter': 'best', 'criterion': 'gini'}"
