<a href="https://www.kaggle.com/code/bandhansingh/gridsearchcv-in-digits-dataset?scriptVersionId=93658209" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# For digits dataset in sklearn.dataset, please try following classifiers and find out the one that gives best performance. Also find the optimal parameters for that classifier.

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [2]:
from sklearn.datasets import load_digits
digits = load_digits()

dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [3]:
df = pd.DataFrame(digits.data, columns=digits.feature_names)
df['digit'] = digits.target
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,digit
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [4]:
model_params = {
    'svm' : {
        'model' : svm.SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel' : ['rbf','linear']
        }
    },
    
    'random_forest' :{
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,10,20]
        }
    },
    
    'logistic_regression' :{
        'model' : LogisticRegression(solver='liblinear',multi_class='auto'),
        'params' : {
            'C': [1,5,10,15]
        }
    },
    
    'decision_tree' :{
        'model' : DecisionTreeClassifier(),
        'params' : {
            'criterion': ['gini','entropy'],
        }
    },
    
    'gaussian_nb' :{
        'model' : GaussianNB(),
        'params' : {}
    },
    
    'multinomial_nb' :{
        'model' : MultinomialNB(),
        'params' : {}
    }
}

In [5]:
model_params.items()

dict_items([('svm', {'model': SVC(gamma='auto'), 'params': {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}}), ('random_forest', {'model': RandomForestClassifier(), 'params': {'n_estimators': [1, 10, 20]}}), ('logistic_regression', {'model': LogisticRegression(solver='liblinear'), 'params': {'C': [1, 5, 10, 15]}}), ('decision_tree', {'model': DecisionTreeClassifier(), 'params': {'criterion': ['gini', 'entropy']}}), ('gaussian_nb', {'model': GaussianNB(), 'params': {}}), ('multinomial_nb', {'model': MultinomialNB(), 'params': {}})])

In [6]:
from sklearn.model_selection import GridSearchCV

scores = []

for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(digits.data, digits.target)
    
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df_best_model = pd.DataFrame(scores)
df_best_model

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.914889,{'n_estimators': 20}
2,logistic_regression,0.922114,{'C': 1}
3,decision_tree,0.813595,{'criterion': 'entropy'}
4,gaussian_nb,0.806928,{}
5,multinomial_nb,0.87035,{}


**Here the svm model with 94.7% score is doing good as per the parameters that i have passed**