In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB

In [3]:
from sklearn.datasets import load_digits
# Load the digits dataset object; the grid-search cells below read
# `digits.data` as the feature matrix and `digits.target` as the labels.
digits = load_digits()

In [9]:
from sklearn.model_selection import GridSearchCV

In [36]:
# Candidate classifiers and the hyper-parameter grids to search for each.
#
# Layout: display name -> {"model": unfitted estimator,
#                          "params": param grid handed to GridSearchCV}.
# An empty "params" dict means the estimator is evaluated with its defaults.
#
# The tree-based estimators are randomised, so they get a fixed
# `random_state`; without it their cross-validation scores change from one
# run of the notebook to the next.
model_selection = {
    "Decision Tree" : {
        "model" : DecisionTreeClassifier(random_state=42),
        "params" : {
            "criterion" : ["gini", "entropy"]
        }
    },
    "Random Forest" : {
        "model" : RandomForestClassifier(random_state=42),
        "params" : {
            "n_estimators" : [1, 5, 10]
        }
    },

    "SVM" : {
        # max_iter caps the solver iterations so a slow fit cannot run unbounded.
        "model" : SVC(max_iter=10000),
        "params" : {
            "C" : [1, 10, 30],
            "kernel" : ["rbf", 'linear']
        }
    },

    "Logistic Regresion" : {
        "model" : LogisticRegression(max_iter=10000),
        "params" : {
            "C" : [1, 5, 10]
        }
    },

    "Gaussian NB" : {
        "model" : GaussianNB(),
        "params" : {}
    },

    "Multinomial NB" : {
        "model" : MultinomialNB(),
        "params" : {}
    }
}

In [37]:
# Run a 5-fold grid search for every entry of `model_selection` on the digits
# data and record each candidate's best score and best parameter combination.
scores = []

for label, spec in model_selection.items():
    clf = GridSearchCV(
        estimator=spec["model"],
        param_grid=spec["params"],
        cv=5,
        return_train_score=False,
    )
    clf.fit(digits.data, digits.target)

    summary_row = {
        "model": label,
        "best_score": clf.best_score_,
        "best_params": clf.best_params_,
    }
    scores.append(summary_row)

# One row per candidate model, shown as the cell's output.
df = pd.DataFrame(scores, columns=["model", "best_score", "best_params"])
df

Unnamed: 0,model,best_score,best_params
0,Decision Tree,0.810812,{'criterion': 'entropy'}
1,Random Forest,0.913202,{'n_estimators': 10}
2,SVM,0.97385,"{'C': 10, 'kernel': 'rbf'}"
3,Logistic Regresion,0.914316,{'C': 1}
4,Gaussian NB,0.806928,{}
5,Multinomial NB,0.87035,{}


In [38]:
# Second candidate set: same idea as a name -> {"model", "params"} mapping,
# where "model" is an unfitted estimator and "params" is the grid handed to
# GridSearchCV (an empty dict evaluates the estimator with its defaults).
#
# Estimators that involve randomness get a fixed `random_state` so repeated
# runs of the notebook report the same cross-validation scores.
model_params = {
    'svm': {
        # max_iter caps the solver iterations so a slow fit cannot run unbounded.
        'model': SVC(max_iter=10000),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(random_state=42),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        # `multi_class` is intentionally not passed: the library default is
        # already 'auto', and the keyword is deprecated in recent
        # scikit-learn releases.
        'model': LogisticRegression(solver='liblinear', max_iter=10000, random_state=42),
        'params': {
            'C': [1,5,10]
        }
    },
    'naive_bayes_gaussian': {
        'model': GaussianNB(),
        'params': {}
    },
    'naive_bayes_multinomial': {
        'model': MultinomialNB(),
        'params': {}
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(random_state=42),
        'params': {
            'criterion': ['gini','entropy'],
        }
    }
}

In [39]:
# Repeat the 5-fold grid search, this time over the `model_params` candidates,
# keeping the best score and best parameters found for each one.
scores = []

for candidate_name, candidate in model_params.items():
    estimator, grid = candidate['model'], candidate['params']
    clf = GridSearchCV(estimator, grid, cv=5, return_train_score=False)
    clf.fit(digits.data, digits.target)
    scores.append(dict(
        model=candidate_name,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))

# Tabulate the results; the bare expression renders the frame as cell output.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.97385,"{'C': 10, 'kernel': 'rbf'}"
1,random_forest,0.897091,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,naive_bayes_gaussian,0.806928,{}
4,naive_bayes_multinomial,0.87035,{}
5,decision_tree,0.808032,{'criterion': 'entropy'}


### The best model is SVM with `C = 10` and `kernel = 'rbf'` (mean 5-fold CV score ≈ 0.974)