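# Note: the searches below use accuracy as the score (the default for classifiers), which is not always the best evaluation metric. For example, when identifying cancer, missing a positive case is far more costly than a false alarm, so a metric such as recall may be more appropriate.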

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load scikit-learn's bundled digits dataset; later cells read its
# `data`, `target`, `feature_names` and `target_names` attributes.
from sklearn import datasets
digits = datasets.load_digits()

In [21]:
# Tabular view of the dataset: one column per feature, plus a `target`
# column holding the class name looked up from each sample's target index.
df = pd.DataFrame(data=digits.data, columns=digits.feature_names)
df['target'] = pd.Series(digits.target).apply(
    lambda label_idx: digits.target_names[label_idx]
)

# Grid Search on 1 model only

In [49]:
# Grid Search: cross-validate every C/kernel combination for a single SVM.
# `svm` is imported here so this cell does not depend on a later cell
# having been run first (it must work after Restart Kernel -> Run All).
from sklearn import svm
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,  # 5-fold cross-validation for each parameter combination
    return_train_score=False,
)

clf.fit(digits.data, digits.target)
# Parameter combination with the best mean cross-validated score.
clf.best_params_

{'C': 1, 'kernel': 'linear'}

# Randomized Search
1. Try this when the computational cost of a full Grid Search is too high.
2. Instead of trying every combination, it evaluates only `n_iter` randomly chosen combinations of the given parameter values.

In [48]:
# Randomized Search: evaluate only `n_iter` randomly sampled C/kernel
# combinations instead of the full grid.
# `svm` is imported here so this cell does not depend on a later cell
# having been run first (it must work after Restart Kernel -> Run All).
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV

rs = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,  # 5-fold cross-validation for each sampled combination
    return_train_score=False,
    n_iter=2,  # number of parameter combinations to sample
    random_state=0,  # fix the sampling so re-running the cell gives the same result
)

rs.fit(digits.data, digits.target)
# Best of the sampled combinations only; a better one may exist in the full grid.
rs.best_params_

{'kernel': 'linear', 'C': 10}

# Grid Search on Multiple Models

In [39]:
# I want to use Grid Search to find the best model and parameters
# First import all the libraries for different models.
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

# Find out the best model and parameter
from sklearn.model_selection import GridSearchCV
# Candidate models for the comparison. Each entry maps a display name to
#   'model'  - the estimator instance to tune
#   'params' - the parameter grid to search (an empty dict means "defaults only").
# Estimators with internal randomness are given a fixed random_state so the
# comparison is reproducible from one run to the next.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        'model': RandomForestClassifier(random_state=0),
        'params': {
            'n_estimators': [1, 5, 10],
        },
    },
    'logistic_regression': {
        # NOTE(review): `multi_class` is reported as deprecated in recent
        # scikit-learn releases - confirm against the installed version.
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10],
        },
    },
    'naive_bayes_gaussian': {
        'model': GaussianNB(),
        'params': {},
    },
    'naive_bayes_multinomial': {
        'model': MultinomialNB(),
        'params': {},
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(random_state=0),
        'params': {
            'criterion': ['gini', 'entropy'],
        },
    },
}

In [43]:
# Run a 5-fold grid search for every candidate in `model_params` and keep
# each model's best cross-validated score and the parameters that produced it.
scores = []

for model_name, spec in model_params.items():
    clf = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(digits.data, digits.target)
    scores.append(
        {
            'model': model_name,
            'best_score': clf.best_score_,
            'best_params': clf.best_params_,
        }
    )

# One row per model, shown best-first.
grid_result = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
grid_result.sort_values(by='best_score', ascending=False)

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
2,logistic_regression,0.922114,{'C': 1}
1,random_forest,0.908219,{'n_estimators': 10}
4,naive_bayes_multinomial,0.87035,{}
5,decision_tree,0.812498,{'criterion': 'entropy'}
3,naive_bayes_gaussian,0.806928,{}
