<a href="https://colab.research.google.com/github/RifatMuhtasim/Machine_Learning/blob/main/Miscellaneous_Topics/Finding_Optimal_Model_And_Hyperparameters_Of_Digits_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB

In [2]:
# Suppress Warnings for clean notebook

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load scikit-learn's digits dataset and list the attributes exposed by the
# returned object. (`load_digits` is imported in the top import cell.)

digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

# GridSearchCV for a Single Model

In [4]:
# Search only over the `alpha` setting of a multinomial naive Bayes model.
alpha_grid = {"alpha": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}

single_clf = GridSearchCV(
    estimator=MultinomialNB(),
    param_grid=alpha_grid,
    cv=5,
    return_train_score=False,
)
single_clf.fit(digits['data'], digits['target'])

# One row per candidate alpha: its mean cross-validated score and its rank.
result_columns = ['param_alpha', 'mean_test_score', 'rank_test_score']
pd.DataFrame(single_clf.cv_results_)[result_columns]

Unnamed: 0,param_alpha,mean_test_score,rank_test_score
0,1.0,0.87035,6
1,2.0,0.871464,5
2,3.0,0.872021,3
3,4.0,0.872021,3
4,5.0,0.873691,1
5,6.0,0.873691,1


In [5]:
# Hold out 25% of the samples to score the tuned model on a separate split.
# A fixed random_state makes the split, and therefore the reported score,
# reproducible across "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    digits['data'], digits['target'], test_size=0.25, random_state=42
)

# Reuse the alpha selected by `single_clf` instead of hard-coding it, so this
# cell cannot drift out of sync with the grid-search result.
# NOTE: `single_clf` was fitted on the full dataset, which includes the rows
# held out here, so this score is likely somewhat optimistic.
MultinomialNB_model = MultinomialNB(**single_clf.best_params_)
MultinomialNB_model.fit(X_train, y_train)
MultinomialNB_model.score(X_test, y_test)

0.8888888888888888

# GridSearchCV for Multiple Models

In [6]:
# Candidate estimators and the hyperparameter grid to search for each one.
# Each entry maps a display name to {"model": <estimator>, "params": <grid>}.
# The randomized estimators get a fixed random_state so that re-running the
# notebook reproduces the same scores.
model_params = {
    "logistic_regression": {
        "model": LogisticRegression(),
        "params": {
            "C": [1, 2, 5, 10],
            "max_iter": [50, 100, 200]
        }
    },
    "decision_tree": {
        "model": DecisionTreeClassifier(random_state=42),
        "params": {
            # Must be spelled "entropy": a misspelled criterion name is
            # rejected by scikit-learn, which left only "gini" effectively
            # searched (and the failure was hidden by the warning filter).
            "criterion": ["gini", "entropy"]
        }
    },
    "random_forest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [1, 5, 10, 20, 40, 70, 100]
        }
    },
    "support_vector_machine": {
        "model": SVC(),
        "params": {
            "C": [1, 10, 20],
            "kernel": ['linear', 'rbf']
        }
    },
    "GaussianNB": {
        "model": GaussianNB(),
        "params": {
            # Same values as before, written in scientific notation.
            "var_smoothing": [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
        }
    },
    "MultinomialNB": {
        "model": MultinomialNB(),
        "params": {
            "alpha": [1.0, 2.0, 3.0, 5.0, 10.0]
        }
    }
}

In [7]:
# Run one 5-fold grid search per entry in `model_params` and record, for each
# candidate model, the best mean score and the parameters that achieved it.
scores = []

for model_name, spec in model_params.items():
    clf = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(digits['data'], digits['target'])

    best_result = {
        "model": model_name,
        "best score": clf.best_score_,
        "best params": clf.best_params_,
    }
    scores.append(best_result)

In [8]:
# Tabulate the per-model results collected in `scores`, one row per model.
summary_columns = ['model', 'best score', 'best params']
df = pd.DataFrame(data=scores, columns=summary_columns)
df

Unnamed: 0,model,best score,best params
0,logistic_regression,0.917662,"{'C': 5, 'max_iter': 50}"
1,decision_tree,0.783019,{'criterion': 'gini'}
2,random_forest,0.938807,{'n_estimators': 100}
3,support_vector_machine,0.97385,"{'C': 10, 'kernel': 'rbf'}"
4,GaussianNB,0.863115,{'var_smoothing': 0.001}
5,MultinomialNB,0.874246,{'alpha': 10.0}


**Comment:** For this dataset, the best-performing model is the SVM (Support Vector Machine) with C = 10 and kernel = "rbf".