In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.datasets import load_digits
import pandas as pd
import numpy as np

In [3]:
digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [4]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [5]:
model_params = {                           # creating model porameters
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C':[1,10,20],
            'kernel': ['rbf','linear']
        }
    },
    'random_forest':{
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(),
        'params': {
            'C': [1,5,10]
        }
    },
     'Gaussian_NB': {
        'model': GaussianNB(),
        'params': {
       
        }
    },
     'Multinomial_NB': {
        'model': MultinomialNB(),
        'params': {
           
        }
    },
     'Decision_Tree_Classifier': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini','entropy']
        }
    }
}

In [6]:
model_params

{'svm': {'model': SVC(gamma='auto'),
  'params': {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}},
 'random_forest': {'model': RandomForestClassifier(),
  'params': {'n_estimators': [1, 5, 10]}},
 'logistic_regression': {'model': LogisticRegression(),
  'params': {'C': [1, 5, 10]}},
 'Gaussian_NB': {'model': GaussianNB(), 'params': {}},
 'Multinomial_NB': {'model': MultinomialNB(), 'params': {}},
 'Decision_Tree_Classifier': {'model': DecisionTreeClassifier(),
  'params': {'criterion': ['gini', 'entropy']}}}

In [7]:
df_params = pd.DataFrame(model_params) # creating and viewing the newly created parameters in dataframe.
df_params

Unnamed: 0,svm,random_forest,logistic_regression,Gaussian_NB,Multinomial_NB,Decision_Tree_Classifier
model,SVC(gamma='auto'),RandomForestClassifier(),LogisticRegression(),GaussianNB(),MultinomialNB(),DecisionTreeClassifier()
params,"{'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}","{'n_estimators': [1, 5, 10]}","{'C': [1, 5, 10]}",{},{},"{'criterion': ['gini', 'entropy']}"


In [10]:
from sklearn.model_selection import GridSearchCV #takes (estimator, param_grid, cv)

In [11]:
scores = []
for i,j in model_params.items():
    clf = GridSearchCV(j['model'], j['params'],cv =5)
    clf.fit(digits.data,digits.target)
    scores.append({
        'MODEL': i,
        'BEST SCORE': clf.best_score_,
        'BEST PARAMS': clf.best_params_
    })
scores

[{'MODEL': 'svm',
  'BEST SCORE': 0.9476973073351903,
  'BEST PARAMS': {'C': 1, 'kernel': 'linear'}},
 {'MODEL': 'random_forest',
  'BEST SCORE': 0.9020875889817394,
  'BEST PARAMS': {'n_estimators': 10}},
 {'MODEL': 'logistic_regression',
  'BEST SCORE': 0.9137650882079852,
  'BEST PARAMS': {'C': 1}},
 {'MODEL': 'Gaussian_NB', 'BEST SCORE': 0.8069281956050759, 'BEST PARAMS': {}},
 {'MODEL': 'Multinomial_NB',
  'BEST SCORE': 0.8703497369235531,
  'BEST PARAMS': {}},
 {'MODEL': 'Decision_Tree_Classifier',
  'BEST SCORE': 0.8080439492417207,
  'BEST PARAMS': {'criterion': 'entropy'}}]

In [12]:
best_model_df = pd.DataFrame(scores) # creating dataframe from the above scores 
best_model_df

Unnamed: 0,MODEL,BEST SCORE,BEST PARAMS
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.902088,{'n_estimators': 10}
2,logistic_regression,0.913765,{'C': 1}
3,Gaussian_NB,0.806928,{}
4,Multinomial_NB,0.87035,{}
5,Decision_Tree_Classifier,0.808044,{'criterion': 'entropy'}


In [13]:
max(best_model_df['BEST SCORE'])

0.9476973073351903

__Exploring by changing the values of parameters__

In [14]:
model_params1 = {                           # creating model porameters
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C':list(range(1,10)),
            'kernel': ['rbf','linear']
        }
    },
    'random_forest':{
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': list(range(1,10))
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(),
        'params': {
            'C': list(range(1,10))
        }
    },
     'Gaussian_NB': {
        'model': GaussianNB(),
        'params': {
       
        }
    },
     'Multinomial_NB': {
        'model': MultinomialNB(),
        'params': {
           
        }
    },
     'Decision_Tree_Classifier': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini','entropy']
        }
    }
}

In [15]:
scores1 = []
for i,j in model_params1.items():
    clf = GridSearchCV(j['model'], j['params'],cv =5)
    clf.fit(digits.data,digits.target)
    scores.append({
        'MODEL': i,
        'BEST SCORE': clf.best_score_,
        'BEST PARAMS': clf.best_params_
    })
scores1

[]

In [16]:
best_model_df1 = pd.DataFrame(scores) # creating dataframe from the above scores 
best_model_df1

Unnamed: 0,MODEL,BEST SCORE,BEST PARAMS
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.902088,{'n_estimators': 10}
2,logistic_regression,0.913765,{'C': 1}
3,Gaussian_NB,0.806928,{}
4,Multinomial_NB,0.87035,{}
5,Decision_Tree_Classifier,0.808044,{'criterion': 'entropy'}
6,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
7,random_forest,0.90598,{'n_estimators': 9}
8,logistic_regression,0.913765,{'C': 1}
9,Gaussian_NB,0.806928,{}


___svm (C=1, kernel=linear) with 94.7697% score is the best score.___