#### Exercise 9

In [130]:
import pandas as pd

In [131]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [132]:
from sklearn.datasets import load_digits

In [133]:
digits = load_digits()
digits

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
 

In [134]:
from sklearn.preprocessing import MinMaxScaler

In [135]:
scaler = MinMaxScaler()

In [136]:
X_scale = scaler.fit_transform(digits.data)
X_scale

array([[0.    , 0.    , 0.3125, ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.625 , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 1.    , 0.5625, 0.    ],
       ...,
       [0.    , 0.    , 0.0625, ..., 0.375 , 0.    , 0.    ],
       [0.    , 0.    , 0.125 , ..., 0.75  , 0.    , 0.    ],
       [0.    , 0.    , 0.625 , ..., 0.75  , 0.0625, 0.    ]])

In [137]:
from sklearn.model_selection import GridSearchCV

In [138]:
model_params = {
    'svm': {
        'model': svm.SVC(gamma = 'auto'),
        'params' : {
            'C': [1, 10, 20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1, 5, 10, 20, 30]
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(solver = 'liblinear', multi_class = 'auto'),
        'params': {
            'C': [1, 5, 10]
        }
    },
    'gaussian_nb': {
        'model': GaussianNB(),
        'params': {}
    },
    'multi_nomial_nb': {
        'model': MultinomialNB(),
        'params': {
            'alpha': [1, 0.90, 0.75]
        }
    },
    'decesion_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini', 'entropy', 'log_loss']
        }
    }
}

In [139]:
scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv = 5, return_train_score = False)
    clf.fit(X_scale, digits.target)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df = pd.DataFrame(scores, columns = ['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.957719,"{'C': 20, 'kernel': 'rbf'}"
1,random_forest,0.922106,{'n_estimators': 30}
2,logistic_regression,0.928776,{'C': 1}
3,gaussian_nb,0.796905,{}
4,multi_nomial_nb,0.874245,{'alpha': 1}
5,decesion_tree,0.811939,{'criterion': 'entropy'}


In [140]:
from sklearn.model_selection import RandomizedSearchCV

In [141]:
scores_random = []

for model_name, mp in model_params.items():
    clf_rand =  RandomizedSearchCV(mp['model'], mp['params'], cv = 5, return_train_score = False)
    clf_rand.fit(X_scale, digits.target)
    scores_random.append({
        'model': model_name,
        'best_score': clf_rand.best_score_,
        'best_params': clf_rand.best_params_
    })
    
df_rand = pd.DataFrame(scores_random, columns = ['model','best_score','best_params'])
df_rand



Unnamed: 0,model,best_score,best_params
0,svm,0.957719,"{'kernel': 'rbf', 'C': 20}"
1,random_forest,0.934359,{'n_estimators': 30}
2,logistic_regression,0.928776,{'C': 1}
3,gaussian_nb,0.796905,{}
4,multi_nomial_nb,0.874245,{'alpha': 1}
5,decesion_tree,0.811368,{'criterion': 'entropy'}
