In [136]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

import numpy as np
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
digits = load_digits()


In [112]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(digits.data,digits.target,test_size=0.3)

In [113]:
lr = LogisticRegression(solver='liblinear',multi_class='ovr')
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.95

In [114]:
svm = SVC(gamma='auto')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.34629629629629627

In [115]:
rf = RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9703703703703703

In [116]:
from sklearn.model_selection import KFold

kf = KFold(n_splits = 9)
kf

KFold(n_splits=9, random_state=None, shuffle=False)

In [117]:
for i, j in kf.split([1,2,3,4,5,6,7,8,9]):
    print(i,j)

[1 2 3 4 5 6 7 8] [0]
[0 2 3 4 5 6 7 8] [1]
[0 1 3 4 5 6 7 8] [2]
[0 1 2 4 5 6 7 8] [3]
[0 1 2 3 5 6 7 8] [4]
[0 1 2 3 4 6 7 8] [5]
[0 1 2 3 4 5 7 8] [6]
[0 1 2 3 4 5 6 8] [7]
[0 1 2 3 4 5 6 7] [8]


In [118]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [119]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=3)

scores_logistic = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(digits.data,digits.target):
    X_train, X_test, y_train, y_test = digits.data[train_index], digits.data[test_index], \
                                       digits.target[train_index], digits.target[test_index]
    scores_logistic.append(get_score(LogisticRegression(solver='liblinear',multi_class='ovr'), X_train, X_test, y_train, y_test))  
    scores_svm.append(get_score(SVC(gamma='auto'), X_train, X_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [120]:
from sklearn.model_selection import cross_val_score

In [121]:
cross_val_score(SVC(),digits.data,digits.target)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [122]:
cross_val_score(LogisticRegression(),digits.data,digits.target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

array([0.92222222, 0.86944444, 0.94150418, 0.93871866, 0.89693593])

In [123]:
cross_val_score(RandomForestClassifier(),digits.data,digits.target)

array([0.91944444, 0.92222222, 0.95543175, 0.9637883 , 0.92479109])

In [124]:
from sklearn.model_selection import GridSearchCV





In [125]:
from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(SVC(gamma='auto'),
                   {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
                   cv=5, return_train_score=False)

clf.fit(digits.data, digits.target)


In [126]:
clf.cv_results_

{'mean_fit_time': array([0.22318096, 0.0276824 , 0.21902223, 0.02264566, 0.21430202,
        0.02395482]),
 'std_fit_time': array([0.01493711, 0.00347404, 0.00594757, 0.00129904, 0.00491381,
        0.00214312]),
 'mean_score_time': array([0.05858111, 0.00692263, 0.05901289, 0.00551181, 0.05594053,
        0.00558252]),
 'std_score_time': array([0.00284572, 0.00063963, 0.00469466, 0.00045392, 0.0022802 ,
        0.00051456]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],


In [132]:
import pandas as pd

df = pd.DataFrame(clf.cv_results_)

df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.223181,0.014937,0.058581,0.002846,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.411111,0.45,0.454039,0.448468,0.479109,0.448545,0.021761,6
1,0.027682,0.003474,0.006923,0.00064,1,linear,"{'C': 1, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,1
2,0.219022,0.005948,0.059013,0.004695,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,4
3,0.022646,0.001299,0.005512,0.000454,10,linear,"{'C': 10, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,1
4,0.214302,0.004914,0.055941,0.00228,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,4
5,0.023955,0.002143,0.005583,0.000515,20,linear,"{'C': 20, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,1


In [142]:
gujo1 = GaussianNB()

gujo1.fit(X_train,y_train)

gujo1.score(X_train,y_train)

0.8606010016694491

In [148]:
gujo2 = MultinomialNB()

gujo2.fit(X_train,y_train)

gujo2.score(X_train,y_train)

0.9123539232053423

In [152]:
gujo3 = DecisionTreeClassifier()

gujo3.fit(X_train,y_train)

gujo3.score(X_train,y_train)

1.0

In [162]:
model_parms = {
    'svm': {
        'model':SVC(gamma='auto'),
        'params':{
            'C':[1,10,20],
            'kernel':['rbf','linear']
        }
    },
    'random_forest': {
        'model':RandomForestClassifier(),
        'params':{
            
            'n_estimators':[1,5,10]
        }
    },
    'logistic_regression': {
        'model':LogisticRegression(solver = 'liblinear',multi_class = 'auto'),
        'params':{
            'C':[1,5,10]
        }
    },
    'lgaussian': {
        'model':GaussianNB(),
        'params':{
            
        }
    },
    'multimodal': {
        'model':MultinomialNB(),
        'params':{
            
        }
    },
    'decision_tree': {
        'model':DecisionTreeClassifier(),
        'params':{
            'criterion': ['gini','entropy'],
        }
    }
}

In [172]:
scores = []

for m , k in model_parms.items():
    print(m,k)
    
    clf = GridSearchCV(k['model'],k['params'],cv=5,return_train_score=False)
    clf.fit(digits.data,digits.target)
    scores.append({
        'model':m,
        'best_score':clf.best_score_,
        'best_params':clf.best_params_})
                       

svm {'model': SVC(gamma='auto'), 'params': {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}}
random_forest {'model': RandomForestClassifier(), 'params': {'n_estimators': [1, 5, 10]}}
logistic_regression {'model': LogisticRegression(solver='liblinear'), 'params': {'C': [1, 5, 10]}}
lgaussian {'model': GaussianNB(), 'params': {}}
multimodal {'model': MultinomialNB(), 'params': {}}
decision_tree {'model': DecisionTreeClassifier(), 'params': {'criterion': ['gini', 'entropy']}}


In [173]:
scores

[{'model': 'svm',
  'best_score': 0.9476973073351903,
  'best_params': {'C': 1, 'kernel': 'linear'}},
 {'model': 'random_forest',
  'best_score': 0.8937542556484062,
  'best_params': {'n_estimators': 10}},
 {'model': 'logistic_regression',
  'best_score': 0.9221138966264315,
  'best_params': {'C': 1}},
 {'model': 'lgaussian', 'best_score': 0.8069281956050759, 'best_params': {}},
 {'model': 'multimodal', 'best_score': 0.8703497369235531, 'best_params': {}},
 {'model': 'decision_tree',
  'best_score': 0.8136010523057877,
  'best_params': {'criterion': 'entropy'}}]

In [178]:
dfo = pd.DataFrame(scores)

dfo

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.893754,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,lgaussian,0.806928,{}
4,multimodal,0.87035,{}
5,decision_tree,0.813601,{'criterion': 'entropy'}
