In [85]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

In [86]:
from sklearn.datasets import load_digits

In [87]:
digits=load_digits()

In [88]:
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [89]:
df=pd.DataFrame(data=digits.data,columns=digits.feature_names)

In [90]:
df.head(1)

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0


In [91]:
df['Digit Number']=digits.target

In [92]:
digits.target_names

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [93]:
df['Digit Number']=df['Digit Number'].apply(lambda x: digits.target_names[x])

In [94]:
df.head(2)

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,Digit Number
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1


In [95]:
inputs=df.drop('Digit Number',axis='columns')
output=df[['Digit Number']]

In [96]:
X_train,X_test,y_train,y_test=train_test_split(inputs,output,test_size=0.2,random_state=20)

# For SVM method


In [98]:
from sklearn.model_selection import GridSearchCV

In [99]:
model=GridSearchCV(SVC(),{
    'C':[3,7,4],
    'kernel':['rbf','linear','poly']
},cv=5,return_train_score=False)

In [100]:
model.fit(X_train,y_train)

In [101]:
svm_results=model.cv_results_

In [102]:
svm_r=pd.DataFrame(svm_results)

In [103]:
svm_r[['param_C','param_kernel','mean_test_score','rank_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,3,rbf,0.987469,3
1,3,linear,0.979121,7
2,3,poly,0.984688,4
3,7,rbf,0.98886,2
4,7,linear,0.979121,7
5,7,poly,0.983991,5
6,4,rbf,0.989557,1
7,4,linear,0.979121,7
8,4,poly,0.983991,5


# For RandomForest Method

In [129]:
model_l=GridSearchCV(RandomForestClassifier(),{
    'criterion':['gini','entropy']
},cv=5,return_train_score=False)

In [131]:
model_l.fit(X_train,y_train)

In [135]:
rf_results=model_l.cv_results_

In [137]:
rf_res=pd.DataFrame(rf_results)

In [139]:
rf_res.head(1)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.258654,0.010085,0.008572,0.005236,gini,{'criterion': 'gini'},0.972222,0.993056,0.944251,0.97561,0.97561,0.97215,0.015744,1


In [143]:
rf_res[['param_criterion','mean_test_score','rank_test_score']]

Unnamed: 0,param_criterion,mean_test_score,rank_test_score
0,gini,0.97215,1
1,entropy,0.969369,2


# For Decision Tree Model

In [146]:
model_t=GridSearchCV(DecisionTreeClassifier(),{
    'splitter':['best','random'],
    'criterion':['gini','entropy']
},cv=5,return_train_score=False)

In [148]:
model_t.fit(X_train,y_train)

In [150]:
model_t.cv_results_

{'mean_fit_time': array([0.01075931, 0.00673456, 0.01571999, 0.01020904]),
 'std_fit_time': array([0.00631148, 0.00827863, 0.00385629, 0.00593996]),
 'mean_score_time': array([0.00556698, 0.        , 0.00352964, 0.00074129]),
 'std_score_time': array([0.0056597 , 0.        , 0.00610824, 0.00117897]),
 'param_criterion': masked_array(data=['gini', 'gini', 'entropy', 'entropy'],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_splitter': masked_array(data=['best', 'random', 'best', 'random'],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'criterion': 'gini', 'splitter': 'best'},
  {'criterion': 'gini', 'splitter': 'random'},
  {'criterion': 'entropy', 'splitter': 'best'},
  {'criterion': 'entropy', 'splitter': 'random'}],
 'split0_test_score': array([0.80902778, 0.86111111, 0.85763889, 0.82291667]),
 'split1_test_score': array([0.82638889, 0.79861111, 0.83333333

In [152]:
dt=model_t.cv_results_

In [154]:
dt_results=pd.DataFrame(dt)

In [156]:
dt_results.head(1)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.010759,0.006311,0.005567,0.00566,gini,best,"{'criterion': 'gini', 'splitter': 'best'}",0.809028,0.826389,0.811847,0.867596,0.84669,0.83231,0.022126,4


In [158]:
dt_results[['param_criterion','param_splitter','mean_test_score','rank_test_score']]

Unnamed: 0,param_criterion,param_splitter,mean_test_score,rank_test_score
0,gini,best,0.83231,4
1,gini,random,0.838565,3
2,entropy,best,0.85248,1
3,entropy,random,0.846223,2


# For Logistic Regression model


In [161]:
log=GridSearchCV(LogisticRegression(),{
    'C':[1,2,3,4,5],
    'penalty':['l1', 'l2', 'elasticnet']
},cv=5,return_train_score=False)
    

In [163]:
log.fit(X_train,y_train)

In [169]:
log_res=log.cv_results_

In [171]:
Log_results=pd.DataFrame(log_res)

In [173]:
Log_results.head(1)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000614,0.000501,0.0,0.0,1,l1,"{'C': 1, 'penalty': 'l1'}",,,,,,,,6


In [175]:
Log_results[['param_C','param_penalty','mean_test_score','rank_test_score']]

Unnamed: 0,param_C,param_penalty,mean_test_score,rank_test_score
0,1,l1,,6
1,1,l2,0.956158,1
2,1,elasticnet,,6
3,2,l1,,6
4,2,l2,0.956156,3
5,2,elasticnet,,6
6,3,l1,,6
7,3,l2,0.954067,5
8,3,elasticnet,,6
9,4,l1,,6


# For Naive Bayes

In [185]:
nb_m=GridSearchCV(MultinomialNB(),{
    'alpha':[0.3,0.5,1,0.5],
},cv=5,return_train_score=False)
    

In [187]:
nb_m.fit(X_train,y_train)

In [189]:
nb_mres=nb_m.cv_results_

In [191]:
nb_mresults=pd.DataFrame(nb_mres)

In [193]:
nb_mresults.head(1)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.00375,0.00095,0.003264,0.000401,0.3,{'alpha': 0.3},0.888889,0.90625,0.888502,0.909408,0.888502,0.89631,0.009459,1


In [195]:
nb_mresults[['param_alpha','mean_test_score','rank_test_score']]

Unnamed: 0,param_alpha,mean_test_score,rank_test_score
0,0.3,0.89631,1
1,0.5,0.89631,1
2,1.0,0.89631,1
3,0.5,0.89631,1


**Results SVM:0.989557	,random_forest:0.972150	, DecisionTree:0.852480, Logistic Regression:0.956158 , For MultinomialNB:0.89631**

# SVM>RandomForest>Logistic Regression> MultinomialNB>DecisionTree