In [1]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from sklearn.datasets import load_digits

In [2]:
from sklearn.model_selection import cross_val_score

In [3]:
# Load the digits dataset object returned by scikit-learn's load_digits().
digits = load_digits()
# Show which attributes the returned object exposes.
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [4]:
# Put the pixel features into a DataFrame and append the label as a
# 'target' column, built in a single chained expression.
df = (
    pd.DataFrame(digits.data, columns=digits.feature_names)
    .assign(target=digits.target)
)
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows as a test set.
# random_state is fixed so the split -- and therefore every score computed
# from X_train / y_train in later cells -- is the same on each
# Restart & Run All. Without it the shuffle differs on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='target'),  # features: every column except the label
    df['target'],               # labels
    test_size=0.1,
    random_state=42,
)

In [6]:
# Cross-validated scores (cv=5) on the training split for an RBF-kernel SVC, C=10.
cross_val_score(
    svm.SVC(C=10, kernel='rbf', gamma='auto'),
    X_train,
    y_train,
    cv=5,
)

array([0.57098765, 0.58950617, 0.56037152, 0.59442724, 0.56965944])

In [7]:
# Cross-validated scores (cv=5) on the training split for an RBF-kernel SVC, C=20.
cross_val_score(
    svm.SVC(C=20, kernel='rbf', gamma='auto'),
    X_train,
    y_train,
    cv=5,
)

array([0.57098765, 0.58950617, 0.56037152, 0.59442724, 0.56965944])

In [8]:
# Cross-validated scores (cv=5) on the training split for an RBF-kernel SVC, C=40.
cross_val_score(
    svm.SVC(C=40, kernel='rbf', gamma='auto'),
    X_train,
    y_train,
    cv=5,
)

array([0.57098765, 0.58950617, 0.56037152, 0.59442724, 0.56965944])

In [9]:
from sklearn.model_selection import GridSearchCV

# Exhaustively evaluate every C / kernel combination of an SVC with cv=5,
# instead of calling cross_val_score by hand for each setting.
clf = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=dict(C=[1, 10, 20], kernel=['rbf', 'linear']),
    cv=5,
    return_train_score=False,
)
clf.fit(X_train, y_train)
# Raw results dict: timings, parameter settings and scores per candidate.
clf.cv_results_

{'mean_fit_time': array([0.24056921, 0.03091989, 0.23078198, 0.03151622, 0.24432988,
        0.03430777]),
 'std_fit_time': array([0.01640693, 0.00190058, 0.0011972 , 0.00101139, 0.0152229 ,
        0.00225273]),
 'mean_score_time': array([0.07101245, 0.00897312, 0.06960855, 0.0077837 , 0.0718256 ,
        0.01016822]),
 'std_score_time': array([0.00581805, 0.00167125, 0.00170588, 0.00098443, 0.00166754,
        0.00146441]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],


In [10]:
# Wrap the grid-search results dict in a DataFrame for tabular display.
result_df = pd.DataFrame(clf.cv_results_)
result_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.240569,0.016407,0.071012,0.005818,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.533951,0.555556,0.532508,0.578947,0.510836,0.542359,0.023127,6
1,0.03092,0.001901,0.008973,0.001671,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966049,0.984568,0.978328,0.978328,0.972136,0.975882,0.006295,1
2,0.230782,0.001197,0.069609,0.001706,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.570988,0.589506,0.560372,0.594427,0.569659,0.57699,0.012858,4
3,0.031516,0.001011,0.007784,0.000984,10,linear,"{'C': 10, 'kernel': 'linear'}",0.966049,0.984568,0.978328,0.978328,0.972136,0.975882,0.006295,1
4,0.24433,0.015223,0.071826,0.001668,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.570988,0.589506,0.560372,0.594427,0.569659,0.57699,0.012858,4
5,0.034308,0.002253,0.010168,0.001464,20,linear,"{'C': 20, 'kernel': 'linear'}",0.966049,0.984568,0.978328,0.978328,0.972136,0.975882,0.006295,1


In [11]:
# Keep only the parameter settings and their mean cross-validation score.
result_df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.542359
1,1,linear,0.975882
2,10,rbf,0.57699
3,10,linear,0.975882
4,20,rbf,0.57699
5,20,linear,0.975882


In [12]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: evaluate only n_iter=2 sampled C / kernel settings
# instead of the full grid, each with cv=5.
# random_state is fixed so the same candidates are sampled on every run;
# without it the table below changes each time the cell is executed.
rsc = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=5,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)
rsc.fit(X_train, y_train)
# Show the sampled settings with their mean cross-validation score.
pd.DataFrame(rsc.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.542359
1,10,linear,0.975882


In [13]:
# Candidate models and the hyperparameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  - the unfitted estimator handed to the search
#   'params' - the parameter grid for that estimator
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # Seeded so the forest's scores are reproducible between runs.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10],
        },
    },
    'logistic_regression': {
        # multi_class is left at its default rather than passed as 'auto':
        # the explicit argument is deprecated in recent scikit-learn
        # releases, and the default selects the same behaviour.
        # NOTE(review): newer releases may also warn about liblinear on
        # multiclass targets -- confirm against the installed version.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 5, 10],
        },
    },
}


In [14]:
# Run a cv=5 grid search for every entry in model_params and record each
# model's best score together with the parameters that achieved it.
scores = []

for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(X_train, y_train)
    scores.append(dict(
        model=model_name,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))

# One row per model, columns in a fixed order.
pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

Unnamed: 0,model,best_score,best_params
0,svm,0.975882,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.941251,{'n_estimators': 10}
2,logistic_regression,0.95485,{'C': 1}
