In [10]:
#simple cross validation

from sklearn.model_selection import cross_val_score as cvs
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
iris = load_iris()
logreg = LogisticRegression(max_iter=10000)
scores = cvs(logreg, iris.data, iris.target,cv=10)
scores

array([1.        , 0.93333333, 1.        , 1.        , 0.93333333,
       0.93333333, 0.93333333, 1.        , 1.        , 1.        ])

In [11]:
scores.mean()

0.9733333333333334

In [21]:
# stratified cross validation : this should be prefered because of things like imbalanced data or data ordered by label

from sklearn.model_selection import KFold
kf= KFold(n_splits=20, shuffle=True, random_state=13) 
logreg = LogisticRegression(max_iter=10000)
scores = cvs(logreg, iris.data, iris.target,cv=kf)
scores

array([1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.875     , 1.        , 0.75      , 1.        ,
       0.85714286, 1.        , 1.        , 1.        , 0.85714286,
       1.        , 1.        , 1.        , 1.        , 1.        ])

In [20]:
scores.mean()

0.9599999999999999

In [25]:
# paramaeter grid and cross validation

param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                  'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
print("Parameter grid:\n{}".format(param_grid))


Parameter grid:
{'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}


In [34]:
from sklearn.model_selection import GridSearchCV 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split as tts

grid_search = GridSearchCV(SVC(random_state=10), param_grid, cv=5)
X_train, X_test, y_train, y_test = tts(iris.data, iris.target, random_state=0)
grid_search.fit(X_train,y_train)
score=grid_search.score(X_test,y_test)
score # this is test score

0.9736842105263158

In [35]:
grid_search.best_params_

{'C': 10, 'gamma': 0.1}

In [36]:
grid_search.best_score_ # this is score inside grid search 

0.9731225296442687

In [37]:
grid_search.best_estimator_

SVC(C=10, gamma=0.1, random_state=10)

In [38]:
grid_search.cv_results_

{'mean_fit_time': array([0.00143056, 0.00126405, 0.00124993, 0.00117798, 0.0012568 ,
        0.00119324, 0.00116854, 0.00117798, 0.00117674, 0.00118489,
        0.00122666, 0.0012619 , 0.00118623, 0.00121002, 0.00109515,
        0.00108957, 0.00129008, 0.00134096, 0.00127878, 0.00106444,
        0.00089374, 0.00100555, 0.00166168, 0.00142179, 0.00112433,
        0.00084295, 0.0008348 , 0.00095134, 0.00149403, 0.00146828,
        0.00081224, 0.00083709, 0.00076199, 0.00091896, 0.00139089,
        0.00142436]),
 'std_fit_time': array([2.10824014e-04, 1.28915675e-04, 7.37980953e-05, 3.73553755e-05,
        8.55922832e-05, 5.13395313e-05, 1.52419414e-05, 4.05960125e-05,
        3.63644533e-05, 3.36180004e-05, 3.32478961e-05, 3.40390933e-05,
        6.56822372e-05, 4.85000823e-05, 4.68574946e-05, 1.56827965e-05,
        7.67325668e-05, 1.08373726e-04, 8.30606805e-05, 1.51237879e-04,
        8.75211970e-05, 1.08390404e-04, 3.82940188e-04, 8.38933425e-05,
        9.42910244e-05, 2.75373356e-0

In [42]:
# outer and inner loops of cross-validation

kf= KFold(n_splits=20, shuffle=True, random_state=13) 
scores = cvs(GridSearchCV(SVC(), param_grid, cv=10),
                             iris.data, iris.target, cv=kf)
print("Cross-validation scores: ", scores) 
print("Mean cross-validation score: ", scores.mean())

Cross-validation scores:  [1.         1.         1.         1.         1.         1.
 0.875      1.         0.75       1.         1.         1.
 1.         1.         0.85714286 1.         1.         1.
 1.         1.        ]
Mean cross-validation score:  0.9741071428571428
