In [1]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

In [2]:
iris = datasets.load_iris()
iris.data.shape,iris.target.shape

((150, 4), (150,))

In [3]:
train_data,test_data,train_labels,test_labels = train_test_split(iris.data,
                                                                iris.target,
                                                                test_size=0.4,
                                                                random_state=2019)

In [4]:
(train_data.shape,test_data.shape,train_labels.shape,test_labels.shape)

((90, 4), (60, 4), (90,), (60,))

In [5]:
classifier = svm.SVC(kernel="linear",
                    C=1)

In [6]:
classifier.fit(train_data,train_labels)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [7]:
scores = classifier.score(test_data,test_labels)

In [8]:
from sklearn.model_selection import cross_val_score

In [9]:
clf = svm.SVC(kernel="linear",
             C=1)

In [10]:
score2 = cross_val_score(clf,iris.data,iris.target,
                        cv=5)

In [11]:
score2

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [12]:
mean_score2 = np.mean(score2)
std_score2 = np.std(score2)

In [13]:
from sklearn.model_selection import ShuffleSplit

In [14]:
n_samples = iris.data.shape[0]

In [15]:
cv = ShuffleSplit(n_splits=5,
                 test_size=0.3,
                 random_state=2019)

In [16]:
cross_val_score(clf,iris.data,iris.target,
               cv=cv)

array([1.        , 0.97777778, 1.        , 1.        , 1.        ])

In [17]:
def custom_cv_2folds(x):
    n = x.shape[0]
    i = 1
    while i<=2:
        idx = np.arange(n*(i-1)/2,
                       n*i/2,
                       dtype=int)
        yield idx,idx
        i += 1

In [22]:
custom_cv = custom_cv_2folds(iris.data)
cross_val_score(clf,iris.data,iris.target,
               cv=custom_cv)

array([1.        , 0.97333333])

In [23]:
from sklearn import preprocessing

In [24]:
train_data,test_data,train_labels,test_labels = train_test_split(
    iris.data,iris.target,
    test_size=0.4,
    random_state=2019)

In [25]:
(train_data.shape,test_data.shape,
train_labels.shape,test_labels.shape)

((90, 4), (60, 4), (90,), (60,))

In [26]:
scaler = preprocessing.StandardScaler()

In [27]:
scaler.fit(train_data)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [30]:
train_data_transform = scaler.transform(train_data)
test_data_transform = scaler.transform(test_data)

In [31]:
clf = svm.SVC(C=1)
clf.fit(train_data_transform,train_labels)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [32]:
clf.score(test_data_transform,test_labels)

0.9666666666666667

In [33]:
from sklearn.pipeline import make_pipeline

In [35]:
clf = make_pipeline(preprocessing.StandardScaler(),
                   svm.SVC(C=1))
cross_val_score(clf,iris.data,iris.target,
               cv=cv)

array([1.        , 0.95555556, 1.        , 1.        , 1.        ])

In [41]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score

In [45]:
scoring = ["precision_macro",
          "recall_macro"]

In [46]:
clf = svm.SVC(kernel="linear",
             C=1,
             random_state=2019)

In [47]:
scores = cross_validate(clf,iris.data,iris.target,
                       scoring=scoring,
                       cv=5)

In [49]:
sorted(scores.keys())

['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']

In [51]:
scores["test_recall_macro"]

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [52]:
from sklearn.metrics.scorer import make_scorer

In [53]:
scoring = dict(prec_macro="precision_macro",
              rec_macro=make_scorer(recall_score,
                                   average="macro"))

In [55]:
scores = cross_validate(clf,iris.data,iris.target,
                       scoring=scoring,
                       cv=5,
                       return_train_score=True)

In [56]:
sorted(scores.keys())

['fit_time',
 'score_time',
 'test_prec_macro',
 'test_rec_macro',
 'train_prec_macro',
 'train_rec_macro']