In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

# Load the Boston housing data as an object exposing `.data` and `.target`.
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2
# (accessing it there raises ImportError), so on newer versions rebuild the same
# arrays from the original StatLib file, following the recipe given in the
# scikit-learn removal notice. The fallback needs network access.
try:
    boston = datasets.load_boston()
except (ImportError, AttributeError):
    from sklearn.utils import Bunch

    # After a 22-line header, each record is spread over two physical lines:
    # even rows carry the first 11 features, odd rows the last 2 plus the target.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )
boston.data.shape, boston.target.shape

((506, 13), (506,))

In [2]:
import sklearn

In [4]:
# Hold out 40% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    test_size=0.4,
    random_state=0,
)

In [5]:
# Sanity check: shapes of the training features and training targets.
(X_train.shape, y_train.shape)

((303, 13), (303,))

In [6]:
# Sanity check: shapes of the held-out features and held-out targets.
(X_test.shape, y_test.shape)

((203, 13), (203,))

In [7]:
# Build a linear-kernel support vector regressor, fit it on the training split,
# then display its score on the held-out split.
regression = svm.SVR(C=1, kernel='linear')
regression.fit(X_train, y_train)
regression.score(X_test, y_test)

0.667431382173115

## Computing cross-validated metrics

In [8]:
from sklearn.model_selection import cross_val_score

# Evaluate a newly constructed linear SVR with 5-fold cross-validation over the
# whole dataset; `scores` holds one value per fold (estimator's default scorer).
regression = svm.SVR(C=1, kernel='linear')
scores = cross_val_score(
    regression,
    boston.data,
    boston.target,
    cv=5,
)
scores

array([0.77285459, 0.72771739, 0.56131914, 0.15056451, 0.08212844])

In [9]:
# Summarise the per-fold scores as mean +/- two standard deviations.
# The original combined an f-prefix (with no placeholders) and %-formatting;
# a single real f-string produces exactly the same text.
# NOTE(review): for a regressor the default score is R^2 rather than accuracy;
# the "Accuracy" label is kept so the recorded output still matches — consider renaming.
print(f"Accuracy: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")

Accuracy: 0.46 (+/- 0.58)


In [10]:
from sklearn import metrics
# Repeat the 5-fold evaluation with the negated mean-squared-error scorer.
# Note that this rebinds `scores`, replacing the values from the previous run.
scores = cross_val_score(
    regression,
    boston.data,
    boston.target,
    scoring='neg_mean_squared_error',
    cv=5,
)
scores

array([ -7.84451123, -24.78772444, -35.13272326, -74.50555945,
       -24.40465975])

### K-fold

In [14]:
import numpy as np
from sklearn.model_selection import KFold

# Toy example: split four samples into two folds and print the train/test
# index arrays produced for each round.
X = ["a", "b", "c", "d"]
kf = KFold(n_splits=2)

for train_idx, test_idx in kf.split(X):
    print(f"{train_idx!s}, {test_idx!s}")

[2 3], [0 1]
[0 1], [2 3]


### Stratified k-fold

In [15]:
from sklearn.model_selection import StratifiedKFold

# Toy example: ten placeholder samples whose labels are four 0s followed by six 1s.
X = np.ones(10)
y = [0] * 4 + [1] * 6

skf = StratifiedKFold(n_splits=3)

# Print the train/test index arrays of each of the three stratified rounds.
for train_idx, test_idx in skf.split(X, y):
    print(f"{train_idx!s} {test_idx!s}")

[2 3 6 7 8 9] [0 1 4 5]
[0 1 3 4 5 8 9] [2 6 7]
[0 1 2 4 5 6 7] [3 8 9]
