In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import datasets, svm

In [3]:
# Load the iris data directly as a (features, labels) pair of arrays.
X, y = datasets.load_iris(return_X_y=True)

# Show both shapes as a sanity check.
tuple(arr.shape for arr in (X, y))

((150, 4), (150,))

In [4]:
# Hold out 40% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0,
)

In [5]:
# Shapes of the training features and labels.
tuple(arr.shape for arr in (X_train, y_train))

((90, 4), (90,))

In [6]:
# Shapes of the held-out test features and labels.
tuple(arr.shape for arr in (X_test, y_test))

((60, 4), (60,))

In [7]:
# Linear-kernel SVM fitted on the training split only.
clf = svm.SVC(C=1, kernel='linear')
clf.fit(X_train, y_train)

# Score of the fitted model on the held-out test split.
clf.score(X_test, y_test)

0.9666666666666667

In [8]:
from sklearn.model_selection import cross_val_score

In [9]:
# Fresh (unfitted) linear SVM; cross_val_score fits it once per fold.
clf = svm.SVC(C=1, kernel='linear', random_state=42)

# Score the estimator on the full dataset with cv=5; one score per fold.
scores = cross_val_score(estimator=clf, X=X, y=y, cv=5)
scores

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [10]:
# Summarize the cross-validation scores: mean and standard deviation, two decimals each.
print(f'mean accuracy: {scores.mean():.2f}\nstandard deviation: {scores.std():.2f}')

mean accuracy: 0.98
standart deviation: 0.02


In [13]:
def custom_cv_2folds(X):
    """Yield two ``(train, test)`` index pairs, one per consecutive half of ``X``.

    Only ``X.shape[0]`` is read. The first pair covers the first half of the
    row indices, the second pair covers the second half.

    NOTE: each pair uses the *same* index array for both train and test, so
    when this generator is passed as ``cv`` every fold is scored on the very
    samples it was fitted on. The resulting scores are therefore optimistic
    and are not an estimate of generalization performance.

    Parameters
    ----------
    X : object with a ``shape`` attribute
        Data whose first dimension gives the number of samples.

    Yields
    ------
    tuple of (ndarray, ndarray)
        The same integer index array twice, as ``(train, test)``.
    """
    n_samples = X.shape[0]
    for fold in (1, 2):
        # Half-open index range [n*(fold-1)/2, n*fold/2) for this fold.
        half = np.arange(n_samples * (fold - 1) / 2, n_samples * fold / 2, dtype=int)
        yield half, half

# Build the two-fold split generator (single-use) and pass it to cross_val_score as `cv`.
custom_cv = custom_cv_2folds(X)
cross_val_score(estimator=clf, X=X, y=y, cv=custom_cv)

array([1.        , 0.97333333])

Пример использования поиска по сетке (grid search) для настройки параметра `C` с помощью класса `GridSearchCV` из библиотеки scikit-learn (пример сгенерирован ChatGPT).

In [16]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Candidate values of the regularization parameter `C` to evaluate.
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}

# Base estimator: a linear-kernel SVM.
# Deliberately NOT named `svm`: the notebook's import cell binds `svm` to the
# `sklearn.svm` module, and rebinding it to an SVC instance here would make every
# `svm.SVC(...)` cell fail when re-run after this one.
svc_estimator = SVC(kernel='linear')

# Grid search over `param_grid`; no `cv` argument is passed, so GridSearchCV's
# default cross-validation settings are used.
grid_search = GridSearchCV(svc_estimator, param_grid)

# Run the search on the training split only, leaving the test split untouched.
grid_search.fit(X_train, y_train)

# Best value of `C` found by the search.
best_C = grid_search.best_params_['C']
best_C

1