# Решетчатый поиск
Настройка параметров алгоритма является сложной задачей. Нужно понимать содержательный смысл этих параметров, прежде чем пытаться корректировать их. Библиотека scikit-learn предлагает стандартные методы для поиска оптимальных параметров. Одним из таких методов является *решетчатый поиск* (grid search), который перебирает все возможные комбинации значений параметров из заданной сетки.

In [6]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Load the iris dataset that ships with scikit-learn.
iris = load_iris()

# Hold out a test set; everything else stays available for training and validation.
# random_state is fixed so the split is reproducible.
(X_train_and_val, X_test,
 y_train_and_val, y_test) = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)

# Report the shape of the training(+validation) part and the test set size.
print(f'Размер обучающего набора: {X_train_and_val.shape}')
print(f'Количество элементов в тестовом наборе: {len(y_test)}')

Размер обучающего набора: (112, 4)
Количество элементов в тестовом наборе: 38


Теперь, однако, для оценки качества модели при разных комбинациях параметров нужно использовать отдельный (валидационный) набор данных, не зависящий ни от обучающего, ни от тестового:
![image.png](attachment:image.png)

In [10]:
# Split the non-test data once more: one part to actually fit the model on,
# the other to validate parameter choices. random_state is fixed for reproducibility.
(X_train, X_valid,
 y_train, y_valid) = train_test_split(
    X_train_and_val,
    y_train_and_val,
    random_state=1,
)

# Report the shape of the actual training part and the validation set size.
print(f'Размер НАСТОЯЩЕГО обучающего набора: {X_train.shape}')
print(f'Количество элементов для валидации: {len(y_valid)}')

Размер НАСТОЯЩЕГО обучающего набора: (84, 4)
Количество элементов для валидации: 28


**Важно**: необходимо всегда иметь независимый набор данных, который используется только для конечной оценки модели и не участвует в подборе параметров (в данном случае это X_test, y_test).

In [13]:
from sklearn.model_selection import GridSearchCV
# Candidate values for the two SVC parameters being varied.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    gamma=[0.01, 0.1, 1, 10, 100],
)

# Grid search over an SVC model, varying C and gamma,
# using 5-fold stratified cross-validation.
grid = GridSearchCV(SVC(), param_grid, cv=5)

# Cross-validation is used in this case, so the data is split only into
# training and test parts (this overwrites the earlier X_train / y_train).
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)
grid.fit(X_train, y_train)

# Last expression of the cell: shows the fitted search object's repr.
grid

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.01, 0.1, 1, 10, 100]})

Объект GridSearchCV автоматически перебирает все комбинации параметров, выбирает наилучшую (на основе перекрестной проверки) и строит модель с этими наилучшими параметрами.

In [21]:
from sklearn.metrics import f1_score
# Evaluate the tuned model on the held-out test set.
# f1_score takes the ground-truth labels first and the predictions second
# (y_true, y_pred). Micro-averaged F1 on single-label multiclass data gives the
# same value either way, but the correct order matters for other averaging modes.
print('F1 score = ', f1_score(y_test, grid.predict(X_test), average='micro'))

# Parameter combination selected by the grid search.
print('best parameters: ', grid.best_params_)

# NOTE: best_score_ is the mean cross-validated score of the best parameter
# combination (measured on the held-out folds), not a score on the data the
# model was fitted on.
print('Средняя правильность во время обучения: ', grid.best_score_)

F1 score =  0.9736842105263158
best parameters:  {'C': 10, 'gamma': 0.1}
Средняя правильность во время обучения:  0.9731225296442687


Иногда очень важно визуализировать результаты перекрестной проверки, чтобы понимать, как разные параметры влияют на обобщающую способность модели.

In [23]:
import pandas as pd
# Put the grid search's cv_results_ into a DataFrame so the results for each
# parameter combination can be inspected as a table.
cv_results_table = pd.DataFrame(data=grid.cv_results_)

# Last expression of the cell: renders the table in the notebook.
cv_results_table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004061,0.008122,0.001616,0.003231,0.001,0.01,"{'C': 0.001, 'gamma': 0.01}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
1,0.001598,0.003196,0.0,0.0,0.001,0.1,"{'C': 0.001, 'gamma': 0.1}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
2,0.0,0.0,0.0,0.0,0.001,1.0,"{'C': 0.001, 'gamma': 1}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
3,0.0016,0.003201,0.0,0.0,0.001,10.0,"{'C': 0.001, 'gamma': 10}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
4,0.0,0.0,0.0,0.0,0.001,100.0,"{'C': 0.001, 'gamma': 100}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
5,0.0016,0.003201,0.0,0.0,0.01,0.01,"{'C': 0.01, 'gamma': 0.01}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
6,0.0,0.0,0.0016,0.003199,0.01,0.1,"{'C': 0.01, 'gamma': 0.1}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
7,0.0,0.0,0.0,0.0,0.01,1.0,"{'C': 0.01, 'gamma': 1}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
8,0.0016,0.0032,0.0,0.0,0.01,10.0,"{'C': 0.01, 'gamma': 10}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
9,0.001603,0.003205,0.0,0.0,0.01,100.0,"{'C': 0.01, 'gamma': 100}",0.347826,0.347826,0.363636,0.363636,0.409091,0.366403,0.022485,19
