### sklearn.grid_search

In [2]:
from sklearn import model_selection, datasets, linear_model, metrics

import numpy as np
import pandas as pd

#### Генерация датасетов

In [3]:
iris = datasets.load_iris()

In [16]:
train_data, test_data, train_labels, test_labels = model_selection.train_test_split(iris.data, iris.target, 
                                                                                    test_size=0.3, random_state=0)

#### Задание модели

In [6]:
classifier = linear_model.SGDClassifier(random_state=0)

#### Генерация сетки

In [54]:
classifier.get_params()

{'alpha': 0.0001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 1000,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l2',
 'power_t': 0.5,
 'random_state': 0,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [55]:
parametres_grid = {
    'loss' : ['hinge', 'log', 'squared_hinge', 'squared_loss'],
    'penalty' : ['l1', 'l2'],
    'n_iter_no_change' : range(5,10),
    'alpha' : np.linspace(0.0001, 0.001, num=5)
}

In [56]:
cv = model_selection.StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=0).split(train_data, train_labels)

### Подбор параметров и оценка качества

In [41]:
grid_cv = model_selection.GridSearchCV(classifier, parametres_grid, scoring='accuracy', cv=cv)

In [42]:
%%time
grid_cv.fit(train_data, train_labels)

CPU times: user 5.03 s, sys: 45.5 ms, total: 5.08 s
Wall time: 5.09 s


GridSearchCV(cv=<generator object BaseShuffleSplit.split at 0x7fa0b22850b0>,
             estimator=SGDClassifier(random_state=0),
             param_grid={'alpha': array([0.0001  , 0.000325, 0.00055 , 0.000775, 0.001   ]),
                         'loss': ['hinge', 'log', 'squared_hinge',
                                  'squared_loss'],
                         'n_iter_no_change': range(5, 10),
                         'penalty': ['l1', 'l2']},
             scoring='accuracy')

In [43]:
grid_cv.best_estimator_

SGDClassifier(alpha=0.00055, n_iter_no_change=6, penalty='l1', random_state=0)

In [45]:
print(grid_cv.best_score_)
print(grid_cv.best_params_)

0.9857142857142858
{'alpha': 0.00055, 'loss': 'hinge', 'n_iter_no_change': 6, 'penalty': 'l1'}


'accuracy'

#### Randomized grid search

In [58]:
randomized_grid_cv = model_selection.RandomizedSearchCV(classifier, parametres_grid,
                                                       scoring='accuracy', cv=cv, n_iter=20)

In [59]:
%%time
randomized_grid_cv.fit(train_data, train_labels)

CPU times: user 523 ms, sys: 6.15 ms, total: 529 ms
Wall time: 527 ms


RandomizedSearchCV(cv=<generator object BaseShuffleSplit.split at 0x7fa0b22c6f90>,
                   estimator=SGDClassifier(random_state=0), n_iter=20,
                   param_distributions={'alpha': array([0.0001  , 0.000325, 0.00055 , 0.000775, 0.001   ]),
                                        'loss': ['hinge', 'log',
                                                 'squared_hinge',
                                                 'squared_loss'],
                                        'n_iter_no_change': range(5, 10),
                                        'penalty': ['l1', 'l2']},
                   scoring='accuracy')

In [61]:
print(randomized_grid_cv.best_score_)
print(randomized_grid_cv.best_params_)

0.9571428571428571
{'penalty': 'l1', 'n_iter_no_change': 5, 'loss': 'log', 'alpha': 0.0007750000000000001}
