In [1]:
from sklearn import model_selection, datasets, linear_model, metrics

import numpy as np
import pandas as pd

In [2]:
iris = datasets.load_iris()

In [30]:
train_data, test_data, train_labels, test_labels = model_selection.train_test_split(iris.data, iris.target, test_size=0.3, random_state=0)

In [31]:
clf = linear_model.SGDClassifier(random_state=0)

In [32]:
clf.get_params()

{'alpha': 0.0001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 1000,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l2',
 'power_t': 0.5,
 'random_state': 0,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [44]:
parameters = {
    'loss': ['hinge', 'log', 'squared_hinge', 'squared_loss'],
    'penalty': ['l1', 'l2'],
    'max_iter': range(5, 10),
    'alpha': np.linspace(0.0001, 0.001, 5)
}

In [45]:
cv = model_selection.StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=0)

In [46]:
cv

StratifiedShuffleSplit(n_splits=10, random_state=0, test_size=0.2,
            train_size=None)

In [47]:
cv.split(train_data, train_labels)

<generator object BaseShuffleSplit.split at 0x000001F26B3AE740>

In [48]:
train_data.shape

(105, 4)

In [49]:
train_labels.shape

(105,)

In [50]:
test_data.shape

(45, 4)

In [51]:
test_labels.shape

(45,)

In [52]:
for train_indices, test_indices in cv.split(train_data, train_labels):
    print(train_indices, test_indices)

[ 76  57   9  50  31  10  56  61  99  88  32 104  78  89  49  45  84  93
  85   5  44  20  24  72 103  55  86  11   3  34  66  39  64  29  46   7
  17  12  92  40  28  67  42   0   6  68  18  38  82  30  81  75  15  79
  33  53  58  65  74  13  52  95  25  54  23   8  87 101  77  14  73  71
  83  98  47  59  51  63  60  26  90  94  21  48] [ 22 102  41 100  35  27  62  80  70  43  69  91  96   2  36  37  97   1
  19   4  16]
[  7  20 100  19  72  63  47  53  71  95  70  59  83 101  13  44   5  21
  12  64  34  37  28  97  80  52  29 103  46  14  66  56  42  81  84  18
   1   6  87  54  48  50   0  86  62  94  93  38  85  99  10  11  60  79
  88  17  23  55  75  96  65  98  15  22  16  25  91  39  45  43  89  82
  31  61  26  49   9  78  57  24   2  77  35   3] [ 73  68  90  74  92  32 104  36  51  41  69  67  27 102  30   8   4  33
  40  58  76]
[ 71  20  72  75  98  83  50 103  97  95  70  30  66  59  24  28  25  85
   1  27  78  35  40  33  58  49  87 101  92  86  64  34  84  88  80 

In [53]:
grid_cv = model_selection.GridSearchCV(clf, parameters, scoring='accuracy', cv=cv)

In [55]:
import warnings
warnings.filterwarnings('ignore')

In [56]:
%%time
grid_cv.fit(train_data, train_labels)

CPU times: total: 4.75 s
Wall time: 4.82 s


GridSearchCV(cv=StratifiedShuffleSplit(n_splits=10, random_state=0, test_size=0.2,
            train_size=None),
             estimator=SGDClassifier(random_state=0),
             param_grid={'alpha': array([0.0001  , 0.000325, 0.00055 , 0.000775, 0.001   ]),
                         'loss': ['hinge', 'log', 'squared_hinge',
                                  'squared_loss'],
                         'max_iter': range(5, 10), 'penalty': ['l1', 'l2']},
             scoring='accuracy')

In [57]:
grid_cv.best_estimator_

SGDClassifier(alpha=0.0007750000000000001, max_iter=9, penalty='l1',
              random_state=0)

In [68]:
pp = grid_cv.best_params_
pp

{'alpha': 0.0007750000000000001,
 'loss': 'hinge',
 'max_iter': 9,
 'penalty': 'l1'}

In [60]:
grid_cv.best_score_

0.9047619047619048

In [84]:
ff = linear_model.SGDClassifier(alpha=0.0007750000000000001,
 loss='hinge',
 max_iter=9,
 penalty='l1',
 random_state=0
)

In [85]:
ff.fit(train_data, train_labels)

SGDClassifier(alpha=0.0007750000000000001, max_iter=9, penalty='l1',
              random_state=0)

In [86]:
ff.score(train_data, train_labels)

0.9428571428571428

In [87]:
grid_cv.best_estimator_.score(train_data, train_labels)

0.9428571428571428

In [88]:
grid_cv.best_estimator_.get_params()

{'alpha': 0.0007750000000000001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 9,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l1',
 'power_t': 0.5,
 'random_state': 0,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [89]:
ff.get_params() == grid_cv.best_estimator_.get_params()

True

In [90]:
ff.get_params()

{'alpha': 0.0007750000000000001,
 'average': False,
 'class_weight': None,
 'early_stopping': False,
 'epsilon': 0.1,
 'eta0': 0.0,
 'fit_intercept': True,
 'l1_ratio': 0.15,
 'learning_rate': 'optimal',
 'loss': 'hinge',
 'max_iter': 9,
 'n_iter_no_change': 5,
 'n_jobs': None,
 'penalty': 'l1',
 'power_t': 0.5,
 'random_state': 0,
 'shuffle': True,
 'tol': 0.001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [91]:
for param in ff.get_params().keys():
    if ff.get_params()[param] != grid_cv.best_estimator_.get_params()[param]:
        print(param)

In [93]:
grid_prikol = model_selection.GridSearchCV(clf, parameters)

In [94]:
grid_prikol.get_params()

{'cv': None,
 'error_score': nan,
 'estimator__alpha': 0.0001,
 'estimator__average': False,
 'estimator__class_weight': None,
 'estimator__early_stopping': False,
 'estimator__epsilon': 0.1,
 'estimator__eta0': 0.0,
 'estimator__fit_intercept': True,
 'estimator__l1_ratio': 0.15,
 'estimator__learning_rate': 'optimal',
 'estimator__loss': 'hinge',
 'estimator__max_iter': 1000,
 'estimator__n_iter_no_change': 5,
 'estimator__n_jobs': None,
 'estimator__penalty': 'l2',
 'estimator__power_t': 0.5,
 'estimator__random_state': 0,
 'estimator__shuffle': True,
 'estimator__tol': 0.001,
 'estimator__validation_fraction': 0.1,
 'estimator__verbose': 0,
 'estimator__warm_start': False,
 'estimator': SGDClassifier(random_state=0),
 'n_jobs': None,
 'param_grid': {'loss': ['hinge', 'log', 'squared_hinge', 'squared_loss'],
  'penalty': ['l1', 'l2'],
  'max_iter': range(5, 10),
  'alpha': array([0.0001  , 0.000325, 0.00055 , 0.000775, 0.001   ])},
 'pre_dispatch': '2*n_jobs',
 'refit': True,
 'retu

In [102]:
grid_prikol.fit(train_data, train_labels)

GridSearchCV(estimator=SGDClassifier(random_state=0),
             param_grid={'alpha': array([0.0001  , 0.000325, 0.00055 , 0.000775, 0.001   ]),
                         'loss': ['hinge', 'log', 'squared_hinge',
                                  'squared_loss'],
                         'max_iter': range(5, 10), 'penalty': ['l1', 'l2']})

In [103]:
grid_prikol.best_estimator_.score(train_data, train_labels)

0.9238095238095239