In [1]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
# Data for the search: the iris features and class labels.
iris = load_iris()

# Estimator whose hyper-parameters are tuned below.
logistic = LogisticRegression(
    solver="saga",
    tol=1e-2,
    max_iter=200,
    random_state=0,
)

# Search space: C is drawn from scipy's uniform(loc=0, scale=4), i.e. the
# interval [0, 4]; penalty is picked from the two listed options.
distributions = {
    "C": uniform(loc=0, scale=4),
    "penalty": ["l2", "l1"],
}

# random_state=0 makes the sampled candidates repeatable.
clf = RandomizedSearchCV(logistic, distributions, random_state=0)
search = clf.fit(iris.data, iris.target)

# Last expression of the cell: displays the best parameter setting found.
search.best_params_

{'C': np.float64(2.195254015709299), 'penalty': 'l1'}

In [3]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
# Data for the search: the iris features and class labels.
iris = datasets.load_iris()

# Exhaustive grid: 2 kernels x 2 values of C = 4 candidates.
parameters = {
    "kernel": ("linear", "rbf"),
    "C": [1, 10],
}

svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(iris.data, iris.target)

# Last expression of the cell: displays the per-candidate timing and score
# arrays collected during the search.
clf.cv_results_

{'mean_fit_time': array([0.00092397, 0.0018003 , 0.00142837, 0.00101542]),
 'std_fit_time': array([0.00025633, 0.00081025, 0.00030957, 0.00022945]),
 'mean_score_time': array([0.00052872, 0.00113888, 0.00089946, 0.00059228]),
 'std_score_time': array([4.96837319e-05, 2.83613763e-04, 2.35238980e-04, 1.41539406e-04]),
 'param_C': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf'],
              mask=[False, False, False, False],
        fill_value=np.str_('?'),
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
  {'C': 1, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'}],
 'split0_test_score': array([0.96666667, 0.96666667, 1.        , 0.96666667]),
 'split1_test_score': array([1.        , 0.96666667, 1.        , 1.        ]),
 'split2_test_score': array([0.96666667, 0.96666667, 0.9       , 0.96666667]),
 'spli

In [4]:
from time import time

import numpy as np
import scipy.stats as stats

from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# get some data
# n_class=3 restricts the digits dataset to three of its classes, which keeps
# the searches below quick.
X, y = load_digits(return_X_y=True, n_class=3)

# build a classifier
# random_state is fixed so the estimator's internal shuffling of the training
# data is repeatable; without it, every cross-validated score computed from
# this classifier changes from one run of the notebook to the next.
clf = SGDClassifier(
    loss="hinge", penalty="elasticnet", fit_intercept=True, random_state=0
)


# Utility function to report best scores
def report(results, n_top=3):
    """Print a short summary of the best-ranked candidates of a search.

    Parameters
    ----------
    results : mapping
        Must provide the keys ``"rank_test_score"``, ``"mean_test_score"``,
        ``"std_test_score"`` (indexable, array-like) and ``"params"``
        (a sequence aligned with those arrays).
    n_top : int, default=3
        Ranks ``1`` through ``n_top`` are reported. Every candidate whose
        rank equals one of those values is printed, so ties yield several
        entries for the same rank.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    for rank in range(1, n_top + 1):
        # Positions of all candidates sharing this rank.
        tied = np.flatnonzero(results["rank_test_score"] == rank)
        for idx in tied:
            mean_score = results["mean_test_score"][idx]
            std_score = results["std_test_score"][idx]
            settings = results["params"][idx]

            print("Model with rank: {0}".format(rank))
            print(
                "Mean validation score: {0:.3f} (std: {1:.3f})".format(
                    mean_score, std_score
                )
            )
            print("Parameters: {0}".format(settings))
            print("")


# specify parameters and distributions to sample from
# "average" is chosen from a list; "l1_ratio" and "alpha" are drawn from
# continuous scipy distributions (uniform and log-uniform respectively).
param_dist = {
    "average": [True, False],
    "l1_ratio": stats.uniform(0, 1),
    "alpha": stats.loguniform(1e-2, 1e0),
}

# run randomized search
# random_state pins which candidates are drawn from param_dist, so a rerun
# evaluates the same n_iter_search settings; the other randomized searches in
# this notebook are seeded the same way.
n_iter_search = 15
random_search = RandomizedSearchCV(
    clf, param_distributions=param_dist, n_iter=n_iter_search, random_state=0
)

# Time the whole fit (all candidates, all CV splits) with wall-clock time.
start = time()
random_search.fit(X, y)
print(
    "RandomizedSearchCV took %.2f seconds for %d candidates parameter settings."
    % ((time() - start), n_iter_search)
)
report(random_search.cv_results_)

# use a full grid over all parameters
# 2 (average) x 10 (l1_ratio) x 3 (alpha) = 60 candidates.
param_grid = {
    "average": [True, False],
    "l1_ratio": np.linspace(0, 1, num=10),
    "alpha": np.power(10, np.arange(-2, 1, dtype=float)),
}

# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

print(
    "GridSearchCV took %.2f seconds for %d candidate parameter settings."
    % (time() - start, len(grid_search.cv_results_["params"]))
)
report(grid_search.cv_results_)

RandomizedSearchCV took 0.93 seconds for 15 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.993 (std: 0.011)
Parameters: {'alpha': np.float64(0.9618041303454363), 'average': False, 'l1_ratio': np.float64(0.014040963651391625)}

Model with rank: 2
Mean validation score: 0.985 (std: 0.013)
Parameters: {'alpha': np.float64(0.054369578159338315), 'average': False, 'l1_ratio': np.float64(0.4285242701765367)}

Model with rank: 3
Mean validation score: 0.985 (std: 0.013)
Parameters: {'alpha': np.float64(0.05071592069277454), 'average': False, 'l1_ratio': np.float64(0.06174162393126825)}

GridSearchCV took 4.22 seconds for 60 candidate parameter settings.
Model with rank: 1
Mean validation score: 0.993 (std: 0.007)
Parameters: {'alpha': np.float64(0.1), 'average': False, 'l1_ratio': np.float64(0.3333333333333333)}

Model with rank: 2
Mean validation score: 0.989 (std: 0.022)
Parameters: {'alpha': np.float64(0.1), 'average': False, 'l1_ratio': np.float64(0.77777777777

In [5]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV
X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0)

# Candidate grid: 2 depths x 2 split thresholds = 4 combinations.
param_grid = {
    "max_depth": [3, None],
    "min_samples_split": [5, 10],
}

# The resource grown between halving iterations is the forest's
# n_estimators, capped at 10.
search = HalvingGridSearchCV(
    clf,
    param_grid,
    resource="n_estimators",
    max_resources=10,
    random_state=0,
)
search.fit(X, y)

# Last expression of the cell: displays the best parameter setting found.
search.best_params_

{'max_depth': None, 'min_samples_split': 5, 'n_estimators': 9}

In [6]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingRandomSearchCV
from scipy.stats import randint
import numpy as np
X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0)

# Seed NumPy's global random generator before the search is built.
np.random.seed(0)

# max_depth is picked from a list; min_samples_split is an integer drawn
# from scipy's randint(2, 11), i.e. 2 through 10 inclusive.
param_distributions = {
    "max_depth": [3, None],
    "min_samples_split": randint(2, 11),
}

# The resource grown between halving iterations is the forest's
# n_estimators, capped at 10.
search = HalvingRandomSearchCV(
    clf,
    param_distributions,
    resource="n_estimators",
    max_resources=10,
    random_state=0,
)
search.fit(X, y)

# Last expression of the cell: displays the best parameter setting found.
search.best_params_

{'max_depth': 3, 'min_samples_split': 3, 'n_estimators': 9}