In [3]:
import numpy as np
import multiprocessing

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression

# For reproducibility
np.random.seed(1000)

# Load dataset
iris = load_iris()

In [12]:
# Define a param grid
param_grid = [
    {
        'penalty': ['l1', 'l2'],
        'C': [0.5, 1.0, 1.5, 1.8, 2.0, 2.5]
    }
]

# Create and train a grid search
gs = GridSearchCV(estimator=LogisticRegression(max_iter = 1000), param_grid=param_grid,
                    scoring='accuracy', cv=10, n_jobs=multiprocessing.cpu_count())

gs.fit(iris.data, iris.target)

GridSearchCV(cv=10, estimator=LogisticRegression(max_iter=1000), n_jobs=4,
             param_grid=[{'C': [0.5, 1.0, 1.5, 1.8, 2.0, 2.5],
                          'penalty': ['l1', 'l2']}],
             scoring='accuracy')

When working with parallel algorithms, scikit-learn provides the ***n_jobs*** parameter, which allows us to specify how many threads must be used. Setting ***n_jobs=multiprocessing.cpu_count()*** is useful to exploit all CPU cores available on the current machine.

In [13]:
# Best estimator
print(gs.best_estimator_)

LogisticRegression(C=2.5, max_iter=1000)


In [16]:
gs_scores = cross_val_score(gs.best_estimator_, iris.data, iris.target, scoring='accuracy', cv=10)
print('Best estimator CV average score: %.3f' % gs_scores.mean())

Best estimator CV average score: 0.980


The type of penalty is not shown by default. But everything can be traced back using ***get_params()***.

In [17]:
gs.best_estimator_.get_params()

{'C': 2.5,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 1000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [18]:
from sklearn.linear_model import SGDClassifier

param_grid = [
    {
        'penalty': ['l1', 'l2', 'elasticnet'],
        'alpha': [1e-5, 1e-4, 5e-4, 1e-3, 2.3e-3, 5e-3, 1e-2],
        'l1_ratio': [0.01, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, 0.8]
    }
]

# Create SGD classifier
sgd = SGDClassifier(loss='perceptron', learning_rate='optimal')

In [22]:
# Create and train a grid search
gs = GridSearchCV(estimator=sgd, param_grid=param_grid, scoring='accuracy', cv=10,
                    n_jobs=multiprocessing.cpu_count())
gs.fit(iris.data, iris.target)

# Best estimator
print(gs.best_estimator_)
print(gs.best_estimator_.get_params())

SGDClassifier(alpha=0.005, l1_ratio=0.1, loss='perceptron', penalty='l1')
{'alpha': 0.005, 'average': False, 'class_weight': None, 'early_stopping': False, 'epsilon': 0.1, 'eta0': 0.0, 'fit_intercept': True, 'l1_ratio': 0.1, 'learning_rate': 'optimal', 'loss': 'perceptron', 'max_iter': 1000, 'n_iter_no_change': 5, 'n_jobs': None, 'penalty': 'l1', 'power_t': 0.5, 'random_state': None, 'shuffle': True, 'tol': 0.001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}


In [20]:
gs_scores = cross_val_score(gs.best_estimator_, iris.data, iris.target, scoring='accuracy', cv=10)
print('Best estimator CV average score: %.3f' % gs_scores.mean())

Best estimator CV average score: 0.980
