# Selecting Best Models Using Exhaustive Search

In [1]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import numpy as np

In [2]:
iris = load_iris()

In [3]:
Xi = iris.data
yi = iris.target

In [4]:
model = LogisticRegression()

In [13]:
param = {'C':np.logspace(0,4,10), 'penalty':['l1', 'l2']}

In [29]:
grid= GridSearchCV(model, [param], cv=10, scoring='accuracy')

In [35]:
best = grid.fit(Xi,yi)
import warnings
warnings.filterwarnings('ignore')

In [37]:
best.best_estimator_.get_params()

{'C': 2.7825594022071245,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [38]:
best.best_params_

{'C': 2.7825594022071245, 'penalty': 'l2'}

In [39]:
best.best_estimator_.predict(Xi)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [40]:
best.predict(Xi) # once fitted, the it is already the best model 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Selecting Best Models Using Randomized Search

In [41]:
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV

In [42]:
param = dict(C=uniform(loc=0, scale=4), penalty=['l1','l2'])

In [73]:
uniform(loc=0,scale=4).rvs(10)

array([2.52788801, 1.96094371, 3.15525246, 3.95501924, 2.29592568,
       2.64884108, 2.78680847, 1.42875095, 3.78133299, 0.47310103])

In [61]:
model1 = LogisticRegression()

In [66]:
Random = RandomizedSearchCV(model1, param, n_iter=100, cv=5, verbose=0, n_jobs=-1 )

In [None]:
# Random.fit(Xi,yi).predict(Xi)

In [None]:
# Random.best_params()
# Random.best_estimator_.get_params()['C']

# Selecting Best Models from Multiple Learning Algorithms

In [117]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC

In [120]:
pipe = Pipeline([('clf', LogisticRegression())]) # you can only put one classifier

In [123]:
params = [{'clf':[LogisticRegression()],
          'clf__C':np.logspace(0, 4, 10),
          'clf__penalty':['l1', 'l2']},
          {'clf':[RandomForestClassifier()],
          'clf__n_estimators':[10, 100, 1000],
          'clf__max_features':[1, 2, 3]}]

In [138]:
grid1 = GridSearchCV(pipe,params,cv=3, n_jobs=-1,verbose=0, scoring='accuracy')

In [142]:
import warnings
warnings.filterwarnings('ignore')
#grid1.fit(Xi,yi).best_estimator_.get_params()['clf']

# Selecting Best Models When Preprocessing

In [144]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion

In [150]:
preprocess = FeatureUnion([('std',StandardScaler()),
                           ('pca', PCA())])

In [154]:
pipe = Pipeline([('pre', preprocess),
                 ('clf', RandomForestClassifier())])

In [166]:
params1 = [{'pre__pca__n_components':[1, 2, 3],
          'clf__n_estimators':[10, 100, 1000],
          'clf__max_features':[1, 2, 3]}]

In [170]:
gd = GridSearchCV(pipe, params1, cv=3, verbose=0,  n_jobs=-1)

In [172]:
gd.fit(Xi,yi).best_estimator_.get_params()['pre__pca__n_components']

1

In [173]:
gd.predict(Xi)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Speeding Up Model Selection with Parallelization (set n_jobs=-1)

In [174]:
gd = GridSearchCV(pipe, params1, cv=3, verbose=1,  n_jobs=-1)

In [175]:
gd.fit(Xi,yi)

Fitting 3 folds for each of 27 candidates, totalling 81 fits


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('pre',
                                        FeatureUnion(transformer_list=[('std',
                                                                        StandardScaler()),
                                                                       ('pca',
                                                                        PCA())])),
                                       ('clf', RandomForestClassifier())]),
             n_jobs=-1,
             param_grid=[{'clf__max_features': [1, 2, 3],
                          'clf__n_estimators': [10, 100, 1000],
                          'pre__pca__n_components': [1, 2, 3]}],
             verbose=1)

# Speeding Up Model Selection Using AlgorithmSpecific Methods

In [5]:
from sklearn.linear_model import LogisticRegressionCV

In [6]:
model

LogisticRegression()

In [7]:
logitcv = LogisticRegressionCV(Cs=200)

In [8]:
logitcv.fit(Xi,yi)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

LogisticRegressionCV(Cs=200)

In [182]:
logitcv.fit(Xi,yi).predict(Xi)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Evaluating Performance After Model Selection

In [10]:
from sklearn.model_selection import cross_val_score

In [9]:
best = logitcv.fit(Xi,yi)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [11]:
cross_val_score(best, Xi, yi, cv=5, scoring='accuracy').mean()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.9733333333333334

In [2]:
import numpy as np

In [5]:
a = np.random.randint(1,10,(4,5))

In [6]:
a

array([[7, 4, 2, 1, 3],
       [4, 9, 4, 2, 8],
       [8, 5, 4, 9, 3],
       [7, 4, 3, 8, 5]])

In [29]:
a[1,3]=6000

In [31]:
a

array([[   7,    4,    2,    1,    3],
       [   4,    9,    4, 6000,    8],
       [   8,    5,    4,    9,    3],
       [   7,    4,    3,    8,    5]])

In [32]:
for i in np.nditer(a.T):
    print(i)

7
4
2
1
3
4
9
4
6000
8
8
5
4
9
3
7
4
3
8
5
