<a href="https://colab.research.google.com/github/KhazaeiM/Machine_Learning/blob/main/HyperParameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
import numpy as np

In [2]:
# Load the California housing data once; return_X_y=True returns the
# (features, target) pair directly instead of a Bunch that must be indexed.
X, y = fetch_california_housing(return_X_y=True)
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


In [5]:
# Baseline model: standardise the features, then fit a Ridge regressor
# with its default hyper-parameters.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("regressor", Ridge()),
    ]
)
pipe.fit(X_train, y_train)

# Score of the untuned pipeline on the held-out split.
print(pipe.score(X_test, y_test))

0.5943141338604155


In [7]:
# List every parameter of the pipeline; parameters of a nested step appear
# under "<step name>__<parameter>" keys (e.g. 'regressor__alpha').
pipe.get_params()

{'memory': None,
 'regressor': Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
       normalize=False, random_state=None, solver='auto', tol=0.001),
 'regressor__alpha': 1.0,
 'regressor__copy_X': True,
 'regressor__fit_intercept': True,
 'regressor__max_iter': None,
 'regressor__normalize': False,
 'regressor__random_state': None,
 'regressor__solver': 'auto',
 'regressor__tol': 0.001,
 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'steps': [('scaler',
   StandardScaler(copy=True, with_mean=True, with_std=True)),
  ('regressor',
   Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
         normalize=False, random_state=None, solver='auto', tol=0.001))],
 'verbose': False}

In [10]:
# Tune the ridge penalty: 20 log-spaced candidates between 1e-3 and 1e3,
# addressed through the pipeline as "<step name>__<parameter>".
param_grid = {'regressor__alpha': np.logspace(-3, 3, num=20)}

# 5-fold cross-validation over the whole pipeline, run on two workers.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=2,
    verbose=1,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  90 tasks      | elapsed:    1.4s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    1.5s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('regressor',
                                        Ridge(alpha=1.0, copy_X=True,
                                              fit_intercept=True, max_iter=None,
                                              normalize=False,
                                              random_state=None, solver='auto',
                                              tol=0.001))],
                                verbose=False),
             iid='deprecated', n_jobs=2,
             param_grid={'regressor__alpha': array...3240e-03, 8.85866790e-03,
       1.83298071e-02, 3.79269019e-02, 7.84759970e-02, 1.62377674e-01,
       3.35981

In [11]:
# Parameter setting that achieved the best mean cross-validated score.
grid_search.best_params_

{'regressor__alpha': 12.742749857031322}

In [12]:
# Mean cross-validated score of the best candidate; the search was fitted on
# the training split only, so this is not a test-set score.
grid_search.best_score_

0.6053953632414377

In [16]:
from tempfile import mkdtemp
from shutil import rmtree

# Same scaler + Ridge pipeline, but with fitted transformers cached on disk
# through Pipeline's `memory` argument.
cachedir = mkdtemp()
try:
    pipe_cache = Pipeline([('scaler', StandardScaler()), ('regressor', Ridge())], memory=cachedir)
    pipe_cache.fit(X_train, y_train)
finally:
    # Always remove the temporary cache directory, even if fitting raises.
    rmtree(cachedir)

In [17]:
# Alternative layout: the grid search is itself the last pipeline step, so the
# tuned estimator is a bare Ridge and its parameter needs no "<step>__" prefix.
param_grid = {'alpha': np.logspace(-3, 3, num=20)}
grid_search = GridSearchCV(
    estimator=Ridge(),
    param_grid=param_grid,
    cv=5,
    n_jobs=2,
    verbose=1,
)

# NOTE: in this arrangement the scaler is fitted once on all of X_train before
# the inner search splits the (already scaled) data into folds.
pipe2 = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('grid_search', grid_search),
    ]
)
pipe2.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  90 tasks      | elapsed:    1.2s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    1.2s finished


Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('grid_search',
                 GridSearchCV(cv=5, error_score=nan,
                              estimator=Ridge(alpha=1.0, copy_X=True,
                                              fit_intercept=True, max_iter=None,
                                              normalize=False,
                                              random_state=None, solver='auto',
                                              tol=0.001),
                              iid='deprecated', n_jobs=2,
                              param_grid={'alpha': array([1.00000000e-03, 2.0691380...90e-03,
       1.83298071e-02, 3.79269019e-02, 7.84759970e-02, 1.62377674e-01,
       3.35981829e-01, 6.95192796e-01, 1.43844989e+00, 2.97635144e+00,
       6.15848211e+00, 1.27427499e+01, 2.63665090e+01, 5.45559478e+01,
       1.12883789e+02, 2.33572147e+02, 4.83293024e+02, 1.00000000e+03])},

In [19]:
# Best parameters found by the GridSearchCV step nested inside pipe2.
pipe2.named_steps['grid_search'].best_params_

{'alpha': 26.366508987303554}

In [25]:
from sklearn.decomposition import PCA

# Three-step pipeline: scale, reduce dimensionality with PCA, then Ridge regression.
pipe3 = Pipeline([('scaler', StandardScaler()), ('dim-red', PCA()), ('regressor', Ridge())])

# Search jointly over the number of principal components and the ridge penalty
# (5 x 20 = 100 candidates).
param_grid = {'dim-red__n_components': [2, 3, 4, 5, 6],
              'regressor__alpha': np.logspace(-3, 3, 20)}
grid_search = GridSearchCV(pipe3, param_grid, cv=5, n_jobs=2, verbose=1)
# Trailing ';' suppresses the fitted-estimator repr so the cell shows only the fit log.
grid_search.fit(X_train, y_train);


Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done 300 tasks      | elapsed:    4.9s
[Parallel(n_jobs=2)]: Done 500 out of 500 | elapsed:    8.2s finished


In [26]:
grid_search.best_params_

{'dim-red__n_components': 6, 'regressor__alpha': 26.366508987303554}

In [28]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized alternative to the exhaustive grid: evaluate only n_iter=20
# candidates sampled from param_grid instead of every combination.
# random_state pins which candidates are sampled, so re-running the cell
# reproduces the same search (consistent with the seeded train/test split).
random_search = RandomizedSearchCV(
    pipe3,
    param_grid,
    cv=5,
    verbose=1,
    n_jobs=2,
    n_iter=20,
    random_state=0,
)
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    1.7s finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=Pipeline(memory=None,
                                      steps=[('scaler',
                                              StandardScaler(copy=True,
                                                             with_mean=True,
                                                             with_std=True)),
                                             ('dim-red',
                                              PCA(copy=True,
                                                  iterated_power='auto',
                                                  n_components=None,
                                                  random_state=None,
                                                  svd_solver='auto', tol=0.0,
                                                  whiten=False)),
                                             ('regressor',
                                              Ridge(alpha=1.0, copy_X=True,
                

In [32]:
from sklearn.model_selection import RandomizedSearchCV

# Larger randomized search: 8 component counts x 200 penalties, of which
# n_iter=100 candidates are sampled. Fitted transformers are cached on disk via
# Pipeline's `memory` argument.
cachedir = mkdtemp()
try:
    pipe4 = Pipeline([('scaler', StandardScaler()), ('dim-red', PCA()), ('regressor', Ridge())], memory=cachedir)
    param_grid = {'dim-red__n_components': range(1, 9),
                  'regressor__alpha': np.logspace(-3, 3, 200)}
    # random_state pins the sampled candidates so the reported score is reproducible.
    random_search = RandomizedSearchCV(pipe4, param_grid, cv=5, n_jobs=2, verbose=1, n_iter=100,
                                       random_state=0)
    random_search.fit(X_train, y_train)
    # Score of the refitted best pipeline on the held-out split.
    test_score = random_search.score(X_test, y_test)
finally:
    # Remove the temporary cache directory whether or not the search succeeded.
    rmtree(cachedir)

# Displayed at top level so the score remains the cell's output.
test_score

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done 202 tasks      | elapsed:    3.3s
[Parallel(n_jobs=2)]: Done 500 out of 500 | elapsed:    6.0s finished


0.5941891003619316