In [6]:
import pandas as pd 
from sklearn.datasets import load_diabetes

In [26]:
diabetes=load_diabetes(as_frame=True)
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

Note: Each of these 10 feature variables have bee

In [None]:
#If diabetes=load_diabetes() - then it comes as numpy array, 
# diabetes=load_diabetes(as_frame=True) - comes as pandas dataframe object 

In [27]:
df=pd.concat([diabetes.data,diabetes.target], axis=True)    #or use df= diabetes.frame
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [15]:
df.columns

Index(['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6',
       'target'],
      dtype='object')

In [28]:
## Why not to use this method
df1=pd.DataFrame(diabetes.data,columns=diabetes.feature_names)
df1.columns

Index(['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], dtype='object')

In [18]:
df1.concat(diabetes.target,axis=True)
df1.head()

AttributeError: 'DataFrame' object has no attribute 'concat'

In [25]:
print("Attribute names: ", diabetes.feature_names)
print("Total Observations: ", len(df))

Attribute names:  ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
Total Observations:  442


In [32]:
import sklearn
help(sklearn)

Help on package sklearn:

NAME
    sklearn - Configure global settings and get information about the working environment.

PACKAGE CONTENTS
    __check_build (package)
    _build_utils (package)
    _built_with_meson
    _config
    _distributor_init
    _isotonic
    _loss (package)
    _min_dependencies
    base
    calibration
    cluster (package)
    compose (package)
    conftest
    covariance (package)
    cross_decomposition (package)
    datasets (package)
    decomposition (package)
    discriminant_analysis
    dummy
    ensemble (package)
    exceptions
    experimental (package)
    externals (package)
    feature_extraction (package)
    feature_selection (package)
    gaussian_process (package)
    impute (package)
    inspection (package)
    isotonic
    kernel_approximation
    kernel_ridge
    linear_model (package)
    manifold (package)
    metrics (package)
    mixture (package)
    model_selection (package)
    multiclass
    multioutput
    naive_bayes
    neig

In [37]:
from sklearn import linear_model
help(linear_model)

Help on package sklearn.linear_model in sklearn:

NAME
    sklearn.linear_model - A variety of linear models.

PACKAGE CONTENTS
    _base
    _bayes
    _cd_fast
    _coordinate_descent
    _glm (package)
    _huber
    _least_angle
    _linear_loss
    _logistic
    _omp
    _passive_aggressive
    _perceptron
    _quantile
    _ransac
    _ridge
    _sag
    _sag_fast
    _sgd_fast
    _stochastic_gradient
    _theil_sen
    tests (package)

CLASSES
    sklearn.base.BaseEstimator(sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin, sklearn.utils._metadata_requests._MetadataRequester)
        sklearn.linear_model._huber.HuberRegressor(sklearn.linear_model._base.LinearModel, sklearn.base.RegressorMixin, sklearn.base.BaseEstimator)
        sklearn.linear_model._logistic.LogisticRegression(sklearn.linear_model._base.LinearClassifierMixin, sklearn.linear_model._base.SparseCoefMixin, sklearn.base.BaseEstimator)
            sklearn.linear_model._logistic.LogisticRegressionCV(skle

In [41]:
print(dir(linear_model._bayes))

['ARDRegression', 'BayesianRidge', 'Integral', 'Interval', 'LinearModel', 'Real', 'RegressorMixin', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_check_sample_weight', '_fit_context', '_preprocess_data', '_rescale_data', '_safe_indexing', 'fast_logdet', 'linalg', 'log', 'np', 'pinvh']


In [42]:
from sklearn import tree
print(dir(tree))

['BaseDecisionTree', 'DecisionTreeClassifier', 'DecisionTreeRegressor', 'ExtraTreeClassifier', 'ExtraTreeRegressor', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_classes', '_criterion', '_export', '_reingold_tilford', '_splitter', '_tree', '_utils', 'export_graphviz', 'export_text', 'plot_tree']


In [43]:
from sklearn import ensemble
print(dir(ensemble))

['AdaBoostClassifier', 'AdaBoostRegressor', 'BaggingClassifier', 'BaggingRegressor', 'BaseEnsemble', 'ExtraTreesClassifier', 'ExtraTreesRegressor', 'GradientBoostingClassifier', 'GradientBoostingRegressor', 'HistGradientBoostingClassifier', 'HistGradientBoostingRegressor', 'IsolationForest', 'RandomForestClassifier', 'RandomForestRegressor', 'RandomTreesEmbedding', 'StackingClassifier', 'StackingRegressor', 'VotingClassifier', 'VotingRegressor', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_bagging', '_base', '_forest', '_gb', '_gradient_boosting', '_hist_gradient_boosting', '_iforest', '_stacking', '_voting', '_weight_boosting']


In [47]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor

In [74]:
lr=LinearRegression()
ridge=Ridge()
dtr=DecisionTreeRegressor(max_depth=4, random_state=42)

In [46]:
help(Ridge)

Help on class Ridge in module sklearn.linear_model._ridge:

class Ridge(sklearn.base.MultiOutputMixin, sklearn.base.RegressorMixin, _BaseRidge)
 |  Ridge(alpha=1.0, *, fit_intercept=True, copy_X=True, max_iter=None, tol=0.0001, solver='auto', positive=False, random_state=None)
 |
 |  Linear least squares with l2 regularization.
 |
 |  Minimizes the objective function::
 |
 |  ||y - Xw||^2_2 + alpha * ||w||^2_2
 |
 |  This model solves a regression model where the loss function is
 |  the linear least squares function and regularization is given by
 |  the l2-norm. Also known as Ridge Regression or Tikhonov regularization.
 |  This estimator has built-in support for multi-variate regression
 |  (i.e., when y is a 2d-array of shape (n_samples, n_targets)).
 |
 |  Read more in the :ref:`User Guide <ridge_regression>`.
 |
 |  Parameters
 |  ----------
 |  alpha : {float, ndarray of shape (n_targets,)}, default=1.0
 |      Constant that multiplies the L2 term, controlling regularization
 | 

In [51]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(diabetes.data, diabetes.target, test_size=0.3,random_state=42)

In [78]:
lr.fit(x_train,y_train)
print("Coefficient of Determination R^2 using Linear regression is: ",lr.score(x_test, y_test))

ridge.fit(x_train,y_train)
print("Coefficient of Determination R^2 using Ridge regression is: ",ridge.score(x_test, y_test))

dtr.fit(x_train,y_train)
print("Coefficient of Determination R^2 using Decision tree regressor is: ",dtr.score(x_test, y_test))

Coefficient of Determination R^2 using Linear regression is:  0.4772897164322617
Coefficient of Determination R^2 using Ridge regression is:  0.4233440269603016
Coefficient of Determination R^2 using Decision tree regressor is:  0.3239689083020818


**Hyperparameter tuning with GridSearchCV**

In [79]:
from sklearn.model_selection import GridSearchCV

In [85]:
clf_ridge=GridSearchCV(ridge,{
    'alpha': [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs']},cv=5)
clf_ridge.fit(x_train,y_train)
clf_ridge.cv_results_

45 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\linear_model\_ridge.py", line 1251, in fit
    return super().fit(X, y, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File

{'mean_fit_time': array([0.00670891, 0.00661879, 0.00331464, 0.0064209 , 0.00378003,
        0.00493855, 0.00240164, 0.00113173, 0.0014019 , 0.00139499,
        0.00160041, 0.00200148, 0.00180168, 0.00210304, 0.00201626,
        0.0009995 , 0.00170836, 0.00160089, 0.00120697, 0.00140014,
        0.00160556, 0.00231004, 0.00322266, 0.00178542, 0.00159678,
        0.00160279, 0.0013905 , 0.00120025, 0.00240083, 0.00180578,
        0.00182834, 0.00100021, 0.00160022, 0.00190144, 0.00199847,
        0.00141397, 0.00168476, 0.00166717, 0.00201168, 0.00140185,
        0.00170941, 0.00159965, 0.00139809, 0.00169139, 0.00165577,
        0.00218949, 0.00159874, 0.00142198, 0.00268455, 0.00200548,
        0.00159478, 0.00263071, 0.00230217, 0.00212331, 0.00219202,
        0.00100546, 0.00198994, 0.00198412, 0.00161185, 0.0018189 ,
        0.00145674, 0.00199461, 0.00201802, 0.0013998 , 0.00140333,
        0.0016058 , 0.00158787, 0.00200214, 0.00180039, 0.00199018,
        0.00180101, 0.00100012]

In [87]:
df_ridge=pd.DataFrame(clf_ridge.cv_results_)
df_ridge.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.006709,0.003312,0.003702,0.00099,0.1,auto,"{'alpha': 0.1, 'solver': 'auto'}",0.525452,0.329423,0.540739,0.568847,0.289792,0.450851,0.116836,3
1,0.006619,0.005974,0.002403,0.000525,0.1,svd,"{'alpha': 0.1, 'solver': 'svd'}",0.525452,0.329423,0.540739,0.568847,0.289792,0.450851,0.116836,3
2,0.003315,0.000703,0.002413,0.000794,0.1,cholesky,"{'alpha': 0.1, 'solver': 'cholesky'}",0.525452,0.329423,0.540739,0.568847,0.289792,0.450851,0.116836,3
3,0.006421,0.003611,0.002893,0.000649,0.1,lsqr,"{'alpha': 0.1, 'solver': 'lsqr'}",0.525452,0.329422,0.540748,0.56884,0.289775,0.450847,0.11684,7
4,0.00378,0.001724,0.002311,0.000774,0.1,sparse_cg,"{'alpha': 0.1, 'solver': 'sparse_cg'}",0.525452,0.329422,0.540748,0.56884,0.289776,0.450848,0.11684,6


In [93]:
df_ridge[['params','mean_test_score']]

Unnamed: 0,params,mean_test_score
0,"{'alpha': 0.1, 'solver': 'auto'}",0.450851
1,"{'alpha': 0.1, 'solver': 'svd'}",0.450851
2,"{'alpha': 0.1, 'solver': 'cholesky'}",0.450851
3,"{'alpha': 0.1, 'solver': 'lsqr'}",0.450847
4,"{'alpha': 0.1, 'solver': 'sparse_cg'}",0.450848
...,...,...
67,"{'alpha': 0.9, 'solver': 'lsqr'}",0.366852
68,"{'alpha': 0.9, 'solver': 'sparse_cg'}",0.366849
69,"{'alpha': 0.9, 'solver': 'sag'}",0.366849
70,"{'alpha': 0.9, 'solver': 'saga'}",0.366848


In [88]:
clf_ridge.best_params_

{'alpha': 0.1, 'solver': 'sag'}

In [94]:
clf_ridge.best_score_

0.4508537088138834

In [103]:
clf_decision=GridSearchCV(dtr,{
    'max_depth': [0,1,2,3,4,5,6], 'min_samples_split': [2,3,4,5,10], 'splitter': ['best','random']}, cv=5
    )
clf_decision.fit(x_train,y_train)
clf_decision.cv_results_

50 fits failed out of a total of 350.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParamete

{'mean_fit_time': array([0.00130315, 0.00099621, 0.0009994 , 0.00039988, 0.0007988 ,
        0.00040145, 0.00038438, 0.00121479, 0.00104318, 0.00048232,
        0.00292211, 0.00179644, 0.00442481, 0.00393491, 0.00326414,
        0.00386987, 0.00289812, 0.00267177, 0.00337696, 0.00140944,
        0.00169764, 0.0014029 , 0.00199113, 0.00120401, 0.00269008,
        0.00207429, 0.00208912, 0.00146308, 0.00220795, 0.00267344,
        0.00233088, 0.00203872, 0.00226884, 0.00119882, 0.00188389,
        0.00139327, 0.00160098, 0.0014019 , 0.00198779, 0.00160913,
        0.00200257, 0.00156732, 0.00240674, 0.00180082, 0.00335526,
        0.00172472, 0.00219431, 0.00160027, 0.00202255, 0.00210042,
        0.00220499, 0.00139284, 0.00220127, 0.00140395, 0.00285473,
        0.00164518, 0.00239596, 0.00139742, 0.00227652, 0.00188332,
        0.00313282, 0.00247288, 0.00419478, 0.00260201, 0.00352273,
        0.00182705, 0.0035449 , 0.00266147, 0.0035418 , 0.00187645]),
 'std_fit_time': array([7.491

In [104]:
df_decision=pd.DataFrame(clf_decision.cv_results_)
df_decision

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001303,0.000749,0.000000,0.000000,0,2,best,"{'max_depth': 0, 'min_samples_split': 2, 'spli...",,,,,,,,61
1,0.000996,0.000009,0.000000,0.000000,0,2,random,"{'max_depth': 0, 'min_samples_split': 2, 'spli...",,,,,,,,61
2,0.000999,0.000002,0.000000,0.000000,0,3,best,"{'max_depth': 0, 'min_samples_split': 3, 'spli...",,,,,,,,61
3,0.000400,0.000490,0.000000,0.000000,0,3,random,"{'max_depth': 0, 'min_samples_split': 3, 'spli...",,,,,,,,61
4,0.000799,0.000399,0.000000,0.000000,0,4,best,"{'max_depth': 0, 'min_samples_split': 4, 'spli...",,,,,,,,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0.001827,0.000443,0.001232,0.000375,6,4,random,"{'max_depth': 6, 'min_samples_split': 4, 'spli...",0.306456,-0.054734,0.341181,0.333434,-0.106554,0.163957,0.200718,43
66,0.003545,0.000511,0.001299,0.000397,6,5,best,"{'max_depth': 6, 'min_samples_split': 5, 'spli...",0.042442,-0.298309,0.431557,0.426160,0.154862,0.151342,0.271331,50
67,0.002661,0.000695,0.001496,0.000605,6,5,random,"{'max_depth': 6, 'min_samples_split': 5, 'spli...",0.076659,-0.018040,0.407734,0.317225,-0.023179,0.152080,0.177751,46
68,0.003542,0.000392,0.001723,0.000685,6,10,best,"{'max_depth': 6, 'min_samples_split': 10, 'spl...",0.104861,-0.222155,0.425228,0.480321,0.191687,0.195988,0.251620,28


In [105]:
df_decision[['params','mean_test_score']]

Unnamed: 0,params,mean_test_score
0,"{'max_depth': 0, 'min_samples_split': 2, 'spli...",
1,"{'max_depth': 0, 'min_samples_split': 2, 'spli...",
2,"{'max_depth': 0, 'min_samples_split': 3, 'spli...",
3,"{'max_depth': 0, 'min_samples_split': 3, 'spli...",
4,"{'max_depth': 0, 'min_samples_split': 4, 'spli...",
...,...,...
65,"{'max_depth': 6, 'min_samples_split': 4, 'spli...",0.163957
66,"{'max_depth': 6, 'min_samples_split': 5, 'spli...",0.151342
67,"{'max_depth': 6, 'min_samples_split': 5, 'spli...",0.152080
68,"{'max_depth': 6, 'min_samples_split': 10, 'spl...",0.195988


In [106]:
clf_decision.best_params_

{'max_depth': 3, 'min_samples_split': 2, 'splitter': 'best'}

In [113]:
clf_decision.best_score_

0.2909833491625391

In [108]:
clf_lasso=GridSearchCV(Lasso(random_state=42),{
    'alpha': [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'selection': ['cycle','random']}, cv=5
)
clf_lasso.fit(x_train,y_train)
clf_lasso.cv_results_

45 fits failed out of a total of 90.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameter

{'mean_fit_time': array([0.00101871, 0.00576248, 0.00133624, 0.00516477, 0.00102973,
        0.00468011, 0.00119853, 0.0047863 , 0.0002058 , 0.00251555,
        0.00138159, 0.00314231, 0.0001986 , 0.00180354, 0.        ,
        0.00179982, 0.00095143, 0.00211992]),
 'std_fit_time': array([6.34019520e-04, 4.64762080e-03, 8.76080332e-04, 1.22399076e-03,
        3.54017471e-05, 5.34821998e-04, 1.95667907e-04, 1.83467704e-03,
        4.11605835e-04, 5.92727807e-04, 3.65492288e-04, 1.19161708e-03,
        3.97205353e-04, 4.04263673e-04, 0.00000000e+00, 4.05305049e-04,
        5.04698910e-04, 7.65478686e-04]),
 'mean_score_time': array([0.        , 0.00220122, 0.        , 0.00342426, 0.        ,
        0.00268469, 0.        , 0.00254211, 0.        , 0.00160294,
        0.        , 0.00232964, 0.        , 0.00120587, 0.        ,
        0.0012032 , 0.        , 0.00111279]),
 'std_score_time': array([0.        , 0.00115775, 0.        , 0.0010704 , 0.        ,
        0.00080322, 0.        , 

In [109]:
df_lasso=pd.DataFrame(clf_lasso.cv_results_)
df_lasso

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_selection,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001019,0.000634,0.0,0.0,0.1,cycle,"{'alpha': 0.1, 'selection': 'cycle'}",,,,,,,,10
1,0.005762,0.004648,0.002201,0.001158,0.1,random,"{'alpha': 0.1, 'selection': 'random'}",0.502519,0.313789,0.545234,0.571553,0.299543,0.446528,0.11639,1
2,0.001336,0.000876,0.0,0.0,0.2,cycle,"{'alpha': 0.2, 'selection': 'cycle'}",,,,,,,,10
3,0.005165,0.001224,0.003424,0.00107,0.2,random,"{'alpha': 0.2, 'selection': 'random'}",0.470458,0.314266,0.54635,0.557297,0.309421,0.439558,0.108495,2
4,0.00103,3.5e-05,0.0,0.0,0.3,cycle,"{'alpha': 0.3, 'selection': 'cycle'}",,,,,,,,10
5,0.00468,0.000535,0.002685,0.000803,0.3,random,"{'alpha': 0.3, 'selection': 'random'}",0.436214,0.309103,0.539156,0.538489,0.310177,0.426628,0.102606,3
6,0.001199,0.000196,0.0,0.0,0.4,cycle,"{'alpha': 0.4, 'selection': 'cycle'}",,,,,,,,10
7,0.004786,0.001835,0.002542,0.000376,0.4,random,"{'alpha': 0.4, 'selection': 'random'}",0.41315,0.317269,0.524722,0.520451,0.304701,0.416059,0.094735,4
8,0.000206,0.000412,0.0,0.0,0.5,cycle,"{'alpha': 0.5, 'selection': 'cycle'}",,,,,,,,10
9,0.002516,0.000593,0.001603,0.000489,0.5,random,"{'alpha': 0.5, 'selection': 'random'}",0.387955,0.322375,0.511321,0.501657,0.297718,0.404205,0.088623,5


In [110]:
df_lasso[['params','mean_test_score']]

Unnamed: 0,params,mean_test_score
0,"{'alpha': 0.1, 'selection': 'cycle'}",
1,"{'alpha': 0.1, 'selection': 'random'}",0.446528
2,"{'alpha': 0.2, 'selection': 'cycle'}",
3,"{'alpha': 0.2, 'selection': 'random'}",0.439558
4,"{'alpha': 0.3, 'selection': 'cycle'}",
5,"{'alpha': 0.3, 'selection': 'random'}",0.426628
6,"{'alpha': 0.4, 'selection': 'cycle'}",
7,"{'alpha': 0.4, 'selection': 'random'}",0.416059
8,"{'alpha': 0.5, 'selection': 'cycle'}",
9,"{'alpha': 0.5, 'selection': 'random'}",0.404205


In [111]:
clf_lasso.best_params_

{'alpha': 0.1, 'selection': 'random'}

In [112]:
clf_lasso.best_score_

0.4465276896257201

**Hyperparameter Tuning with RandomSearchCV**

In [115]:
print(dir(sklearn.model_selection))

['BaseCrossValidator', 'BaseShuffleSplit', 'FixedThresholdClassifier', 'GridSearchCV', 'GroupKFold', 'GroupShuffleSplit', 'KFold', 'LearningCurveDisplay', 'LeaveOneGroupOut', 'LeaveOneOut', 'LeavePGroupsOut', 'LeavePOut', 'ParameterGrid', 'ParameterSampler', 'PredefinedSplit', 'RandomizedSearchCV', 'RepeatedKFold', 'RepeatedStratifiedKFold', 'ShuffleSplit', 'StratifiedGroupKFold', 'StratifiedKFold', 'StratifiedShuffleSplit', 'TimeSeriesSplit', 'TunedThresholdClassifierCV', 'ValidationCurveDisplay', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__getattr__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_classification_threshold', '_plot', '_search', '_split', '_validation', 'check_cv', 'cross_val_predict', 'cross_val_score', 'cross_validate', 'learning_curve', 'permutation_test_score', 'train_test_split', 'typing', 'validation_curve']


In [116]:
from sklearn.model_selection import RandomizedSearchCV

In [117]:
clf1_ridge=RandomizedSearchCV(ridge,{
    'alpha': [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs']},cv=5)
clf1_ridge.fit(x_train,y_train)
clf1_ridge.cv_results_

{'mean_fit_time': array([0.017451  , 0.00251784, 0.00691996, 0.00641203, 0.00308914,
        0.00160112, 0.00122175, 0.00204058, 0.00195775, 0.00099769]),
 'std_fit_time': array([3.04668839e-02, 5.22434193e-04, 7.87490855e-03, 8.84814288e-03,
        2.58045225e-03, 4.91074854e-04, 3.89443729e-04, 5.49562311e-04,
        8.51809581e-04, 6.13732214e-06]),
 'mean_score_time': array([0.00226326, 0.00150418, 0.00161734, 0.00140052, 0.00099816,
        0.00120006, 0.00107212, 0.00161238, 0.00140982, 0.00160384]),
 'std_score_time': array([9.79410574e-04, 6.47289169e-04, 5.28156740e-04, 4.91400910e-04,
        2.50734744e-05, 4.00138594e-04, 1.34135267e-04, 4.92217600e-04,
        4.79757957e-04, 4.96197581e-04]),
 'param_solver': masked_array(data=['cholesky', 'auto', 'svd', 'saga', 'lsqr', 'sag',
                    'auto', 'lsqr', 'cholesky', 'auto'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
    

In [118]:
df1_ridge=pd.DataFrame(clf1_ridge.cv_results_)
df1_ridge

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_solver,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.017451,0.030467,0.002263,0.000979,cholesky,0.3,"{'solver': 'cholesky', 'alpha': 0.3}",0.489796,0.322245,0.528367,0.535033,0.288928,0.432874,0.105598,2
1,0.002518,0.000522,0.001504,0.000647,auto,0.7,"{'solver': 'auto', 'alpha': 0.7}",0.415933,0.302721,0.486405,0.466786,0.265984,0.387566,0.088126,7
2,0.00692,0.007875,0.001617,0.000528,svd,0.3,"{'solver': 'svd', 'alpha': 0.3}",0.489796,0.322245,0.528367,0.535033,0.288928,0.432874,0.105598,4
3,0.006412,0.008848,0.001401,0.000491,saga,0.3,"{'solver': 'saga', 'alpha': 0.3}",0.48979,0.322248,0.528362,0.535031,0.288926,0.432871,0.105596,5
4,0.003089,0.00258,0.000998,2.5e-05,lsqr,0.5,"{'solver': 'lsqr', 'alpha': 0.5}",0.451037,0.312467,0.508175,0.499162,0.278808,0.40993,0.095913,6
5,0.001601,0.000491,0.0012,0.0004,sag,0.9,"{'solver': 'sag', 'alpha': 0.9}",0.385055,0.293398,0.464999,0.438293,0.252521,0.366853,0.081896,9
6,0.001222,0.000389,0.001072,0.000134,auto,0.9,"{'solver': 'auto', 'alpha': 0.9}",0.385046,0.293386,0.464999,0.438296,0.252524,0.36685,0.081898,10
7,0.002041,0.00055,0.001612,0.000492,lsqr,0.2,"{'solver': 'lsqr', 'alpha': 0.2}",0.509107,0.326587,0.536295,0.553118,0.291329,0.443287,0.111136,1
8,0.001958,0.000852,0.00141,0.00048,cholesky,0.7,"{'solver': 'cholesky', 'alpha': 0.7}",0.415933,0.302721,0.486405,0.466786,0.265984,0.387566,0.088126,7
9,0.000998,6e-06,0.001604,0.000496,auto,0.3,"{'solver': 'auto', 'alpha': 0.3}",0.489796,0.322245,0.528367,0.535033,0.288928,0.432874,0.105598,2


In [119]:
df1_ridge[['params','mean_test_score']]

Unnamed: 0,params,mean_test_score
0,"{'solver': 'cholesky', 'alpha': 0.3}",0.432874
1,"{'solver': 'auto', 'alpha': 0.7}",0.387566
2,"{'solver': 'svd', 'alpha': 0.3}",0.432874
3,"{'solver': 'saga', 'alpha': 0.3}",0.432871
4,"{'solver': 'lsqr', 'alpha': 0.5}",0.40993
5,"{'solver': 'sag', 'alpha': 0.9}",0.366853
6,"{'solver': 'auto', 'alpha': 0.9}",0.36685
7,"{'solver': 'lsqr', 'alpha': 0.2}",0.443287
8,"{'solver': 'cholesky', 'alpha': 0.7}",0.387566
9,"{'solver': 'auto', 'alpha': 0.3}",0.432874


In [120]:
print(clf1_ridge.best_params_)
print(clf1_ridge.best_score_)

{'solver': 'lsqr', 'alpha': 0.2}
0.4432870718606144


In [121]:
clf1_decision=RandomizedSearchCV(dtr,{
    'max_depth': [0,1,2,3,4,5,6], 'min_samples_split': [2,3,4,5,10], 'splitter': ['best','random']}, cv=5
    )
clf1_decision.fit(x_train,y_train)
clf1_decision.cv_results_

10 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\sanni\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameter

{'mean_fit_time': array([0.00874815, 0.0023067 , 0.00293832, 0.00468388, 0.00463848,
        0.00435266, 0.00460968, 0.00363278, 0.00128078, 0.00060382]),
 'std_fit_time': array([0.00898494, 0.00061123, 0.00129273, 0.00086451, 0.00093981,
        0.00083821, 0.0010966 , 0.00063611, 0.00039015, 0.00049338]),
 'mean_score_time': array([0.00271177, 0.00167308, 0.00159869, 0.00355997, 0.00336185,
        0.00350862, 0.00323057, 0.00275455, 0.        , 0.        ]),
 'std_score_time': array([0.00058239, 0.00045295, 0.00048841, 0.00059127, 0.00060146,
        0.00030425, 0.00085645, 0.00038782, 0.        , 0.        ]),
 'param_splitter': masked_array(data=['best', 'random', 'random', 'random', 'best', 'random',
                    'random', 'random', 'random', 'best'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_min_samples_split': masked_array(data=[5, 2, 4, 3, 2, 

In [122]:
df1_decision=pd.DataFrame(clf1_decision.cv_results_)
df1_decision

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_splitter,param_min_samples_split,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008748,0.008985,0.002712,0.000582,best,5,5,"{'splitter': 'best', 'min_samples_split': 5, '...",0.127018,-0.049521,0.363987,0.408715,0.264033,0.222846,0.167052,2
1,0.002307,0.000611,0.001673,0.000453,random,2,4,"{'splitter': 'random', 'min_samples_split': 2,...",0.221667,0.104311,0.239238,0.509682,0.120803,0.23914,0.145383,1
2,0.002938,0.001293,0.001599,0.000488,random,4,6,"{'splitter': 'random', 'min_samples_split': 4,...",0.306456,-0.054734,0.341181,0.333434,-0.106554,0.163957,0.200718,7
3,0.004684,0.000865,0.00356,0.000591,random,3,4,"{'splitter': 'random', 'min_samples_split': 3,...",0.051835,0.104311,0.239238,0.437927,0.120803,0.190823,0.13792,4
4,0.004638,0.00094,0.003362,0.000601,best,2,1,"{'splitter': 'best', 'min_samples_split': 2, '...",0.184374,0.077678,0.317054,0.378056,0.124913,0.216415,0.113933,3
5,0.004353,0.000838,0.003509,0.000304,random,4,1,"{'splitter': 'random', 'min_samples_split': 4,...",0.010527,0.098713,0.053169,0.151773,0.059497,0.074736,0.047602,8
6,0.00461,0.001097,0.003231,0.000856,random,10,3,"{'splitter': 'random', 'min_samples_split': 10...",0.184302,0.013067,0.249319,0.337894,0.145177,0.185952,0.108311,5
7,0.003633,0.000636,0.002755,0.000388,random,10,2,"{'splitter': 'random', 'min_samples_split': 10...",0.188988,0.185413,0.110298,0.226256,0.210722,0.184336,0.039895,6
8,0.001281,0.00039,0.0,0.0,random,4,0,"{'splitter': 'random', 'min_samples_split': 4,...",,,,,,,,9
9,0.000604,0.000493,0.0,0.0,best,3,0,"{'splitter': 'best', 'min_samples_split': 3, '...",,,,,,,,9


In [124]:
df1_decision[['params','mean_test_score']]

Unnamed: 0,params,mean_test_score
0,"{'splitter': 'best', 'min_samples_split': 5, '...",0.222846
1,"{'splitter': 'random', 'min_samples_split': 2,...",0.23914
2,"{'splitter': 'random', 'min_samples_split': 4,...",0.163957
3,"{'splitter': 'random', 'min_samples_split': 3,...",0.190823
4,"{'splitter': 'best', 'min_samples_split': 2, '...",0.216415
5,"{'splitter': 'random', 'min_samples_split': 4,...",0.074736
6,"{'splitter': 'random', 'min_samples_split': 10...",0.185952
7,"{'splitter': 'random', 'min_samples_split': 10...",0.184336
8,"{'splitter': 'random', 'min_samples_split': 4,...",
9,"{'splitter': 'best', 'min_samples_split': 3, '...",


In [123]:
clf1_decision.best_params_,clf1_decision.best_score_

({'splitter': 'random', 'min_samples_split': 2, 'max_depth': 4},
 0.2391403756061381)