In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso

In [2]:
from sklearn.datasets import load_diabetes

In [7]:
load_diabetes()

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [4]:
X, y = load_diabetes(return_X_y=True)

In [5]:
features = load_diabetes()['feature_names']

In [6]:
features

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [11]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Lasso())
])

In [14]:
search = GridSearchCV(pipeline,
                      {'model__alpha': np.arange(0.1, 10, 0.1)},
                      cv = 5, scoring="neg_mean_squared_error", verbose=3
                      )

In [15]:
search.fit(X_train, y_train)

Fitting 5 folds for each of 99 candidates, totalling 495 fits
[CV 1/5] END ..............model__alpha=0.1;, score=-3051.708 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.1;, score=-3405.374 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.1;, score=-2593.337 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.1;, score=-3309.628 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.1;, score=-3512.110 total time=   0.0s
[CV 1/5] END ..............model__alpha=0.2;, score=-3057.672 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.2;, score=-3413.561 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.2;, score=-2606.353 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.2;, score=-3317.427 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.2;, score=-3502.696 total time=   0.0s
[CV 1/5] END model__alpha=0.30000000000000004;, score=-3057.759 total time=   0.0s
[CV 2/5] END model__alpha=0.30000000000000004

[CV 1/5] END ..............model__alpha=0.4;, score=-3054.995 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.4;, score=-3423.267 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.4;, score=-2600.535 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.4;, score=-3317.535 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.4;, score=-3490.310 total time=   0.0s
[CV 1/5] END ..............model__alpha=0.5;, score=-3052.664 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.5;, score=-3425.377 total time=   0.0s
[CV 3/5] END ..............model__alpha=0.5;, score=-2596.304 total time=   0.0s
[CV 4/5] END ..............model__alpha=0.5;, score=-3315.477 total time=   0.0s
[CV 5/5] END ..............model__alpha=0.5;, score=-3483.496 total time=   0.0s
[CV 1/5] END ..............model__alpha=0.6;, score=-3050.765 total time=   0.0s
[CV 2/5] END ..............model__alpha=0.6;, score=-3427.624 total time=   0.0s
[CV 3/5] END ..............m

In [16]:
search.best_params_

{'model__alpha': 1.2000000000000002}

In [17]:
coefficient = search.best_estimator_.named_steps['model'].coef_

In [18]:
importance = np.abs(coefficient)

In [19]:
importance

array([ 0.15109046,  9.00504871, 26.90196877, 18.04852682,  5.41859386,
        0.        , 12.27906268,  0.        , 19.48909411,  0.        ])

In [20]:
np.array(features)[importance > 0]

array(['age', 'sex', 'bmi', 'bp', 's1', 's3', 's5'], dtype='<U3')

In [21]:
np.array(features)[importance ==0]

array(['s2', 's4', 's6'], dtype='<U3')