In [1]:
import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


In [17]:
# Load the breast-cancer classification dataset bundled with scikit-learn.
data = load_breast_cancer()

# Hold out 30% of the samples for testing.
# random_state pins the shuffle so the split (and every score computed from it)
# is the same on each Restart & Run All; stratify keeps the class proportions
# of data.target equal in the train and test parts.
X_train,X_test,y_train,y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.3,
    random_state=42,
    stratify=data.target,
)

In [18]:
# Candidate values for C (inverse regularization strength) of the "clf" step:
# 10**-4, 10**-2, 10**0, 10**2, 10**4.
grid_params = dict(clf__C=[10**exponent for exponent in (-4, -2, 0, 2, 4)])

# Standardize the features, then fit a logistic regression on the scaled data.
# Keeping the scaler inside the pipeline means it is fit only on the training
# portion of each cross-validation fold.
pipe = Pipeline(
    steps=[("scaler", StandardScaler()), ("clf", LogisticRegression())]
)

# 5-fold cross-validated grid search, ranked by F1, using all available cores.
# refit=True retrains the best candidate on the whole training set afterwards.
model = GridSearchCV(
    estimator=pipe,
    param_grid=grid_params,
    scoring="f1",
    cv=5,
    refit=True,
    n_jobs=-1,
    verbose=2,
)

model.fit(X_train, y_train)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [19]:
# Mean cross-validated F1 (scoring="f1", cv=5) of the best C found by the grid search.
model.best_score_

0.983957946310095

In [20]:
# Show the winning pipeline; with refit=True it has been retrained on all of X_train.
print(model.best_estimator_)

Pipeline(steps=[('scaler', StandardScaler()), ('clf', LogisticRegression(C=1))])


In [21]:
# Score of the refit best pipeline on the held-out test set.
# Because the search was created with scoring="f1", this value is F1, not accuracy.
model.score(X_test,y_test)

0.9732142857142858

In [22]:
# L1 (lasso) regularization drives some weights to exactly zero, producing a
# sparse coefficient vector. C is the inverse of the regularization strength,
# so the loop goes from the weakest penalty (C=10) to the strongest (C=0.01).
for c in [10,1,0.1,0.01]:
    pipe = Pipeline([
        ("scaler",StandardScaler()),
        # liblinear supports the L1 penalty; it shuffles the data internally,
        # so random_state is fixed to make the fitted weights repeatable.
        ("clf",LogisticRegression(solver="liblinear",C=c,penalty="l1",random_state=42))
    ])

    pipe.fit(X_train,y_train)
    # pipe.score uses the classifier's default metric (mean accuracy),
    # unlike the grid search above, which was scored with F1.
    print(f"{c:5} has {np.count_nonzero(pipe['clf'].coef_):2} non zero weights with {pipe.score(X_test,y_test)}")

   10 has 21 non zero weights with 0.9473684210526315
    1 has 16 non zero weights with 0.9649122807017544
  0.1 has  6 non zero weights with 0.9707602339181286
 0.01 has  2 non zero weights with 0.9415204678362573


As the regularization strength increases (i.e. as `C` decreases), the number of non-zero weights decreases, i.e. the sparsity of the weights increases.