In [15]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the breast-cancer classification dataset bundled with scikit-learn.
data = load_breast_cancer()

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler learns its statistics from the training
# split only and then re-applies them to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate values for C, the inverse regularisation strength (C = 1 / lambda):
# 10^-4, 10^-2, 10^0, 10^2, 10^4.
tuned_parameters = [{'C': [10**exponent for exponent in (-4, -2, 0, 2, 4)]}]

# Cross-validated grid search over C, selecting the candidate with the best F1 score.
# lbfgs is a quasi-Newton optimiser that is efficient on smaller datasets and is
# the default solver for LogisticRegression; it is spelled out here for clarity.
log_reg = LogisticRegression(solver='lbfgs', max_iter=1000)
model = GridSearchCV(estimator=log_reg, param_grid=tuned_parameters, scoring='f1')
model.fit(X_train_scaled, Y_train)

# Report the selected estimator and its F1 score on the held-out test split.
test_f1 = model.score(X_test_scaled, Y_test)
print("Best Estimator:", model.best_estimator_)
print("Best F1 Score on Test Set:", test_f1)


Best Estimator: LogisticRegression(C=1, max_iter=1000)
Best F1 Score on Test Set: 0.9790209790209791


In [22]:
import numpy as np

# Fit an L2-regularised logistic regression (C=0.1) and count its non-zero weights.
# The model is trained on the standardised features (X_train_scaled): the penalty
# treats every coefficient alike, so features on very different scales would be
# penalised unevenly, and lbfgs converges poorly on unscaled inputs.
clf = LogisticRegression(C=0.1, penalty='l2', max_iter=1000)
clf.fit(X_train_scaled, Y_train)
w = clf.coef_
# An L2 penalty shrinks weights but does not generally drive them to exactly
# zero, so this count is expected to equal the number of features.
print(np.count_nonzero(w))

30


In [26]:
# Repeat the sparsity check with stronger L2 regularisation (C=0.01).
# The model is trained on the standardised features (X_train_scaled): the penalty
# treats every coefficient alike, so features on very different scales would be
# penalised unevenly, and lbfgs converges poorly on unscaled inputs.
clf = LogisticRegression(C=0.01, penalty='l2', max_iter=1000)
clf.fit(X_train_scaled, Y_train)
w = clf.coef_
# Even with a smaller C, an L2 penalty only shrinks weights; it does not
# generally zero them out, so the count is expected to stay at the feature count.
print(np.count_nonzero(w))

30
