In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


# Load the breast-cancer dataset that ships with scikit-learn. The full
# `data` object is kept (not just X/y) because a later cell reads
# `data.feature_names` from it.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardization is a pipeline step, so the scaler is fitted only on the
# rows passed to `pipe.fit` (the training split) and then re-applied when
# predicting on the test split.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=2000)),
])
pipe.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
y_pred = pipe.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)


Accuracy: 0.9736842105263158


In [2]:
import pandas as pd
import numpy as np




# Row 0 of the coefficient array of the pipeline's fitted 'model' step,
# paired below with the dataset's feature names.
coef = pipe.named_steps['model'].coef_[0]
features = data.feature_names

# Use the absolute coefficient value as the importance score (the sign is
# discarded) and order the table from largest to smallest magnitude.
importance_df = pd.DataFrame(
    {
        'feature': features,
        'importance': np.abs(coef),
    }
).sort_values(by='importance', ascending=False)

# Show the ten largest-magnitude coefficients.
print(importance_df.head(10))


                 feature  importance
21         worst texture    1.350606
10          radius error    1.268178
28        worst symmetry    1.208200
7    mean concave points    1.119804
26       worst concavity    0.943053
13            area error    0.907186
20          worst radius    0.879840
23            worst area    0.841846
6         mean concavity    0.801458
27  worst concave points    0.778217
