In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# 1. Load the breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()
X = data.data                          # feature matrix, shape (569, 30)
y = data.target                        # binary labels, 0/1
feature_names = data.feature_names
target_names = data.target_names       # ['malignant' 'benign'] -> 0 = malignant, 1 = benign

# 2. Hold out 20% of the samples as a test set (80/20 split).
#    stratify=y keeps the class ratio the same in both parts;
#    random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 3. Standardize the features (logistic regression is sensitive to feature scale).
#    The scaler is fitted on the training split only and then applied to both
#    splits, so no statistics from the test set are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

In [2]:
# 4. Build and train the logistic-regression model.
#    max_iter is raised to 1000 to help the solver converge; the penalty is
#    left at its default, which is L2 regularization.
log_reg = LogisticRegression(max_iter=1000, random_state=42).fit(X_train_std, y_train)

# 5. Predict class labels for the standardized test set.
y_pred = log_reg.predict(X_test_std)

# 6. Evaluate on the test set.
#    NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1,
#    i.e. the class at target_names[1] ('benign' per the data-loading cell).
#    Pass pos_label=0 if metrics for the malignant class are what is wanted.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)  # rows = true labels, columns = predicted labels

# One print call with a newline separator emits the same lines as separate prints.
print(
    "===== 逻辑回归在测试集上的性能 =====",
    f"Accuracy : {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall   : {rec:.4f}",
    f"F1-score : {f1:.4f}",
    sep="\n",
)
print("Confusion Matrix:\n", cm)


===== 逻辑回归在测试集上的性能 =====
Accuracy : 0.9825
Precision: 0.9861
Recall   : 0.9861
F1-score : 0.9861
Confusion Matrix:
 [[41  1]
 [ 1 71]]
