In [4]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# 加载乳腺癌数据集
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 创建并训练逻辑回归模型
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# 在测试集上进行预测
y_pred = model.predict(X_test)

# 输出模型性能
print("准确率:", accuracy_score(y_test, y_pred))
print("\n分类报告:")
print(classification_report(y_test, y_pred, target_names=['良性', '恶性']))




准确率: 0.9473684210526315

分类报告:
              precision    recall  f1-score   support

          良性       0.97      0.88      0.93        43
          恶性       0.93      0.99      0.96        71

    accuracy                           0.95       114
   macro avg       0.95      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
from sklearn.model_selection import GridSearchCV

# 定义参数网格
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],#正则化力度:c越小，模型越复杂，c越大，模型越简单
    'max_iter': [100, 200, 300],#最大迭代次数
    'solver': ['lbfgs', 'newton-cg', 'liblinear']#优化器
}

# 创建网格搜索对象
grid_search = GridSearchCV(
    LogisticRegression(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

# 执行网格搜索
grid_search.fit(X_train, y_train)

# 输出最佳参数和得分
print("最佳参数:", grid_search.best_params_)
print("最佳得分:", grid_search.best_score_)

# 使用最佳参数的模型进行预测
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)

print("\n使用最佳参数的模型性能:")
print("准确率:", accuracy_score(y_test, y_pred_best))
print("\n分类报告:")
print(classification_report(y_test, y_pred_best, target_names=['良性', '恶性']))

# 显示所有参数组合的结果
results = pd.DataFrame(grid_search.cv_results_)
print("\n所有参数组合的平均验证得分:")
for i in range(len(results)):
    params = results.loc[i, 'params']
    score = results.loc[i, 'mean_test_score']
    print(f"参数: {params}")
    print(f"平均验证准确率: {score:.4f}\n")


最佳参数: {'C': 100, 'max_iter': 100, 'solver': 'newton-cg'}
最佳得分: 0.9670329670329672

使用最佳参数的模型性能:
准确率: 0.956140350877193

分类报告:
              precision    recall  f1-score   support

          良性       0.97      0.91      0.94        43
          恶性       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


所有参数组合的平均验证得分:
参数: {'C': 0.001, 'max_iter': 100, 'solver': 'lbfgs'}
平均验证准确率: 0.9231

参数: {'C': 0.001, 'max_iter': 100, 'solver': 'newton-cg'}
平均验证准确率: 0.9297

参数: {'C': 0.001, 'max_iter': 100, 'solver': 'liblinear'}
平均验证准确率: 0.9209

参数: {'C': 0.001, 'max_iter': 200, 'solver': 'lbfgs'}
平均验证准确率: 0.9297

参数: {'C': 0.001, 'max_iter': 200, 'solver': 'newton-cg'}
平均验证准确率: 0.9297

参数: {'C': 0.001, 'max_iter': 200, 'solver': 'liblinear'}
平均验证准确率: 0.9209

参数: {'C': 0.001, 'max_iter': 300, 'solver': 'lbfgs'}
平均验证准确率: 0.9297

参数: {'C': 0.001, 'max_ite