In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# 导入load_breast_cancer数据
cancer = load_breast_cancer()
x = cancer['data']
y = cancer['target']
# 将数据划分为训练集测试集
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=22)
print('x_train第1行数据为：\n', x_train[0: 1], '\n', 'y_train第1个数据为：', y_train[0: 1])

x_train第1行数据为：
 [[1.184e+01 1.870e+01 7.793e+01 4.406e+02 1.109e-01 1.516e-01 1.218e-01
  5.182e-02 2.301e-01 7.799e-02 4.825e-01 1.030e+00 3.475e+00 4.100e+01
  5.551e-03 3.414e-02 4.205e-02 1.044e-02 2.273e-02 5.667e-03 1.682e+01
  2.812e+01 1.194e+02 8.887e+02 1.637e-01 5.775e-01 6.956e-01 1.546e-01
  4.761e-01 1.402e-01]] 
 y_train第1个数据为： [0]


In [2]:
from sklearn.preprocessing import StandardScaler
stdScaler = StandardScaler().fit(x_train)
x_trainStd = stdScaler.transform(x_train)
x_testStd = stdScaler.transform(x_test)
# 使用LogisticRegression类构建Logistic回归模型
from sklearn.linear_model import LogisticRegression
lr_model = LogisticRegression(solver='saga')
# 训练Logistic回归模型
lr_model.fit(x_trainStd, y_train)
print('训练出来的LogisticRegression模型为：\n', lr_model)

训练出来的LogisticRegression模型为：
 LogisticRegression(solver='saga')




In [3]:
print('各特征的相关系数为：\n', lr_model.coef_)

print('模型的截距为：', lr_model.intercept_)

print('模型的迭代次数为：', lr_model.n_iter_)

各特征的相关系数为：
 [[-0.58579014 -0.66067683 -0.55634186 -0.59607596 -0.13948389  0.28529265
  -0.64271129 -0.73800361  0.02286473  0.54624353 -0.93870093  0.26864818
  -0.68766744 -0.76848007 -0.2981475   0.57774825 -0.05569851 -0.14185517
   0.47176926  0.57522735 -0.93120212 -1.03890126 -0.81967514 -0.87916831
  -0.89658908 -0.09209329 -0.77421779 -0.86928201 -0.74314845 -0.29092414]]
模型的截距为： [0.6446655]
模型的迭代次数为： [100]


In [4]:
print('预测测试集前10个结果为：\n', lr_model.predict(x_testStd)[: 10])

print('测试集准确率为：', lr_model.score(x_testStd, y_test))

print('测试集前3个对应类别的概率为：\n', lr_model.predict_proba(x_testStd)[: 3])

print('测试集前3个对应类别的概率的log值为：\n',
      lr_model.predict_log_proba(x_testStd)[: 3])

print('测试集前3个的决策函数值为：\n',
      lr_model.decision_function(x_testStd)[: 3])

print('模型的参数为：\n', lr_model.get_params())

print('修改max_iter参数为1000后的模型为：\n', lr_model.set_params(max_iter=1000))

print('系数矩阵转为密度数组后的模型为：\n', lr_model.densify())

print('系数矩阵转为稀疏形式后的模型为：\n', lr_model.sparsify())

预测测试集前10个结果为：
 [1 0 0 0 1 1 1 1 1 1]
测试集准确率为： 0.9736842105263158
测试集前3个对应类别的概率为：
 [[1.39845803e-02 9.86015420e-01]
 [9.99999952e-01 4.80710478e-08]
 [9.99966961e-01 3.30387499e-05]]
测试集前3个对应类别的概率的log值为：
 [[-4.26979997e+00 -1.40832858e-02]
 [-4.80710490e-08 -1.68505858e+01]
 [-3.30392957e-05 -1.03178294e+01]]
测试集前3个的决策函数值为：
 [  4.25571668 -16.85058571 -10.31779641]
模型的参数为：
 {'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'saga', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
修改max_iter参数为1000后的模型为：
 LogisticRegression(max_iter=1000, solver='saga')
系数矩阵转为密度数组后的模型为：
 LogisticRegression(max_iter=1000, solver='saga')
系数矩阵转为稀疏形式后的模型为：
 LogisticRegression(max_iter=1000, solver='saga')
