In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 导入线性回归模型
from sklearn.linear_model import Ridge

# 导入数据集划分对象
from sklearn.model_selection import train_test_split

# 导入波士顿房价数据集
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# 导入回归模型的评估指标
from sklearn.metrics import mean_squared_error, r2_score  

from IPython.core.interactiveshell import InteractiveShell # 这个对象设置所有行全部输出
  
# 设置该对象ast_node_interactivity的属性值为all，表示notebook下每一行有输出的代码全部输出运算结果
InteractiveShell.ast_node_interactivity = "all"

# 解决坐标轴刻度负号乱码
plt.rcParams['axes.unicode_minus'] = False

# 解决中文乱码问题
plt.rcParams['font.sans-serif'] = ['Simhei']
plt.style.use('ggplot')


In [15]:
# 加载数据
x = data
y = target
x.shape

(506, 13)

In [16]:
# 数据集划分
Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.3, random_state=123)

In [17]:
reg_1 = Ridge(alpha = 0.001)

reg_1.fit(Xtrain, Ytrain)

# 查看回归系数
reg_1.coef_

# 查看截距
reg_1.intercept_

array([-1.00986844e-01,  3.99587698e-02,  7.53224577e-02,  2.64323828e-01,
       -1.43265524e+01,  4.83332493e+00, -7.45349656e-03, -1.32652166e+00,
        2.73450190e-01, -1.30449264e-02, -9.56483191e-01,  6.42522869e-03,
       -4.86585271e-01])

28.97118331987133

In [20]:
# 导入带有k折交叉验证得岭回归模型
from sklearn.linear_model import RidgeCV

In [31]:
reg_2 = RidgeCV(
  alphas = np.arange(1, 1001, 100),
  scoring = 'r2',
  store_cv_values = True # 是否保存每次验证结果
).fit(Xtrain, Ytrain)

# 没有交叉验证的岭回归模型的参数w的结果
reg_1.score(Xtest, Ytest)

# 带有10折交叉验证的岭回归模型的参数w的结果
reg_2.score(Xtest, Ytest)

# 查看所有10折交叉验证的结果 - 误差值
pd.DataFrame(reg_2.cv_values_)

# 10折交叉验证的测试误差的结果可以进一步求其均值
reg_2.cv_values_.mean(axis = 0)

# 可以选出经过k折交叉验证后所得的最佳正则化系数
reg_2.alpha_

0.6485496162842532

0.6392633434527886

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,11.361649,8.702176,7.864217,7.445245,7.191854,7.020763,6.896396,6.800993,6.724714,6.661686
1,5.448356,5.779545,5.883131,5.936753,5.964398,5.976889,5.980011,5.977148,5.970395,5.961098
2,4.072377,4.994944,5.165460,5.176272,5.143813,5.099367,5.053205,5.008940,4.967765,4.929930
3,-3.549215,-3.906073,-4.117534,-4.228332,-4.300640,-4.354168,-4.397027,-4.433151,-4.464676,-4.492869
4,-3.833538,-5.756266,-6.370299,-6.688030,-6.885720,-7.021699,-7.121236,-7.197209,-7.256940,-7.304932
...,...,...,...,...,...,...,...,...,...,...
349,13.117191,10.183197,9.118045,8.524292,8.130228,7.842630,7.619718,7.439566,7.289400,7.161206
350,-2.450364,-4.253522,-4.699788,-4.922445,-5.058629,-5.152089,-5.221135,-5.274798,-5.318065,-5.353928
351,0.088068,1.292173,1.704154,1.946298,2.114743,2.242192,2.343410,2.426312,2.495659,2.554560
352,-9.803939,-9.505144,-9.440008,-9.418071,-9.407268,-9.398968,-9.390396,-9.380650,-9.369510,-9.357012


array([-0.02835224, -0.02077202, -0.01865564, -0.01739125, -0.01647812,
       -0.01575738, -0.015158  , -0.01464199, -0.0141867 , -0.01377757])

1