In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from statsmodels import api as sms

# Load the diabetes data from the Excel workbook. `read_excel` has no `sep`
# option (that belongs to the delimited-text readers) and current pandas
# raises TypeError on unknown keywords, so none is passed.
diabetes = pd.read_excel(r'diabetes.xlsx')

# Predictors: every column except the first two and the last one.
# NOTE(review): presumably the last column is the target 'Y' -- confirm
# against the workbook layout.
predictors = diabetes.columns[2:-1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    diabetes[predictors],
    diabetes['Y'],
    test_size=0.2,
    random_state=1234,
)

In [None]:
# Ridge regression: coefficient paths
# Candidate penalty strengths (lambda): 200 values log-spaced in [1e-5, 1e2].
Lambdas = np.logspace(-5, 2, 200)
n_train = X_train.shape[0]

# Regression coefficients (in original feature units) for every lambda.
ridge_cofficients = []
for Lambda in Lambdas:
    # `normalize=True` was removed from scikit-learn's linear models in 1.2.
    # It centred each column and divided it by its L2 norm, i.e. by
    # std * sqrt(n_samples).  Dividing by the standard deviation only and
    # multiplying the ridge penalty by n_samples solves the same problem.
    ridge = make_pipeline(StandardScaler(with_mean=False),
                          Ridge(alpha=Lambda * n_train))
    ridge.fit(X_train, y_train)
    fitted = ridge.named_steps
    # The estimator sees scaled columns; divide by the scale to report the
    # coefficients in the units of the original predictors.
    ridge_cofficients.append(fitted['ridge'].coef_ / fitted['standardscaler'].scale_)

# Font able to render CJK text, and a proper minus sign on the axes.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
plt.style.use('ggplot')
plt.plot(Lambdas, ridge_cofficients)
# Lambda spans seven orders of magnitude, so use a log-scaled x axis.
plt.xscale('log')
plt.xlabel('Lambda')
plt.ylabel('Coefficients')
plt.show()

In [None]:
# Ridge regression: choose lambda by 10-fold cross-validation
# `RidgeCV(normalize=True)` no longer exists (the option was removed in
# scikit-learn 1.2).  The search below cross-validates a scaler + Ridge
# pipeline instead, so scaling is re-estimated inside every training fold.
n_folds = 10
# Each CV fit sees (n_folds - 1) / n_folds of the training rows.  The old
# normalisation is reproduced by multiplying lambda by the number of rows
# used for the fit, so the grid is scaled by the fold training size
# (exact up to the rounding of fold sizes).
cv_train_size = X_train.shape[0] * (n_folds - 1) / n_folds
ridge_cv = model_selection.GridSearchCV(
    make_pipeline(StandardScaler(with_mean=False), Ridge()),
    param_grid={'ridge__alpha': (Lambdas * cv_train_size).tolist()},
    scoring='neg_mean_squared_error',
    cv=n_folds,
    refit=False,  # only the winning lambda is needed, not a refitted model
)
ridge_cv.fit(X_train, y_train)
# Candidates are evaluated in grid order, so the best index maps straight
# back to the unscaled lambda.
ridge_best_alpha = Lambdas[ridge_cv.best_index_]
ridge_best_alpha

In [None]:
# Refit ridge regression with the cross-validated lambda
# `normalize=True` was removed in scikit-learn 1.2; scaling each column by
# its standard deviation and multiplying the penalty by the number of
# training rows yields the same fitted model.
ridge = make_pipeline(StandardScaler(with_mean=False),
                      Ridge(alpha=ridge_best_alpha * X_train.shape[0]))
ridge.fit(X_train, y_train)

# Report intercept and coefficients in original feature units.  A bare
# expression in the middle of a cell is not rendered, so display it
# explicitly.
ridge_steps = ridge.named_steps
ridge_coef = ridge_steps['ridge'].coef_ / ridge_steps['standardscaler'].scale_
display(pd.Series(index=['Intercept'] + X_train.columns.tolist(),
                  data=[ridge_steps['ridge'].intercept_] + ridge_coef.tolist()))

# Predict on the held-out rows.
ridge_predict = ridge.predict(X_test)
# Prediction quality: root mean squared error on the test set.
RMSE = np.sqrt(mean_squared_error(y_test, ridge_predict))
RMSE

In [None]:
# Lasso regression: coefficient paths
# `normalize=True` was removed in scikit-learn 1.2.  It divided each centred
# column by std * sqrt(n_samples); because Lasso's loss is already averaged
# over the samples, scaling by the standard deviation and multiplying
# lambda by sqrt(n_samples) solves the same problem.
sqrt_n_train = np.sqrt(X_train.shape[0])

# Regression coefficients (in original feature units) for every lambda.
lasso_cofficients = []
for Lambda in Lambdas:
    lasso = make_pipeline(StandardScaler(with_mean=False),
                          Lasso(alpha=Lambda * sqrt_n_train, max_iter=10000))
    lasso.fit(X_train, y_train)
    fitted = lasso.named_steps
    # Undo the column scaling so coefficients refer to the raw predictors.
    lasso_cofficients.append(fitted['lasso'].coef_ / fitted['standardscaler'].scale_)

# Plot the coefficients against lambda on a log-scaled x axis.
plt.plot(Lambdas, lasso_cofficients)
plt.xscale('log')
plt.xlabel('Lambda')
plt.ylabel('Coefficients')
plt.show()

In [None]:
# Lasso regression: choose lambda by 10-fold cross-validation
# `LassoCV(normalize=True)` no longer exists (the option was removed in
# scikit-learn 1.2).  The search below cross-validates a scaler + Lasso
# pipeline on mean squared error, re-estimating the scaling inside every
# training fold.
n_folds = 10
# Each CV fit sees (n_folds - 1) / n_folds of the training rows; the old
# normalisation corresponds to lambda * sqrt(rows used for the fit)
# (exact up to the rounding of fold sizes).
cv_train_size = X_train.shape[0] * (n_folds - 1) / n_folds
lasso_cv = model_selection.GridSearchCV(
    make_pipeline(StandardScaler(with_mean=False), Lasso(max_iter=10000)),
    param_grid={'lasso__alpha': (Lambdas * np.sqrt(cv_train_size)).tolist()},
    scoring='neg_mean_squared_error',
    cv=n_folds,
    refit=False,  # only the winning lambda is needed, not a refitted model
)
lasso_cv.fit(X_train, y_train)
# Candidates are evaluated in grid order, so the best index maps straight
# back to the unscaled lambda.
lasso_best_alpha = Lambdas[lasso_cv.best_index_]
lasso_best_alpha

In [None]:
# Refit Lasso with the cross-validated lambda
# `normalize=True` was removed in scikit-learn 1.2; scaling each column by
# its standard deviation and multiplying lambda by sqrt(n_samples) yields
# the same fitted model.
lasso = make_pipeline(
    StandardScaler(with_mean=False),
    Lasso(alpha=lasso_best_alpha * np.sqrt(X_train.shape[0]), max_iter=10000),
)
lasso.fit(X_train, y_train)

# Report intercept and coefficients in original feature units.  A bare
# expression in the middle of a cell is not rendered, so display it
# explicitly.
lasso_steps = lasso.named_steps
lasso_coef = lasso_steps['lasso'].coef_ / lasso_steps['standardscaler'].scale_
display(pd.Series(index=['Intercept'] + X_train.columns.tolist(),
                  data=[lasso_steps['lasso'].intercept_] + lasso_coef.tolist()))

# Predict on the held-out rows and score with root mean squared error.
lasso_predict = lasso.predict(X_test)
RMSE = np.sqrt(mean_squared_error(y_test, lasso_predict))
RMSE

In [None]:
# Ordinary least squares with statsmodels, fitted on the same split
# OLS does not add an intercept on its own, so a constant column is
# prepended to both design matrices.  `has_constant='add'` forces the column
# even when some predictor happens to be constant within a split; the
# default ('skip') would silently omit it there and leave the train and
# test matrices with different columns.
X_train2 = sms.add_constant(X_train, has_constant='add')
X_test2 = sms.add_constant(X_test, has_constant='add')

linear = sms.OLS(y_train, X_train2).fit()
# Fitted intercept ('const') and coefficients.
display(linear.params)

# Predict on the held-out rows and score with root mean squared error.
linear_predict = linear.predict(X_test2)
RMSE = np.sqrt(mean_squared_error(y_test, linear_predict))
RMSE