# 导入数据集并划分训练集和测试集

In [21]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# 数据导入
boston = load_boston()
ly_X = boston.data
ly_y = boston.target

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(ly_X, ly_y, test_size=0.2, random_state=100)

# 输出数据形状信息
print(f"训练特征集{X_train.shape}")
print(f"测试特征集{X_test.shape}")
print(f"训练集目标{y_train.shape}")
print(f"测试集目标{y_test.shape}")

训练特征集(404, 13)
测试特征集(102, 13)
训练集目标(404,)
测试集目标(102,)


# 构建模型和训练模型
- 使用sklearn构建线性回归模型LinearRegression和随机梯度下降SGD回归模型并完成训练模型
- 对训练效果和测试效果进行评估和对比

In [22]:
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import MinMaxScaler

# 标准化特征数据
mms = MinMaxScaler()
# 训练集和测试集标准化
mms.fit(ly_X)
# 转换数据
X_train = mms.transform(X_train)
X_test = mms.transform(X_test)


In [23]:
# 构建线性回归模型
lr = LinearRegression()
# 训练线性回归模型
lr.fit(X_train, y_train)
# 打印线性回归模型在训练集和测试集上的得分
print("线性回归模型：")
print(f"训练集得分：{lr.score(X_train, y_train)}")
print(f"测试集得分：{lr.score(X_test, y_test)}")

线性回归模型：
训练集得分：0.7337332767917557
测试集得分：0.7555033086871303


In [24]:
# 构建随机梯度下降模型
sgd = SGDRegressor(max_iter=20000)
# 训练随机梯度下降模型
sgd.fit(X_train, y_train)
# 打印随机梯度下降模型在训练集和测试集上的得分
print("随机梯度下降模型：")
print(f"训练集得分：{sgd.score(X_train, y_train)}")
print(f"测试集得分：{sgd.score(X_test, y_test)}")

随机梯度下降模型：
训练集得分：0.7138719943855905
测试集得分：0.7275046600083583


In [25]:
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import r2_score

# 构建多项式特征，并构建LASSO回归和Ridge回归模型
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)


In [26]:
# LASSO回归模型
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_poly, y_train)

# 打印LASSO回归模型在训练集和测试集上的得分
print("LASSO回归模型：")
print(f"训练集得分：{lasso.score(X_train_poly, y_train)}")
print(f"测试集得分：{lasso.score(X_test_poly, y_test)}")

LASSO回归模型：
训练集得分：0.7280609148244986
测试集得分：0.7451103205924074


In [27]:
# Ridge回归模型
ridge = Ridge(alpha=0.1)
ridge.fit(X_train_poly, y_train)

# 打印Ridge回归模型在训练集和测试集上的得分
print("Ridge回归模型：")
print(f"训练集得分：{ridge.score(X_train_poly, y_train)}")
print(f"测试集得分：{ridge.score(X_test_poly, y_test)}")

Ridge回归模型：
训练集得分：0.9042265406166784
测试集得分：0.8831783933386088
