In [29]:
# 导入必要库
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the Boston housing dataset (OpenML "boston", version 1) as pandas objects.
boston = fetch_openml(name="boston", version=1, as_frame=True)

# Keep only the float64 / int64 feature columns; columns of any other dtype
# are excluded from X by this selection.
# NOTE(review): the recorded output of this cell lists 11 feature columns. If
# the raw frame also carries non-numeric (e.g. categorical) columns, they are
# silently dropped here -- confirm that this is intended.
numeric_dtypes = ['float64', 'int64']
raw_features = boston.data
X = raw_features.select_dtypes(include=numeric_dtypes)
y = boston.target

# Show the feature frame and the target series.
print("X:\n", X)
print("y:\n", y)

X:
         CRIM    ZN  INDUS    NOX     RM   AGE     DIS    TAX  PTRATIO       B  \
0    0.00632  18.0   2.31  0.538  6.575  65.2  4.0900  296.0     15.3  396.90   
1    0.02731   0.0   7.07  0.469  6.421  78.9  4.9671  242.0     17.8  396.90   
2    0.02729   0.0   7.07  0.469  7.185  61.1  4.9671  242.0     17.8  392.83   
3    0.03237   0.0   2.18  0.458  6.998  45.8  6.0622  222.0     18.7  394.63   
4    0.06905   0.0   2.18  0.458  7.147  54.2  6.0622  222.0     18.7  396.90   
..       ...   ...    ...    ...    ...   ...     ...    ...      ...     ...   
501  0.06263   0.0  11.93  0.573  6.593  69.1  2.4786  273.0     21.0  391.99   
502  0.04527   0.0  11.93  0.573  6.120  76.7  2.2875  273.0     21.0  396.90   
503  0.06076   0.0  11.93  0.573  6.976  91.0  2.1675  273.0     21.0  396.90   
504  0.10959   0.0  11.93  0.573  6.794  89.3  2.3889  273.0     21.0  393.45   
505  0.04741   0.0  11.93  0.573  6.030  80.8  2.5050  273.0     21.0  396.90   

     LSTAT  
0     4.98

In [35]:
# Train / test split. This is done BEFORE any preprocessing so that every
# statistic used below (imputation means, scaling mean/std) is learned from the
# training rows only. random_state pins the split for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Missing-value handling: fill NaNs with the per-column means of the TRAINING
# set. Computing the means on the full X before splitting would leak test-set
# information into the training features. X itself is left untouched so this
# cell stays idempotent and the raw frame remains available.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Standardize features: fit the scaler on the training set, then apply the
# same transformation to the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Least squares (LinearRegression)
ls_model = LinearRegression()
ls_model.fit(X_train_scaled, y_train)
y_pred_ls = ls_model.predict(X_test_scaled)

# Gradient descent (SGDRegressor); random_state makes the stochastic fit repeatable.
gd_model = SGDRegressor(max_iter=1000, random_state=42)
gd_model.fit(X_train_scaled, y_train)
y_pred_gd = gd_model.predict(X_test_scaled)

# Report test-set MSE and R^2 for both models. The second header starts with a
# newline so the two result groups are separated by a blank line.
for header, y_pred in (
    ("=== 最小二乘法（LinearRegression） ===", y_pred_ls),
    ("\n=== 梯度下降法（SGDRegressor） ===", y_pred_gd),
):
    print(header)
    print("MSE:", mean_squared_error(y_test, y_pred))
    print("R²:", r2_score(y_test, y_pred))

=== 最小二乘法（LinearRegression） ===
MSE: 27.076445309751257
R²: 0.6307780105854284

=== 梯度下降法（SGDRegressor） ===
MSE: 27.37409776656933
R²: 0.6267191383441403
