In [34]:
import numpy as np
from sklearn.model_selection import train_test_split

# Boston housing dataset: every column except the last is a feature, the last is the target.
# Raw string literal: in a normal string "\D" and "\h" are invalid escape sequences
# (they trigger a warning on recent Python versions); the resulting path is unchanged.
DATA_PATH = r"D:\Downloads\housing.csv"
data = np.genfromtxt(DATA_PATH, delimiter=',', skip_header=1)
X = data[:, :-1]
y = data[:, -1]

# Standardize each feature column to zero mean and unit variance.
# NOTE: the statistics are computed over the full dataset, i.e. before the split below.
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
# A constant column has zero standard deviation; dividing by it would turn the
# whole column into NaN, so such columns are only centered.
feature_std[feature_std == 0] = 1.0
X = (X - feature_mean) / feature_std

# Hold out 20% of the samples as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
# Linear regression model solved in closed form
class LinearRegression:
    """Ordinary least-squares linear regression fitted in closed form.

    Parameters
    ----------
    fit_intercept : bool, default True
        When True, a column of ones is prepended to the features so that the
        first entry of ``weights`` acts as the intercept.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Fitted coefficients (intercept first when ``fit_intercept`` is True);
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, fit_intercept=True):
        self.fit_intercept = fit_intercept
        self.weights = None

    def _design_matrix(self, X):
        """Return X as a float array, with a leading ones column when fitting an intercept."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
        return X

    def fit(self, X, y):
        """Estimate ``weights`` by minimizing the squared error ``||X w - y||^2``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples entries along its first axis
        """
        X = self._design_matrix(X)
        y = np.asarray(y, dtype=float)
        # lstsq solves the same least-squares problem as inv(X.T @ X) @ X.T @ y
        # without forming or inverting X.T @ X, so it remains well defined
        # (minimum-norm solution) when the features are collinear.
        self.weights = np.linalg.lstsq(X, y, rcond=None)[0]

    def predict(self, X):
        """Return ``X @ weights`` for the given samples (intercept column added if configured).

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("LinearRegression.predict() called before fit()")
        return self._design_matrix(X) @ self.weights

In [42]:
# Closed-form solution: fit on the training split, then report the test-set MSE
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)
mse = np.square(y_pred - y_test).mean()
print("解析解:", mse)

解析解: 24.291119474973563


In [45]:
# Linear regression model trained by gradient descent
class GradientDescent:
    """Linear regression fitted by full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to the gradient at every iteration.
    num_iterations : int, default 1000
        Number of gradient steps; there is no early stopping.
    fit_intercept : bool, default True
        When True, a column of ones is prepended to the features so that the
        first entry of ``weights`` acts as the intercept.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Current coefficients (intercept first when ``fit_intercept`` is True);
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000, fit_intercept=True):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.fit_intercept = fit_intercept
        self.weights = None

    def _design_matrix(self, X):
        """Return X as a float array, with a leading ones column when fitting an intercept."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
        return X

    def fit(self, X, y):
        """Run ``num_iterations`` gradient steps starting from all-zero weights.

        Each step uses the gradient ``(1 / n) * X.T @ (X @ w - y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples values (a column vector is flattened)

        Raises
        ------
        ValueError
            If the dataset is empty or X and y disagree on the number of samples.
        """
        X = self._design_matrix(X)
        # Flatten the target so an (n, 1) column cannot broadcast against the
        # (n,) prediction vector.
        y = np.asarray(y, dtype=float).ravel()
        num_samples = X.shape[0]
        if num_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != num_samples:
            raise ValueError(
                f"X has {num_samples} samples but y has {y.shape[0]} values"
            )

        self.weights = np.zeros(X.shape[1])
        # Loop-invariant: the scaled transpose is the same at every iteration, so
        # build it once instead of rescaling the whole matrix per step.
        scaled_xt = (1 / num_samples) * X.T
        for _ in range(self.num_iterations):
            residual = X @ self.weights - y
            self.weights -= self.learning_rate * (scaled_xt @ residual)

    def predict(self, X):
        """Return ``X @ weights`` for the given samples (intercept column added if configured).

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("GradientDescent.predict() called before fit()")
        return self._design_matrix(X) @ self.weights

In [46]:
# Gradient descent: fit on the training split, then report the test-set MSE
gd_model = GradientDescent()
gd_model.fit(X_train, y_train)
y_pred = gd_model.predict(X_test)
mse = np.square(y_pred - y_test).mean()
print("梯度下降法:", mse)

梯度下降法: 25.406311694344584


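上面输出了解析解方法和梯度下降法在测试集上的均方误差（MSE）。解析解的 MSE（约 24.29）略小于梯度下降法（约 25.41），即在当前超参数（学习率 0.01、迭代 1000 次）下，解析解方法在波士顿房价数据集测试集上的预测误差更小。两种方法最小化的是同一个最小二乘目标，差距主要来自梯度下降尚未完全收敛；增大迭代次数或适当调大学习率后，梯度下降的结果会趋近解析解。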

解析解通过正规方程一步直接求出问题的解，适用于特征维度不高的简单问题，可以得到全局最优解；而梯度下降法通过迭代调整参数，逐步接近最小值点，适用于大规模数据的问题。需要注意的是，线性回归的均方误差损失是凸函数，不存在局部最优解，此处主要应关注学习率和迭代次数的选择；只有在非凸问题中，才需要注意防止陷入局部最优解。

新年快乐！(〃'▽'〃)