## 梯度下降法的向量化

In [1]:
import numpy as np
from sklearn import datasets

In [6]:
# Load a real-world dataset: the Boston housing data.
try:
    boston = datasets.load_boston()
except (ImportError, AttributeError):
    # load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, where
    # accessing it raises ImportError. Rebuild the same arrays from the
    # original source, following the recipe given in scikit-learn's removal
    # notice. This fallback needs network access.
    from urllib.request import urlopen

    from sklearn.utils import Bunch

    with urlopen("http://lib.stat.cmu.edu/datasets/boston") as response:
        raw_text = response.read().decode("utf-8", errors="replace")

    # The first 22 lines are a descriptive header. Every record is then
    # wrapped over two lines (11 + 3 numbers): 13 features followed by the
    # target, so flattening all tokens and reshaping to 14 columns recovers it.
    record_lines = raw_text.splitlines()[22:]
    records = np.array(" ".join(record_lines).split(), dtype=float).reshape(-1, 14)

    # Expose the same .data / .target attributes the rest of the notebook reads.
    boston = Bunch(data=records[:, :13], target=records[:, 13])

In [7]:
# Pull the feature matrix and target vector out of the loaded dataset.
X, y = boston.data, boston.target

# Keep only the samples whose target is strictly below 50.0. The mask is
# computed once and applied to both arrays so X and y stay row-aligned.
keep_mask = y < 50.0
X, y = X[keep_mask], y[keep_mask]

In [3]:
from sklearn.model_selection import train_test_split

# Split the filtered data into train and test sets. random_state is fixed so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

In [4]:
from playML.LinearRegression import LinearRegression

# Baseline: the result obtained by fitting linear regression with fit_normal
# (presumably the closed-form normal-equation solution; confirm in playML).
# %time reports the wall-clock cost of the fit; the last line displays the
# score on the test set.
lin_reg1 = LinearRegression()
%time lin_reg1.fit_normal(X_train, y_train)
lin_reg1.score(X_test, y_test)

Wall time: 19 ms


0.8009390227581117

### 使用梯度下降法

In [8]:
# Fit with gradient descent directly on the raw (unscaled) training features,
# using a very small eta and fit_gd's default iteration count.
lin_reg2 = LinearRegression()
lin_reg2.fit_gd(X_train, y_train, eta=0.000001)

LinearRegression()

In [9]:
# Score the gradient-descent model on the held-out test set.
lin_reg2.score(X_test, y_test)

0.30334492611229424

In [10]:
# Refit with the same eta but n_iters raised to 1e6, timing the call with %time.
# NOTE(review): 1e6 is a float literal; confirm fit_gd accepts a non-integer n_iters.
%time lin_reg2.fit_gd(X_train, y_train, eta=0.000001, n_iters=1e6)

Wall time: 17.6 s


LinearRegression()

In [11]:
lin_reg2.score(X_test, y_test)
# Takeaway: training is slow and the score is still poor, because the features are measured on different scales.

0.737942001474466

### 使用梯度下降法前进行数据归一化

In [12]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training set only, then standardize the training features.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_standard = standardScaler.transform(X_train)

# Gradient descent on the standardized features, using fit_gd's default
# arguments; %time reports the wall-clock cost of the fit.
lin_reg3 = LinearRegression()
%time lin_reg3.fit_gd(X_train_standard, y_train)

Wall time: 77 ms


LinearRegression()

In [13]:
# Standardize the test set with the scaler that was fitted on the training
# set, then score the model on the transformed test features.
X_test_standard = standardScaler.transform(X_test)
lin_reg3.score(X_test_standard, y_test)

0.8009270105386641

### 梯度下降法的优势

In [14]:
# Synthetic regression problem with far more features than samples, used to
# compare the fit_normal and fit_gd training times.
m = 1000  # number of samples
n = 5000  # number of features

# Seed NumPy's global RNG so the generated data is identical on every run
# (666 matches the random_state used for the train/test split).
np.random.seed(666)

# Feature matrix of shape (m, n) drawn from a standard normal distribution.
big_X = np.random.normal(size=(m, n))

# true_theta[0] is the intercept; true_theta[1:] are the n feature coefficients.
true_theta = np.random.uniform(0.0, 100.0, size=n+1)

# Linear response plus Gaussian noise with standard deviation 10.
big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0., 10., size=m)

In [15]:
# Time the fit_normal solution on the wide synthetic dataset (m=1000 samples, n=5000 features).
big_reg1 = LinearRegression()
%time big_reg1.fit_normal(big_X, big_y)

Wall time: 2.02 s


LinearRegression()

In [16]:
# Time gradient descent (fit_gd with its default arguments) on the same synthetic dataset.
big_reg2 = LinearRegression()
%time big_reg2.fit_gd(big_X, big_y)

Wall time: 1.77 s


LinearRegression()

由此可知，当特征数远多于样本数时（此处 m=1000，n=5000），梯度下降法的训练耗时（1.77 s）低于 fit_normal 的求解耗时（2.02 s），因此梯度下降法更适用于样本数少而特征多的数据集。