# 批量梯度下降法

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Synthetic 1-D linear regression data: y = 3x + 4 plus standard normal noise.
m = 100000
np.random.seed(666)  # fixed seed so the generated sample is reproducible
x = np.random.random(size=m) * 2
X = x[:, np.newaxis]  # column-vector view of x, shape (m, 1)
y = 4. + 3. * x + np.random.normal(size=m)

In [5]:
def J(theta,X_b,y):
    """Return the mean squared error of the linear model ``X_b.dot(theta)``.

    Args:
        theta: parameter vector.
        X_b: design matrix whose rows are samples.
        y: target values, one per row of ``X_b``.

    Returns:
        ``sum((y - X_b.dot(theta)) ** 2) / len(X_b)``, or ``float('inf')``
        when the computation raises an ordinary exception (for example a
        shape mismatch), so callers can treat the cost as "infinitely bad".
    """
    try:
        return np.sum((y - X_b.dot(theta))**2)/len(X_b)
    except Exception:
        # Only ordinary errors map to an infinite cost; KeyboardInterrupt,
        # SystemExit and other BaseException subclasses must propagate.
        return float('inf')

def dJ(theta,X_b,y):
    """Return the gradient of the mean squared error with respect to ``theta``.

    Computed in one vectorised step as ``X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)``.
    The residual is evaluated once instead of once per component, and no
    column of ``X_b`` is assumed to be all ones, so the result is the
    gradient of ``sum((y - X_b.dot(theta)) ** 2) / len(X_b)`` for any
    design matrix.

    Args:
        theta: parameter vector, one entry per column of ``X_b``.
        X_b: design matrix whose rows are samples.
        y: target values, one per row of ``X_b``.

    Returns:
        Array with one partial derivative per entry of ``theta``.
    """
    residual = X_b.dot(theta) - y
    return X_b.T.dot(residual) * 2 / len(X_b)

def gradient_decent(X_b,y,initial_theta,eta,n_iters = 1e4,epsilon=1e-8):
    """Minimise the cost ``J`` with batch gradient descent.

    Args:
        X_b: design matrix passed through to ``J`` and ``dJ``.
        y: target values passed through to ``J`` and ``dJ``.
        initial_theta: starting parameter vector; it is not modified.
        eta: learning rate (step size applied to the gradient).
        n_iters: upper bound on the number of update steps.
        epsilon: stop as soon as one step changes the cost by less than this.

    Returns:
        The parameter vector after the last update step.
    """
    theta = initial_theta
    # Carry the cost of the current theta across iterations so J is evaluated
    # once per step rather than twice (the old value never changes).
    cost = J(theta, X_b, y)

    i_iter = 0
    while i_iter < n_iters:
        gradient = dJ(theta, X_b, y)
        theta = theta - eta * gradient
        new_cost = J(theta, X_b, y)
        if abs(new_cost - cost) < epsilon:
            break
        cost = new_cost
        i_iter += 1
    return theta

In [6]:
# Design matrix: a leading column of ones (intercept term) followed by the features.
X_b = np.hstack((np.ones((X.shape[0], 1)), X))
# Start from the all-zero parameter vector, one entry per column of X_b.
initial_theta = np.zeros(X_b.shape[1])
eta = 0.01  # fixed learning rate for batch gradient descent

theta = gradient_decent(X_b, y, initial_theta, eta=eta)

In [7]:
theta

array([3.99839758, 3.002347  ])

## 随机梯度下降法

In [12]:
# Vectorised gradient over the whole data set
def dJ_l(theta, X_b, y):
    """Return ``X_b.T.dot(X_b.dot(theta) - y) * 2 / len(y)`` in vectorised form."""
    residual = X_b.dot(theta) - y
    projected = X_b.T.dot(residual)
    return projected * 2. / len(y)
# Gradient estimated from a single sample
def dJ_sgd(theta, X_b_i, y_i):
    """Return the squared-error gradient for one sample ``(X_b_i, y_i)``.

    There is no division by a sample count because only one sample is used.
    """
    error = X_b_i.dot(theta) - y_i
    return X_b_i.T.dot(error) * 2.

def sgd(X_b,y,initial_theta,n_iters,t0=5,t1=50):
    """Fit ``theta`` with stochastic gradient descent, one random sample per step.

    Args:
        X_b: design matrix whose rows are samples.
        y: target values, indexed the same way as the rows of ``X_b``.
        initial_theta: starting parameter vector; it is not modified.
        n_iters: number of single-sample update steps to perform.
        t0: numerator of the decaying learning rate ``t0 / (t + t1)``.
        t1: offset in the denominator of the learning rate; it damps the
            first steps.

    Returns:
        The parameter vector after ``n_iters`` updates.
    """
    def learning_rates(t):
        # Step size shrinks as the iteration counter t grows.
        return t0/(t+t1)

    # A tiny change between two consecutive steps can be a coincidence when
    # samples are drawn at random, so the only stopping rule is the step count.
    theta = initial_theta
    for cur_iter in range(n_iters):
        # Pick one training sample at random and update using only that sample.
        rand_i = np.random.randint(len(X_b))
        gradient = dJ_sgd(theta,X_b[rand_i],y[rand_i])
        theta = theta - learning_rates(cur_iter)*gradient
    return theta

In [13]:
%%time
# Design matrix with a leading column of ones, then run SGD for one third as
# many single-sample steps as there are rows.
X_b = np.hstack((np.ones((X.shape[0], 1)), X))
initial_theta = np.zeros(X_b.shape[1])
theta = sgd(X_b, y, initial_theta, n_iters=X_b.shape[0] // 3)

Wall time: 292 ms


In [14]:
theta

array([3.99220562, 3.00173199])

# scikit-learn 中的 SGD

In [17]:
from sklearn.linear_model import SGDRegressor

In [None]:
sgd_reg = SGDRegressor(n_iter=100)  #默认值为5，表示样本浏览多少次
%time sgd_reg.fit(X_train_standard,y_train)
sgd_reg.score(X_test_standard,y_test)