### 1. Reorganize Linear Regression in a Pythonic (vectorized) style

主要是通过矩阵运算代替程序中原有的 for 循环，从而大大减少程序的运行时间。

In [10]:
import numpy as np
import random

In [11]:
def inference(w, b, x):
    """Evaluate the linear model ``w * x + b``.

    "Inference", "test" and "predict" all name this same step: running the
    model on some input (typically once training has finished).

    Args:
        w: Slope of the line.
        b: Intercept of the line.
        x: Input value(s); a scalar or a NumPy array, in which case the
            expression is applied element-wise.

    Returns:
        The predicted value(s), with whatever shape ``w * x + b`` produces.
    """
    return w * x + b

In [12]:
def eval_loss(w, b, x_list, gt_y_list):
    """Return the average halved squared error of the line ``w * x + b``.

    This is the vectorized equivalent of the per-sample loop::

        avg_loss += 0.5 * (w * x_list[i] + b - gt_y_list[i]) ** 2
        ...
        avg_loss /= len(gt_y_list)

    The ``0.5`` factor is kept so that the loss matches the gradients used
    for training (the derivative of ``0.5 * diff ** 2`` is ``diff``).

    Args:
        w: Slope of the line.
        b: Intercept of the line.
        x_list: Array-like of inputs (e.g. a ``(1, N)`` row vector).
        gt_y_list: Array-like of ground-truth targets, same shape as
            ``x_list``.

    Returns:
        The loss as a plain Python ``float``.
    """
    residual = w * np.asarray(x_list) + b - np.asarray(gt_y_list)
    # Summing the squared residuals gives a scalar directly, instead of the
    # (1, 1) matrix that a row-vector dot product would produce.
    return float(0.5 * np.sum(residual ** 2) / residual.size)

In [13]:
def gradient(pred_y, gt_y, x):
    """Compute the summed (not averaged) gradients for ``w`` and ``b``.

    Args:
        pred_y: Model predictions.
        gt_y: Ground-truth targets, same shape as ``pred_y``.
        x: Inputs that produced ``pred_y``.
            Assumes ``(1, N)`` row vectors, as the rest of this file uses.

    Returns:
        A ``(dw, db)`` tuple: ``dw`` is the dot product of the residual with
        ``x`` transposed (a ``(1, 1)`` array for ``(1, N)`` inputs) and
        ``db`` is the sum of the residual. Neither is divided by the number
        of samples.
    """
    residual = pred_y - gt_y
    return np.dot(residual, np.transpose(x)), np.sum(residual)

In [14]:
def cal_step_gradient(batch_x_list, batch_gt_y_list, w, b, lr):
    """Perform one gradient-descent update of ``w`` and ``b`` on a batch.

    The whole batch is processed with array operations instead of a
    per-sample loop.

    Args:
        batch_x_list: Batch inputs; the batch size is read from
            ``batch_x_list.shape[1]``, so a 2-D ``(1, batch)`` array is
            expected.
        batch_gt_y_list: Ground-truth targets for the batch.
        w: Current slope (scalar or array).
        b: Current intercept (scalar or array).
        lr: Learning rate.

    Returns:
        The updated ``(w, b)``. The ``w`` and ``b`` objects passed in are
        left unmodified.
    """
    batch_size = batch_x_list.shape[1]
    pred_y = inference(w, b, batch_x_list)
    dw, db = gradient(pred_y, batch_gt_y_list, batch_x_list)
    avg_dw = dw / batch_size
    avg_db = db / batch_size
    # Rebind rather than use ``-=``: an in-place subtract would modify a
    # caller-supplied ndarray (silently changing any saved reference to the
    # previous parameters) and raises a casting error for integer arrays.
    w = w - lr * avg_dw
    b = b - lr * avg_db
    return w, b

In [15]:
def train(x_list, gt_y_list, batch_size, lr, max_iter):
    """Fit ``w`` and ``b`` with mini-batch gradient descent.

    Both parameters start at 0. On every iteration ``batch_size`` column
    indices are drawn with ``np.random.choice`` (default arguments, so an
    index may repeat within a batch) and one update step is applied to the
    selected columns.

    Args:
        x_list: 2-D array of inputs; samples are indexed along axis 1.
        gt_y_list: 2-D array of targets, indexed the same way.
        batch_size: Number of samples drawn per iteration.
        lr: Learning rate.
        max_iter: Number of update steps to run.

    Returns:
        The fitted ``(w, b)``.
    """
    num_samples = x_list.shape[1]
    w, b = 0, 0
    for _ in range(max_iter):
        picked = np.random.choice(num_samples, batch_size)
        w, b = cal_step_gradient(
            x_list[:, picked], gt_y_list[:, picked], w, b, lr
        )
    return w, b

In [16]:
def gen_sample_data():
    """Generate noisy samples of a random line ``y = w * x + b``.

    Vectorized equivalent of drawing, for each of the 100 samples::

        x = random.randint(0, 100) * random.random()
        y = w * x + b + random.random() * random.randint(-1, 1)

    Every sample gets its own ``x`` and its own noise term.

    Returns:
        A tuple ``(x_list, y_list, w, b)`` where ``x_list`` and ``y_list``
        are float arrays of shape ``(1, 100)`` and ``w``, ``b`` are the
        slope and intercept used to generate them.
    """
    # Integer part plus a fractional part in [0, 1).
    w = random.randint(0, 10) + random.random()
    b = random.randint(0, 5) + random.random()
    num_samples = 100
    shape = (1, num_samples)
    # np.random.randint excludes its upper bound (unlike random.randint),
    # hence 101 and 2 to cover 0..100 and -1..1 inclusively.
    x_list = np.random.randint(0, 101, shape) * np.random.random(shape)
    # Noise is drawn per sample; one shared scalar would only shift the
    # intercept instead of perturbing the individual points.
    noise = np.random.random(shape) * np.random.randint(-1, 2, shape)
    y_list = w * x_list + b + noise
    return x_list, y_list, w, b

In [17]:
def run():
    """Generate sample data, fit a line to it and print both parameter sets.

    Prints the slope and intercept used to generate the data (``w_origin``,
    ``b_origin``) followed by the values estimated by training (``w1``,
    ``b1``) so the two can be compared.
    """
    x_list, y_list, true_w, true_b = gen_sample_data()
    print('w_origin:', true_w)
    print('b_origin:', true_b)

    # Training hyper-parameters.
    learning_rate, iterations, batch_size = 0.0001, 1000, 50
    fitted_w, fitted_b = train(
        x_list, y_list, batch_size, learning_rate, iterations
    )
    print('w1:', fitted_w)
    print('b1:', fitted_b)

In [None]:
# When this file is executed as a script, the code under this guard runs.
# When it is imported as a module, the guard is skipped and everything else
# is simply available to call as functions.
if __name__ == "__main__":
    run()

### 结论

对比随机生成的 w、b 和线性回归估计的 w、b 可见，w 的估计较为精确，而 b 的估计较差。这主要是因为 b 的梯度值很小，导致 b 的变化非常缓慢；其根本原因是 x 的值较大，且数据没有进行归一化。