In [4]:
# 一阶拟合、二阶拟合
%matplotlib qt
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)

def create_samples(sample_number=30):
    """Generate a synthetic (area, price) data set.

    Areas are random integers drawn from [30, 100) and returned in ascending
    order. The noise-free price is a cube-root curve of each area's offset
    from the smallest sampled area; integer noise drawn from [-20, 20) is
    then added on top.

    Draws from NumPy's global random state, so seed it beforehand to get
    repeatable data.

    Args:
        sample_number: how many samples to generate.

    Returns:
        A tuple ``(area, noisy_price)`` of arrays with ``sample_number``
        entries each.
    """
    sorted_area = np.sort(np.random.randint(30, 100, size=sample_number))
    # Shift so that the smallest area maps to 1 before taking the cube root.
    offset = sorted_area - sorted_area.min() + 1
    clean_price = 100.0 * np.power(offset, 1/3) + 200.0
    jitter = np.random.randint(-20, 20, size=sample_number)
    noisy_price = clean_price + jitter
    return sorted_area, noisy_price

def get_model(area, price, order):
    """Fit a polynomial of degree ``order`` to the (area, price) samples.

    The coefficients come from ``np.polyfit`` and are wrapped in an
    ``np.poly1d`` object, so the returned model can be called like a
    function of one variable.

    Args:
        area: sample x-values.
        price: sample y-values.
        order: degree of the fitted polynomial (1 = line, 2 = parabola, ...).

    Returns:
        The fitted ``np.poly1d`` polynomial.
    """
    return np.poly1d(np.polyfit(area, price, order))

# Draw 50 noisy samples, then fit a straight line and a parabola to them.
# (`first_order_poynomial` keeps its original spelling so that any other
# cell referring to that name keeps working.)
area, price = create_samples(50)
first_order_poynomial = get_model(area, price, 1)
second_order_polynomial = get_model(area, price, 2)

# Scatter the raw samples and overlay the linear (red) and quadratic (green) fits.
plt.figure(figsize=(8, 6))
plt.scatter(area, price)
plt.plot(area, first_order_poynomial(area), color='r')
plt.plot(area, second_order_polynomial(area), color='g')
plt.xlabel('Area', fontsize=20)
plt.ylabel('Price', fontsize=20)
plt.show()

# Evaluate the linear model at a single area value.
x = 85
print(first_order_poynomial(x))

585.8856243387602


In [6]:
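# Gradient descent for linear regression (titled "SGD" in the original notes, but every step below uses the full data set, i.e. batch gradient descent)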
import math

# Reproducible synthetic data: y = 1 * x + 1 plus a little Gaussian noise.
np.random.seed(0)
x = 4 * np.random.randn(50)  # randn draws from a standard normal distribution
noise = 0.3 * np.random.randn(50)
y = 1 * x + 1 + noise

theta0, theta1 = 0.0, 0.0  # intercept and slope, both start at zero
alpha = 0.05  # learning rate; 0.01 was also tried, 0.05 works better (about 100 steps are enough)


def loss(t0, t1):
    """Half mean squared error of the line ``t1 * x + t0`` on the samples (x, y)."""
    return 0.5 * np.square(y - (x * t1 + t0)).sum() / x.size


def gradient1(t0, t1):
    """Partial derivative of ``loss`` with respect to the slope ``t1``."""
    return -((y - (x * t1 + t0)) * x).sum() / x.size


def gradient0(t0, t1):
    """Partial derivative of ``loss`` with respect to the intercept ``t0``."""
    return -(y - (x * t1 + t0)).sum() / x.size


losses = []
# Every column has an explicit width that also fits values such as -21.0743,
# so neighbouring columns can no longer run into each other.
print(f'{"Step":^8}{"theta0":^10}{"theta1":^10}{"g0":^10}{"g1":^10}{"loss":^16}')
for index in range(400):
    # Gradients and loss are evaluated at the parameters *before* this step's update.
    g0 = gradient0(theta0, theta1)
    g1 = gradient1(theta0, theta1)
    current_loss = loss(theta0, theta1)
    # Store log(loss): the loss drops very quickly at first, and the log scale
    # keeps the later, slower changes visible in the plot.
    losses.append(math.log(current_loss))

    theta0 -= alpha * g0
    theta1 -= alpha * g1
    print(f'{index:^8}{theta0:^10.4f}{theta1:^10.4f}{g0:^10.4f}{g1:^10.4f}{current_loss:^16.6f}')

# Figure 1: the samples together with the fitted line.
fig1 = plt.figure()
fig1.gca().scatter(x, y)
x1 = [i / 2.5 - 10 for i in range(0, 50)]  # 50 evenly spaced points from -10 to 9.6
y_hat = [theta1 * xi + theta0 for xi in x1]  # `xi` avoids shadowing the data array `x`
fig1.gca().plot(x1, y_hat)
fig1.show()

# Figure 2: log-loss per iteration.
fig2 = plt.figure()
fig2.gca().plot(losses)
fig2.show()


  Step   theta0  theta1    g0      g1         loss      
   0     0.0778  1.0537 -1.5560 -21.074311.309437112836278
   1     0.1221  1.0207 -0.8857  0.6601 0.4590451813692541
   2     0.1651  1.0204 -0.8600  0.0056 0.40940444452875513
   3     0.2059  1.0192 -0.8171  0.0240 0.37334239107706424
   4     0.2448  1.0181 -0.7770  0.0223 0.34074907582281944
   5     0.2817  1.0171 -0.7387  0.0212 0.31128365907368083
   6     0.3168  1.0161 -0.7024  0.0201 0.28464595778125973
   7     0.3502  1.0151 -0.6678  0.0192 0.2605646038211861
   8     0.3820  1.0142 -0.6350  0.0182 0.23879427153220997
   9     0.4122  1.0133 -0.6038  0.0173 0.21911317848577117
   10    0.4409  1.0125 -0.5741  0.0165 0.20132082610634874
   11    0.4682  1.0117 -0.5458  0.0157 0.18523595711830132
   12    0.4941  1.0110 -0.5190  0.0149 0.17069470901094738
   13    0.5188  1.0103 -0.4934  0.0142 0.15754894471055128
   14    0.5422  1.0096 -0.4692  0.0135 0.14566474345315203
   15    0.5646  1.0089 -0.4461  0.0128 0.1349

In [9]:
# 一帧一帧生成gif动画
import imageio

# Three sample points, with y equal to x at every point.
x = np.array([1, 2, 3])
y = np.array([1, 2, 3])

# Candidate slopes 0.0, 0.1, ... and the sum of squared errors of the line
# y = theta * x for each of them. Note: this rebinds the notebook-level name
# `loss` to a list.
theta_list = np.arange(0, 2.1, 0.1)
loss = [np.square(y - x * theta).sum() for theta in theta_list]

def plot_frame(x, y, theta_list, loss, i):
    """Render one animation frame and return it as an RGBA image array.

    The left panel shows the loss curve for ``theta_list[0]`` through
    ``theta_list[i]``; the right panel shows the data points together with
    the line ``y = theta_list[i] * x``.

    Args:
        x, y: coordinates of the data points for the scatter plot.
        theta_list: candidate slopes, indexable by ``i``.
        loss: loss values, one per entry of ``theta_list``.
        i: index of the frame to draw.

    Returns:
        A uint8 array holding the rendered canvas, with 4 channels (RGBA)
        on the last axis.
    """
    fig = plt.figure(figsize=(10, 5))

    # Left panel: loss curve up to and including step i.
    loss_ax = fig.add_subplot(1, 2, 1)
    loss_ax.plot(theta_list[:i + 1], loss[:i + 1], linewidth=3)
    loss_ax.grid()
    # Fixed limits keep the axes from rescaling between frames.
    loss_ax.set_xlim(0, 2)
    loss_ax.set_ylim(0, 20)

    # Right panel: data points and the line for the current theta.
    fit_ax = fig.add_subplot(1, 2, 2)
    fit_ax.plot([0, 4], [0, 4 * theta_list[i]], linewidth=3)
    fit_ax.scatter(x, y, marker='o', c='b', s=300)
    fit_ax.grid()
    fit_ax.set_xlim(0, 4)
    fit_ax.set_ylim(0, 4)

    fig.canvas.draw()
    # Copy the pixels using the buffer's own shape instead of reshaping a flat
    # buffer with get_width_height(): that call reports logical pixels, which
    # can differ from the rendered buffer on HiDPI canvases. The copy also
    # keeps the image independent of the figure once it is closed.
    image = np.array(fig.canvas.buffer_rgba(), dtype='uint8')
    plt.close(fig)  # close this figure explicitly, not whichever is current
    return image

# Render one frame per candidate theta. Iterating over len(theta_list) rather
# than a hard-coded 20 includes the final slope as well.
frames = [plot_frame(x, y, theta_list, loss, i) for i in range(len(theta_list))]
# NOTE(review): newer imageio releases may expect `duration` instead of `fps`
# for GIF output -- confirm against the installed version.
imageio.mimsave('./data_and_loss.gif', frames, fps=1.0)