In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## 反向传播

### 一个两层神经网络

$$ \hat{y} = W_2 ( W_1 \cdot x + b_1 ) + b_2 $$

[Matrix Cookbook](https://www.math.uwaterloo.ca/~hwolkowi/matrixcookbook.pdf)

In [3]:
x_data = [float(i) for i in range(1, 6)]
y_data = [2 * i for i in x_data]

w1 = torch.Tensor([0.0])
w1.requires_grad = True

def forward(x):
    return x * w1

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print(f"Predicted value(Before Training): {forward(4).item()}")

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y)
        l.backward()
        print(f"\t gradient: {w1.grad.item()} x:{x} y:{y}")
        w1.data -= .01 * w1.grad.data
        w1.grad.zero_()
    print(f"Progress: {epoch+1}/{100} Loss: {l.item()}")

print(f"Predicted value(After Training): {forward(4).item()}")

Predicted value(Before Training): 0.0
	 gradient: -4.0 x:1.0 y:2.0
	 gradient: -15.680000305175781 x:2.0 y:4.0
	 gradient: -32.457603454589844 x:3.0 y:6.0
	 gradient: -47.31596755981445 x:4.0 y:8.0
	 gradient: -50.273216247558594 x:5.0 y:10.0
Progress: 1/100 Loss: 25.273963928222656
	 gradient: -1.0054643154144287 x:1.0 y:2.0
	 gradient: -3.941420555114746 x:2.0 y:4.0
	 gradient: -8.15874195098877 x:3.0 y:6.0
	 gradient: -11.893630981445312 x:4.0 y:8.0
	 gradient: -12.636985778808594 x:5.0 y:10.0
Progress: 2/100 Loss: 1.5969340801239014
	 gradient: -0.25273966789245605 x:1.0 y:2.0
	 gradient: -0.9907398223876953 x:2.0 y:4.0
	 gradient: -2.050832748413086 x:3.0 y:6.0
	 gradient: -2.9896583557128906 x:4.0 y:8.0
	 gradient: -3.1765079498291016 x:5.0 y:10.0
Progress: 3/100 Loss: 0.1009020283818245
	 gradient: -0.06353020668029785 x:1.0 y:2.0
	 gradient: -0.2490386962890625 x:2.0 y:4.0
	 gradient: -0.5155105590820312 x:3.0 y:6.0
	 gradient: -0.7514991760253906 x:4.0 y:8.0
	 gradient: -0.798

## 作业

预测模型使用二次模型: 

$$ \hat y = \omega_1x^2 + \omega_2x + b $$

In [4]:
# 这个写错了

x_data = [float(i) for i in range(1, 6)]
y_data = [2 * i for i in x_data]

w1 = torch.Tensor([0.0])
w1.requires_grad = True
w2 = torch.Tensor([0.0])
w2.requires_grad = True
b = torch.Tensor([0.0])
b.requires_grad = True

def forward(x):
    return x ** 2 * w1 + x * w2 + b

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print(f"Predicted value(Before Training): {forward(4).item()}")

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y)
        l.backward()
        print(f"\t gradient: {w1.grad.item()} x:{x} y:{y}")
        w1.data -= .01 * w1.grad.data
        w1.grad.zero_()
        w2.data -= .01 * w2.grad.data
        w2.grad.zero_()
        b.data -= .01 * b.grad.data
        b.grad.zero_()
    print(f"Progress: {epoch+1}/{100} Loss: {l.item()}")

print(f"Predicted value(After Training): {forward(4).item()}")

Predicted value(Before Training): 0.0
	 gradient: -4.0 x:1.0 y:2.0
	 gradient: -29.760000228881836 x:2.0 y:4.0
	 gradient: -41.05440139770508 x:3.0 y:6.0
	 gradient: 173.85321044921875 x:4.0 y:8.0
	 gradient: -1762.6634521484375 x:5.0 y:10.0
Progress: 1/100 Loss: 1242.79296875
	 gradient: 37.61802291870117 x:1.0 y:2.0
	 gradient: 540.006591796875 x:2.0 y:4.0
	 gradient: 1652.2308349609375 x:3.0 y:6.0
	 gradient: -3906.296875 x:4.0 y:8.0
	 gradient: 42382.22265625 x:5.0 y:10.0
Progress: 2/100 Loss: 718501.125
	 gradient: -979.8092041015625 x:1.0 y:2.0
	 gradient: -13397.98828125 x:2.0 y:4.0
	 gradient: -39779.1015625 x:3.0 y:6.0
	 gradient: 95917.1328125 x:4.0 y:8.0
	 gradient: -1037661.9375 x:5.0 y:10.0
Progress: 3/100 Loss: 430696896.0
	 gradient: 23913.275390625 x:1.0 y:2.0
	 gradient: 327609.96875 x:2.0 y:4.0
	 gradient: 973873.875 x:3.0 y:6.0
	 gradient: -2346357.25 x:4.0 y:8.0
	 gradient: 25386626.0 x:5.0 y:10.0
Progress: 4/100 Loss: 257792327680.0
	 gradient: -585118.4375 x:1.0 y

In [5]:
# 导入必要的模块
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

# 定义数据
x_data = torch.Tensor([float(i) for i in range(1, 6)])
y_data = torch.Tensor([2 * i for i in x_data])

# 定义数据集和数据加载器
dataset = data.TensorDataset(x_data, y_data)
dataloader = data.DataLoader(dataset, batch_size=1, shuffle=True)

# 定义模型
model = nn.Sequential(
    nn.Linear(1, 1), # 线性层，输入和输出都是一维的
    nn.PReLU(), # 激活函数，可以增加模型的非线性
    nn.Linear(1, 1) # 线性层，输入和输出都是一维的
)

# 定义损失函数
criterion = nn.MSELoss()

# 定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 定义训练轮数
epochs = 100

# 训练模型
print(f"Predicted value(Before Training): {model(torch.Tensor([4])).item()}")
for epoch in range(epochs):
    running_loss = 0.0
    for x, y in dataloader:
        # 清零梯度
        optimizer.zero_grad()
        # 前向传播
        y_pred = model(x)
        # 计算损失
        loss = criterion(y_pred, y)
        # 反向传播
        loss.backward()
        # 更新参数
        optimizer.step()
        # 累计损失
        running_loss += loss.item()
    # 打印每轮的平均损失
    print(f"Progress: {epoch+1}/{epochs} Loss: {running_loss/len(dataloader)}")
print(f"Predicted value(After Training): {model(torch.Tensor([4])).item()}")


Predicted value(Before Training): 0.7660875916481018
Progress: 1/100 Loss: 34.55890545845032
Progress: 2/100 Loss: 32.43955912590027
Progress: 3/100 Loss: 29.33141803741455
Progress: 4/100 Loss: 26.848632526397704
Progress: 5/100 Loss: 23.129837942123412
Progress: 6/100 Loss: 20.25961079597473
Progress: 7/100 Loss: 17.214896512031554
Progress: 8/100 Loss: 14.091981887817383
Progress: 9/100 Loss: 10.496461084485054
Progress: 10/100 Loss: 7.55832233466208
Progress: 11/100 Loss: 4.927307554380969
Progress: 12/100 Loss: 3.0796390742063524
Progress: 13/100 Loss: 1.8786957561969757
Progress: 14/100 Loss: 0.7441763982176781
Progress: 15/100 Loss: 0.3114735117182136
Progress: 16/100 Loss: 0.33080676645040513
Progress: 17/100 Loss: 0.3056004121084698
Progress: 18/100 Loss: 0.3372853070497513
Progress: 19/100 Loss: 0.33312640599906446
Progress: 20/100 Loss: 0.30767078828066585
Progress: 21/100 Loss: 0.27925955690407134
Progress: 22/100 Loss: 0.26400391552597285
Progress: 23/100 Loss: 0.260782764