In [22]:
import torch
import numpy as np
import torch.nn as nn

In [3]:
# Method 1: turn on gradient tracking at creation time via the requires_grad keyword.
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[-0.3062, -0.2004, -2.3097, -2.2580],
        [ 0.6715, -1.2551,  1.1644, -1.7252],
        [-0.7398, -1.2618,  0.1555,  0.1586]], requires_grad=True)

In [4]:
# Method 2: create the tensor first, then turn on gradient tracking by
# setting its requires_grad attribute.
x = torch.randn(3, 4)
x.requires_grad = True
x

tensor([[ 0.7330,  0.0479,  1.0397,  0.0680],
        [-0.0436,  0.8306,  0.4466,  1.6647],
        [ 1.5288, -1.9899, -2.0221, -1.0470]], requires_grad=True)

In [5]:
# Second random 3x4 leaf tensor, also created with gradient tracking enabled.
b = torch.randn((3, 4), requires_grad=True)

In [6]:
# Element-wise sum of x and b, both of which were created with gradient tracking on.
t = x + b

In [7]:
# Reduce t to a single scalar; backward() is later called on it without arguments.
y = t.sum()
y

tensor(-0.9315, grad_fn=<SumBackward0>)

In [8]:
# Backpropagate from the scalar y; the resulting gradient is read from b.grad below.
y.backward()

对 y = (x + b).sum() 反向传播后，查看 y 关于 b 的梯度（每个元素都是 1）

In [9]:
# Gradient of y = (x + b).sum() with respect to b.
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

只要参与运算的张量中有 requires_grad=True 的，运算结果 t 的 requires_grad 也会自动为 True

In [10]:
# Gradient-tracking flags of the two inputs and of their sum t.
x.requires_grad, b.requires_grad, t.requires_grad

(True, True, True)

![](images/chain.png)

In [11]:
# x is created without gradient tracking; b and w are created with it.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)

# Two-step computation chain: y = w * x, then z = y + b.
y = w * x
z = y + b

In [12]:
# y is computed from w (which tracks gradients), so it tracks gradients too; x does not.
x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad

(False, True, True, True)

In [13]:
# x, w and b were created directly, so they are leaves; y and z are results of operations.
x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf

(True, True, True, False, False)

反向传播计算

In [14]:
# retain_graph=True keeps the computation graph so backward() can be called on z again.
# Gradients are not cleared between calls, so repeated calls accumulate into .grad.
z.backward(retain_graph=True)

In [15]:
# dz/dw = x, so after a single backward pass this equals x.
w.grad

tensor([0.2809])

In [16]:
# dz/db = 1.
b.grad

tensor([1.])

线性回归 https://blog.csdn.net/hgnuxc_1993/article/details/115046874

In [19]:
# Training inputs: the integers 0..10 as a float32 column vector of shape (11, 1).
x_values = list(range(11))
x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape

(11, 1)

In [20]:
# Targets lie exactly on the line y = 2x + 1, one per entry of x_values,
# stored as a float32 column vector.
y_values = [2 * value + 1 for value in x_values]
y_train = np.array(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape

(11, 1)

线性回归模型
  - 其实线性回归就是一个不加激活函数的全连接层

In [23]:
class LinearRegressionModel(nn.Module):
    """Linear regression expressed as a single fully connected layer.

    No activation function is applied: the forward pass is just the output
    of one ``nn.Linear`` layer.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The model's only submodule: one linear layer.
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

In [24]:
# One input feature and one output value per sample.
input_dim, output_dim = 1, 1

model = LinearRegressionModel(input_dim=input_dim, output_dim=output_dim)

In [25]:
# Display the model's layer structure.
model

LinearRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

In [28]:
# Training configuration: number of passes over the data, SGD step size,
# the loss to minimise, and the optimizer that updates the model parameters.
epochs = 1000
learning_rate = 0.01
criterion = nn.MSELoss()  # mean squared error
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [31]:
# The training data is the same in every epoch, so convert it to tensors once
# instead of on every iteration.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress messages read 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Reset the gradients accumulated during the previous iteration.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Compute the loss.
    loss = criterion(outputs, labels)

    # Backward pass.
    loss.backward()

    # Update the weights.
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 9.676247714196506e-08
epoch 100, loss 5.5141018862059354e-08
epoch 150, loss 3.154044847519799e-08
epoch 200, loss 1.8006669932901787e-08
epoch 250, loss 1.0317020127104115e-08
epoch 300, loss 5.881231945892296e-09
epoch 350, loss 3.3407603350354975e-09
epoch 400, loss 1.915149150377715e-09
epoch 450, loss 1.114575765015502e-09
epoch 500, loss 6.271495545107086e-10
epoch 550, loss 3.6789060686714947e-10
epoch 600, loss 2.14025810962859e-10
epoch 650, loss 1.2437630092509977e-10
epoch 700, loss 7.471292334804147e-11
epoch 750, loss 4.55835785784231e-11
epoch 800, loss 2.911707333574931e-11
epoch 850, loss 1.5593505653388462e-11
epoch 900, loss 1.5593505653388462e-11
epoch 950, loss 1.5593505653388462e-11
epoch 1000, loss 1.5593505653388462e-11


In [37]:
# Inference only: under no_grad() no graph is recorded, so the output can be
# converted to NumPy directly. The input needs no requires_grad_() and the
# result needs no .data to strip gradient tracking.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 0.99999267],
       [ 2.9999938 ],
       [ 4.999995  ],
       [ 6.9999967 ],
       [ 8.999997  ],
       [10.999998  ],
       [13.        ],
       [15.000001  ],
       [17.000002  ],
       [19.000004  ],
       [21.000004  ]], dtype=float32)

模型的保存与读取

In [39]:
# Save only the model's state_dict (not the whole module object) to 'model.pkl'.
torch.save(model.state_dict(), 'model.pkl')

In [40]:
# Read the saved state_dict back from disk and load it into the existing model.
# NOTE(review): torch.load is pickle-based per the PyTorch docs; only load files you trust.
model.load_state_dict(torch.load('model.pkl'))

<All keys matched successfully>

使用 GPU 进行训练
  - 只需要将数据和模型传到 cuda 里面

In [43]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)  # differs from the CPU version: move the model to the device

epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()  # mean squared error

# Differs from the CPU version: the data must be on the same device as the model.
# The data is the same in every epoch, so it is converted and moved once here
# rather than on every iteration.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Count epochs from 1 so the progress messages read 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Reset the gradients accumulated during the previous iteration.
    optimizer.zero_grad()

    # Forward pass.
    outputs = model(inputs)

    # Compute the loss.
    loss = criterion(outputs, labels)

    # Backward pass.
    loss.backward()

    # Update the weights.
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))

epoch 50, loss 1.5593505653388462e-11
epoch 100, loss 1.5593505653388462e-11
epoch 150, loss 1.5593505653388462e-11
epoch 200, loss 1.5593505653388462e-11
epoch 250, loss 1.5593505653388462e-11
epoch 300, loss 1.5593505653388462e-11
epoch 350, loss 1.5593505653388462e-11
epoch 400, loss 1.5593505653388462e-11
epoch 450, loss 1.5593505653388462e-11
epoch 500, loss 1.5593505653388462e-11
epoch 550, loss 1.5593505653388462e-11
epoch 600, loss 1.5593505653388462e-11
epoch 650, loss 1.5593505653388462e-11
epoch 700, loss 1.5593505653388462e-11
epoch 750, loss 1.5593505653388462e-11
epoch 800, loss 1.5593505653388462e-11
epoch 850, loss 1.5593505653388462e-11
epoch 900, loss 1.5593505653388462e-11
epoch 950, loss 1.5593505653388462e-11
epoch 1000, loss 1.5593505653388462e-11
