# 此样例联系模型的存储和读取
## 输入
x1, x2
## 输出
y = x1 + x2 >= 0? 1: 0;
## 实现方法
自定义模块，继承`torch.nn.Module`并定义`forward`函数

In [1]:
import torch

## 产生数据
使用`torch.randn()`随机生成满足标准正态分布的张量，size为$1000\times2$。

In [2]:
x = torch.randn(1000, 2)
x

使用`torch.sum(input, dim, keepdim=False, dtype=None)`生成label，其参数如下：
* `input`：需要求和的tensor
* `dim`：需要求和的维度
* `keepdim`：默认为False，如果为True则求和后的输出的维数与input相同

也可以使用`y[x.sum(dim=1) >= 0] = 1`生成label。

In [3]:
y = torch.zeros(1000, 1)
y[torch.sum(x, dim=1) >= 0] = 1
# y[x.sum(dim=1) >= 0] = 1
y

## 建立模型并训练
### 建立网络
自定义模块，继承`torch.nn.Module`并定义`forward`函数。

也可以通过控制流（循环语句）重复使用相同模块（如某一隐藏层）。
```python
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.h = torch.nn.Linear(H, H)
        self.linear2 = torch.nn.Linear(H, D_out)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        relu = self.relu(self.linear1(x))
        for _ in range(3):
            relu = self.relu(self.h(relu))
        y_pred = self.sigmoid(self.linear2(relu))
        return y_pred
```
可以通过`list(model.parameters())[index]`查看对应层的参数，其中重复使用的模块权值共享。

In [4]:
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        relu = self.relu(self.linear1(x))
        y_pred = self.sigmoid(self.linear2(relu))
        return y_pred

### 损失函数
损失函数使用`torch.nn.BCELoss()`，即Binary Cross Entrophy Loss，适用于使用Sigmoid激活函数的二分类问题。
* `input`: Tensor of arbitrary shape
* `target`: Tensor of the same shape as input

### 优化器
使用`torch.optim.Adam()`进行梯度下降，其重要的参数为：
* `params`：网络的权重，通过`model.parameters()`获取，为可迭代类型
* `lr`：学习率learning_rate，默认为1e-3

In [5]:
model = TwoLayerNet(2, 4, 1)
learning_rate = 1e-3
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print(model)

### 自动求导
* `optimizer.zero_grad()`：在反向传播之前，使用optimizer将它要更新的所有张量的梯度清零(这些张量是模型可学习的权重)
* `loss.backward()`：反向传播：根据模型的参数计算loss的梯度
* `optimizer.step()`：调用Optimizer的step函数使它所有参数更新

也可将以上三条语句替换为以下代码：
```python
# 反向传播之前清零梯度
model.zero_grad()
loss.backward()
# 使用梯度下降更新权重。
# 每个参数都是张量，所以我们可以像我们以前那样可以得到它的数值和梯度
with torch.no_grad():
    for param in model.parameters():
        param -= learning_rate * param.grad
```
此时不使用优化器，而是在`torch.no_grad()`上下文环境中更新梯度。

In [6]:
for t in range(1000):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(t, loss.item())
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 0.7108566761016846
1 0.7098792791366577
2 0.7089057564735413
3 0.7079359292984009
4 0.7069706916809082
5 0.7060105800628662
6 0.705054521560669
7 0.7041033506393433
8 0.7031581401824951
9 0.702218234539032
10 0.701284646987915
11 0.7003559470176697
12 0.6994321346282959
13 0.6985144019126892
14 0.6976024508476257
15 0.6966961622238159
16 0.6957949995994568
17 0.694898784160614
18 0.6940069198608398
19 0.6931210160255432
20 0.6922411918640137
21 0.6913670897483826
22 0.6904979944229126
23 0.6896365284919739
24 0.6887794733047485
25 0.6879273653030396
26 0.6870818734169006
27 0.68623948097229
28 0.6854029893875122
29 0.6845721006393433
30 0.6837436556816101
31 0.6829202771186829
32 0.6821016073226929
33 0.6812883019447327
34 0.6804792881011963
35 0.6796746253967285
36 0.6788732409477234
37 0.6780770421028137
38 0.6772847175598145
39 0.6764959096908569
40 0.6757124066352844
41 0.6749334335327148
42 0.674160361289978
43 0.6733903884887695
44 0.6726235151290894
45 0.6718600988388062
46 0.

559 0.3106587827205658
560 0.3100413978099823
561 0.3094254732131958
562 0.3088110685348511
563 0.30819767713546753
564 0.3075858950614929
565 0.3069759011268616
566 0.306367427110672
567 0.30576092004776
568 0.3051562011241913
569 0.3045528531074524
570 0.3039519488811493
571 0.3033520579338074
572 0.3027542233467102
573 0.3021581470966339
574 0.3015638589859009
575 0.30097052454948425
576 0.30037859082221985
577 0.2997884154319763
578 0.2991999387741089
579 0.29861316084861755
580 0.29802826046943665
581 0.297445684671402
582 0.29686370491981506
583 0.29628297686576843
584 0.2957042157649994
585 0.2951275706291199
586 0.29455187916755676
587 0.29397818446159363
588 0.2934064269065857
589 0.29283708333969116
590 0.29226914048194885
591 0.29170292615890503
592 0.291138619184494
593 0.29057663679122925
594 0.2900160849094391
595 0.28945785760879517
596 0.2889007031917572
597 0.2883458435535431
598 0.2877925634384155
599 0.28724074363708496
600 0.28669071197509766
601 0.2861415147781372


991 0.16038228571414948
992 0.1602039486169815
993 0.16002614796161652
994 0.15984871983528137
995 0.15967172384262085
996 0.15949523448944092
997 0.15931902825832367
998 0.15914317965507507
999 0.1589680165052414


## 创建测试样例并进行测试

In [7]:
x_test = torch.randn(100, 2)
x_test

tensor([[ 0.0471,  1.6578],
        [ 0.1347, -0.9502],
        [-0.3595, -0.0066],
        [-0.6453,  0.6148],
        [-0.7034, -0.3515],
        [ 0.1038, -0.9994],
        [-0.6848,  0.2726],
        [ 0.4836, -0.3850],
        [-0.1286, -0.7618],
        [ 0.8934, -1.2505],
        [ 1.0801, -0.9338],
        [-0.6263,  0.3724],
        [-0.2332,  0.6058],
        [ 0.3896,  0.5382],
        [-0.0737, -0.3575],
        [ 0.6664, -1.6678],
        [ 0.5274,  1.2645],
        [-0.1033,  1.2831],
        [-0.1164,  0.4905],
        [ 3.0742, -0.1289],
        [ 2.0041, -0.0471],
        [ 0.4675, -0.7753],
        [ 1.3200,  0.2209],
        [-0.8295,  0.4261],
        [-1.2080, -0.8760],
        [-0.6799,  0.7134],
        [-1.7716, -0.0638],
        [ 1.3408,  2.0215],
        [-0.3532,  0.6015],
        [-0.9768,  0.0186],
        [-0.0430, -0.2725],
        [ 0.5445, -0.4543],
        [ 1.1438, -0.1236],
        [ 1.9100,  1.2751],
        [-1.1048,  0.9017],
        [-0.9528,  0

In [8]:
y_test = torch.zeros(100, 1)
y_test[torch.sum(x_test, dim=1) >= 0] = 1
# y_test[x_test.sum(dim=1) >= 0] = 1
y_test

tensor([[1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
      

In [9]:
y_test_pred = torch.zeros(100, 1)
y_test_pred[model(x_test) >= .5] = 1
y_test_pred

tensor([[1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
      

In [10]:
result = torch.zeros(100, 1)
result[y_test == y_test_pred] = 1
print('accurate: {}'.format(result.sum().item() / 100))

accurate: 0.99


## 保存和加载模型
### 仅保存和加载模型参数

In [11]:
model.state_dict()

OrderedDict([('linear1.weight', tensor([[ 1.3685,  1.2031],
                      [-1.2733, -1.1584],
                      [ 0.1816,  0.3188],
                      [-0.0084,  0.1878]])),
             ('linear1.bias', tensor([ 0.7925,  0.8872, -0.5934, -0.4613])),
             ('linear2.weight',
              tensor([[ 0.9867, -1.3702, -0.0533, -0.1961]])),
             ('linear2.bias', tensor([0.3543]))])

#### 保存

In [12]:
torch.save(model.state_dict(), 'only_parameters.pt')

#### 加载

In [13]:
model2 = TwoLayerNet(2, 4, 1)
model2.load_state_dict(torch.load('only_parameters.pt'))
model2.state_dict()

OrderedDict([('linear1.weight', tensor([[ 1.3685,  1.2031],
                      [-1.2733, -1.1584],
                      [ 0.1816,  0.3188],
                      [-0.0084,  0.1878]])),
             ('linear1.bias', tensor([ 0.7925,  0.8872, -0.5934, -0.4613])),
             ('linear2.weight',
              tensor([[ 0.9867, -1.3702, -0.0533, -0.1961]])),
             ('linear2.bias', tensor([0.3543]))])

In [14]:
y_test_pred = torch.zeros(100, 1)
y_test_pred[model2(x_test) >= .5] = 1
result = torch.zeros(100, 1)
result[y_test == y_test_pred] = 1
print('accurate: {}'.format(result.sum().item() / 100))

accurate: 0.99


### 保存和加载整个模型
#### 保存

In [15]:
torch.save(model, 'whole_model.pt')

  "type " + obj.__name__ + ". It won't be checked "


#### 加载

In [16]:
model3 = torch.load('whole_model.pt')
model3.state_dict()

OrderedDict([('linear1.weight', tensor([[ 1.3685,  1.2031],
                      [-1.2733, -1.1584],
                      [ 0.1816,  0.3188],
                      [-0.0084,  0.1878]])),
             ('linear1.bias', tensor([ 0.7925,  0.8872, -0.5934, -0.4613])),
             ('linear2.weight',
              tensor([[ 0.9867, -1.3702, -0.0533, -0.1961]])),
             ('linear2.bias', tensor([0.3543]))])

In [17]:
y_test_pred = torch.zeros(100, 1)
y_test_pred[model3(x_test) >= .5] = 1
result = torch.zeros(100, 1)
result[y_test == y_test_pred] = 1
print('accurate: {}'.format(result.sum().item() / 100))

accurate: 0.99
