# 编译模型

- 损失函数
- 优化器
- 评价指标

In [14]:
import torch
from torch import nn
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
%matplotlib inline

# Seed PyTorch's RNG so repeated runs start from the same random state.
torch.manual_seed(1)

# Hyper parameters
EPOCH = 1               # number of passes over the training data (1 keeps the demo fast)
BATCH_SIZE = 64
TIME_STEP = 28          # rnn time steps / image height
INPUT_SIZE = 28         # rnn input size / image width
LR = 0.01               # learning rate
DOWNLOAD_MNIST = True   # set to True if the data has not been downloaded yet

# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
mnist_base_path="/Users/zhouwencheng/Desktop/Grass/data/picture/mnist"
train_data = dsets.MNIST(
    root=mnist_base_path,
    train=True,
    transform=transforms.ToTensor(),  # converts images to tensors in (0, 1)
    download=DOWNLOAD_MNIST,
)

# `.data` replaces the deprecated `.train_data` alias (same tensor, no warning).
print(train_data.data.size())
print(train_data.targets.size())

train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_data = dsets.MNIST(
    root=mnist_base_path,
    train=False,
    transform=transforms.ToTensor(),
)

# Evaluation subset: the first 2000 test images as floats scaled by 1/255.
# The raw `.data` tensor bypasses `transform`, hence the manual scaling.
# `Variable(..., volatile=True)` is dropped: `volatile` has had no effect since
# PyTorch 0.4 and plain tensors can be fed to a module directly.
test_x = test_data.data.type(torch.FloatTensor)[:2000] / 255.
test_y = test_data.targets.numpy().squeeze()[:2000]

class RNN(nn.Module):
    """Single-layer LSTM followed by a linear classification head.

    The input is treated as a sequence of feature vectors; the LSTM output at
    the last time step is projected to ``num_classes`` scores.

    Args:
        input_size: Number of features per time step. ``None`` (the default)
            falls back to the module-level ``INPUT_SIZE`` constant.
        hidden_size: Width of the LSTM hidden state. It is also used as the
            input width of the linear head, so the two cannot drift apart.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size=None, hidden_size=64, num_classes=10):
        super().__init__()
        if input_size is None:
            input_size = INPUT_SIZE
        # The original note on this choice: with nn.RNN() it hardly learns.
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,  # rnn hidden units
            num_layers=1,             # number of stacked rnn layers
            batch_first=True,         # tensors are (batch, time_step, feature)
        )
        self.out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, time_step, input_size).
        """
        # r_out shape: (batch, time_step, hidden_size)
        # The final (h_n, c_n) states, each (n_layers, batch, hidden_size),
        # are not needed here. Passing None means a zero initial state.
        r_out, _ = self.rnn(x, None)

        # Classify from the LSTM output at the last time step.
        return self.out(r_out[:, -1, :])
    
rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.CrossEntropyLoss()  # targets are class indices, not one-hot vectors

# Train, reporting accuracy on the held-out test subset every 100 steps.
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        # Reshape each batch to (batch, time_step, input_size) for the LSTM.
        # Plain tensors are used; the Variable wrapper is no longer needed.
        b_x = x.view(-1, TIME_STEP, INPUT_SIZE)
        b_y = y

        output = rnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()   # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            # Evaluation needs no gradients; no_grad() avoids building an
            # autograd graph over the whole test subset.
            with torch.no_grad():
                test_output = rnn(test_x)
            # Index of the highest score along dim 1 is the predicted class.
            pred_y = torch.max(test_output, 1)[1].numpy().squeeze()
            accuracy = (pred_y == test_y).mean()
            print(f'Epoch:  {epoch} | step:{step} | train loss: {loss.item()} | test accuracy: {accuracy}')
    print("OK")
            

torch.Size([60000, 28, 28])
torch.Size([60000])
RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)




Epoch:  0 | step:0 | train loss: 2.2883260250091553 | test accuracy: 0.1025
Epoch:  0 | step:100 | train loss: 0.9628912210464478 | test accuracy: 0.733
Epoch:  0 | step:200 | train loss: 0.4038448631763458 | test accuracy: 0.8375
Epoch:  0 | step:300 | train loss: 0.2836182117462158 | test accuracy: 0.8975
Epoch:  0 | step:400 | train loss: 0.21243950724601746 | test accuracy: 0.913
Epoch:  0 | step:500 | train loss: 0.11269920319318771 | test accuracy: 0.9175
Epoch:  0 | step:600 | train loss: 0.05201313644647598 | test accuracy: 0.9405
Epoch:  0 | step:700 | train loss: 0.13574619591236115 | test accuracy: 0.955
Epoch:  0 | step:800 | train loss: 0.12530261278152466 | test accuracy: 0.942
Epoch:  0 | step:900 | train loss: 0.18430180847644806 | test accuracy: 0.946
OK


# Loss functions


[参考地址：torch 中文文档](https://pytorch-cn.readthedocs.io/zh/latest/package_references/torch-nn/#loss-functions)

基本用法
```python
criterion = LossCriterion() # 构建函数自己的参数
loss = criterion(x, y) # 调用
```

- class torch.nn.L1Loss(size_average=True)
    - 创建一个衡量输入x(模型预测输出)和目标y之间差的绝对值的平均值的标准。
    
- class torch.nn.MSELoss(size_average=True)
    - 创建一个衡量输入x(模型预测输出)和目标y之间均方误差标准。
- class torch.nn.CrossEntropyLoss(weight=None, size_average=True)
    - 此标准将LogSoftMax和NLLLoss集成到一个类中
- class torch.nn.NLLLoss(weight=None, size_average=True)
    - 负的log likelihood loss损失。用于训练一个n类分类器。
- class torch.nn.NLLLoss2d(weight=None, size_average=True)
    - 对于图片的 negative log likelihood loss。计算每个像素的 NLL loss。
- class torch.nn.KLDivLoss(weight=None, size_average=True)
    - 计算 KL 散度损失
- class torch.nn.BCELoss(weight=None, size_average=True)
    - 计算 target 与 output 之间的二元交叉熵（binary cross entropy）
- class torch.nn.MarginRankingLoss(margin=0, size_average=True)
    - 创建一个标准，给定输入 $x1$,$x2$两个1-D mini-batch Tensor's，和一个$y$(1-D mini-batch tensor) ,$y$里面的值只能是-1或1。
- class torch.nn.HingeEmbeddingLoss(size_average=True)
    - 给定输入 x 和标签 y（元素的值为 1 或 -1），用来衡量两个输入是否相似，常用于学习非线性 embedding 或半监督学习。
- class torch.nn.MultiLabelMarginLoss(size_average=True)
- class torch.nn.SmoothL1Loss(size_average=True)
    - 平滑版L1 loss
- class torch.nn.SoftMarginLoss(size_average=True)
    - 创建一个标准，用来优化2分类的logistic loss。
- class torch.nn.MultiLabelSoftMarginLoss(weight=None, size_average=True)
    - 创建一个标准，基于输入x和目标y的 max-entropy，优化多标签 one-versus-all 的损失。
- class torch.nn.CosineEmbeddingLoss(margin=0, size_average=True)
    - 给定 输入 Tensors，x1, x2 和一个标签Tensor y(元素的值为1或-1)。
- class torch.nn.MultiMarginLoss(p=1, margin=1, weight=None, size_average=True)
    - 用来计算 multi-class classification 的 hinge loss（margin-based loss）

# Optimizer

torch.optim

[参考地址：torch 中文文档](https://pytorch-cn.readthedocs.io/zh/latest/package_references/torch-optim/#torchoptim)

```python
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr=0.0001)
```

- class torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)
    - 实现Adadelta算法

- class torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0)
    - 实现Adagrad算法

- class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    - 实现Adam算法
    
- class torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    - 实现Adamax算法（Adam的一种基于无穷范数的变种）
- class torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
    - 实现平均随机梯度下降算法。
- class torch.optim.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-05, tolerance_change=1e-09, history_size=100, line_search_fn=None)
    - 实现L-BFGS算法
- class torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    - 实现RMSprop算法
- class torch.optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50))
    - 实现弹性反向传播算法。
- class `torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False)`
    - 实现随机梯度下降算法（momentum可选）

# 评价指标

- 官方没有实现，需要自己计算咯