In [1]:
import torch
from torch import nn
import numpy as np

# RNN

In [2]:
# Build a single-layer vanilla RNN that maps 100-dim inputs to a 20-dim hidden state.
rnn = nn.RNN(
    input_size=100,
    hidden_size=20,
    num_layers=1,
)
'''
input_size表示每一个单词的特征维度大小
hidden_size表示每一个隐藏层特征维度的大小
num_layers表示有多少个RNN网络层相连
'''
# English summary of the note above:
#   input_size  - feature dimension of each word
#   hidden_size - feature dimension of each hidden state
#   num_layers  - how many RNN layers are stacked on top of each other
print(rnn)

RNN(100, 20)


In [3]:
# Prepare the data: 3 sentences, 10 words per sentence, 100 features per word.
x = torch.randn(10, 3, 100)
out, h = rnn(
    x,
    torch.zeros(1, 3, 20),  # all-zero initial hidden state, i.e. "no prior meaning"
)
# out: hidden state of the last RNN layer after every word (time step).
# h:   hidden state of every RNN layer at the final time step.
print(out.shape, h.shape)
# The trailing note says: with 2 RNN layers, out is [10, 3, 20] and h is [2, 3, 20].
'''
如果存在2层RNN out [10, 3, 20] h [2, 3, 20] 
'''

torch.Size([10, 3, 20]) torch.Size([1, 3, 20])


'\n如果存在2层RNN out [10, 3, 20] h [2, 3, 20] \n'

# RNNCell

In [4]:
# One recurrent cell (input_size=100, hidden_size=20) that is stepped manually.
cell1 = nn.RNNCell(input_size=100, hidden_size=20)
x = torch.randn(10, 3, 100)
# The cell's state is [number of sentences, hidden size]; there is no time-step
# axis because the loop below walks through the sequence one step at a time.
h1 = torch.zeros(3, 20)
for step_input in x.unbind(dim=0):
    h1 = cell1(step_input, h1)
print(h1.shape)

torch.Size([3, 20])


In [5]:
# Multi-layer recurrence built by hand from two cells: 100 -> 30 -> 20.
cell1 = nn.RNNCell(input_size=100, hidden_size=30)
cell2 = nn.RNNCell(input_size=30, hidden_size=20)
h1, h2 = torch.zeros(3, 30), torch.zeros(3, 20)
for step_input in x.unbind(dim=0):
    h1 = cell1(step_input, h1)  # layer 1 consumes the raw input of this time step
    h2 = cell2(h1, h2)          # layer 2 consumes layer 1's freshly updated state
print(h2.shape)

torch.Size([3, 20])


# 正弦曲线预测

In [6]:
# Sample a sine wave at 50 evenly spaced points over a window of length 10
# whose left edge is a random integer drawn from {0, 1, 2}.
num_time_steps = 50
start = np.random.randint(0, 3, size=1)[0]
time_steps = np.linspace(start, start + 10, num=num_time_steps)
# Column vector of sine values: one row per time step, one feature per row.
data = np.sin(time_steps).reshape(num_time_steps, 1)
data.shape

(50, 1)

In [7]:
# Build next-step training pairs laid out as [batch, sequence, feature]:
# x drops the last sample, y drops the first, so y[t] is the value following x[t].
x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps - 1, 1)
y = torch.tensor(data[1:]).float().reshape(1, num_time_steps - 1, 1)
# The note below says: x is the current value, y is the value to predict.
'''
x当前值 
y预测值
'''
x.shape, y.shape

(torch.Size([1, 49, 1]), torch.Size([1, 49, 1]))

In [8]:
#model
class MyRNN(nn.Module):
    """RNN followed by a per-time-step linear read-out.

    The defaults reproduce the original fixed configuration (1 input feature,
    20 hidden units, 1 layer, 1 output value per step).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked RNN layers.
        output_size: Number of values predicted per time step.
    """

    def __init__(self, input_size=1, hidden_size=20, num_layers=1, output_size=1):
        super(MyRNN, self).__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # tensors are laid out as [batch, sequence, feature]
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        """Run the sequence through the RNN and project every step.

        Args:
            x: Input of shape [batch, sequence, input_size].
            hidden_prev: Initial hidden state of shape
                [num_layers, batch, hidden_size].

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            [batch, sequence, output_size] and ``hidden`` is the RNN's final
            hidden state.
        """
        out, hidden_prev = self.rnn(x, hidden_prev)
        # nn.Linear acts on the last dimension, so it can be applied to the
        # [batch, sequence, hidden] tensor directly. This keeps the batch axis
        # intact instead of flattening it into the sequence axis, which only
        # produced the right shape when batch == 1.
        out = self.linear(out)
        return out, hidden_prev

In [9]:
# Train the model to predict the next sample of a sine wave from the current one.
model = MyRNN()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
num_time_steps = 50
hidden_prev = torch.zeros(1, 1, 20)
# The loop variable is named `step` (not `iter`) so the builtin `iter` is not
# shadowed in the notebook namespace after this cell has run.
for step in range(10000):
    # Draw a fresh sine segment whose left edge is a random integer in {0, 1, 2}.
    start = np.random.randint(3, size = 1)[0]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps)
    data = data.reshape(num_time_steps, 1)
    x = torch.tensor(data[: -1]).float().view(1, num_time_steps - 1, 1)  # [b, sequence, f]
    y = torch.tensor(data[1:  ]).float().view(1, num_time_steps - 1, 1)

    output, hidden_prev = model(x, hidden_prev)
    # Cut the autograd graph here so the next iteration's backward pass does not
    # reach into this one; the detached tensor shares memory with the original.
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)
    model.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()     # backpropagate the error and compute gradients
    optimizer.step()    # update the weights and biases

    if step % 100 == 0:
        print('epoch:{}, loss:{}'.format(step, loss.item()))

epoch:0, loss:0.5675114393234253
epoch:100, loss:0.0739104300737381
epoch:200, loss:0.05200529098510742
epoch:300, loss:0.04295363277196884
epoch:400, loss:0.015034189447760582
epoch:500, loss:0.009155788458883762
epoch:600, loss:0.004302713554352522
epoch:700, loss:0.004083643201738596
epoch:800, loss:0.005223941523581743
epoch:900, loss:0.0018991539254784584
epoch:1000, loss:0.00023260949819814414
epoch:1100, loss:0.00962231494486332
epoch:1200, loss:0.001360613969154656
epoch:1300, loss:0.0013375337002798915
epoch:1400, loss:0.006682169623672962
epoch:1500, loss:0.0022227840963751078
epoch:1600, loss:0.0014852742897346616
epoch:1700, loss:0.0002992559166159481
epoch:1800, loss:0.00789389107376337
epoch:1900, loss:0.008290546014904976
epoch:2000, loss:0.006468611769378185
epoch:2100, loss:0.0009865176398307085
epoch:2200, loss:0.0007525860564783216
epoch:2300, loss:0.00430799787864089
epoch:2400, loss:0.00610161479562521
epoch:2500, loss:0.0029675632249563932
epoch:2600, loss:0.00073

In [10]:
# Autoregressive rollout: seed the model with the first sample of x, then feed
# every prediction back in as the next input.
prediction = []
# Named `step_input` rather than `input` so the builtin `input` is not shadowed.
step_input = x[:, 0, :]
print(step_input.shape)
# Inference only: no_grad() avoids building an autograd graph that would
# otherwise grow through hidden_prev across every step of the rollout.
with torch.no_grad():
    for _ in range(x.shape[1]):
        step_input = step_input.view(1, 1, 1)  # [batch, sequence, feature]
        pred, hidden_prev = model(step_input, hidden_prev)
        step_input = pred
        prediction.append(pred.numpy().ravel()[0])

torch.Size([1, 1])


In [11]:
# Display the list of values collected by the rollout loop.
prediction

[0.9418645,
 0.8793632,
 0.86063874,
 0.7906774,
 0.69544023,
 0.5630178,
 0.41081086,
 0.2339775,
 0.040643193,
 -0.15414196,
 -0.34154776,
 -0.5124444,
 -0.65970105,
 -0.77880543,
 -0.86492383,
 -0.9128845,
 -0.9186183,
 -0.88083255,
 -0.80181146,
 -0.6866779,
 -0.5421467,
 -0.37550145,
 -0.1936478,
 -0.00242389,
 0.19321641,
 0.3877872,
 0.5734798,
 0.73972774,
 0.8751546,
 0.9709939,
 1.0235065,
 1.0337276,
 1.0052508,
 0.94186723,
 0.8462425,
 0.7198975,
 0.5643588,
 0.38309523,
 0.18322334,
 -0.024567775,
 -0.22834891,
 -0.41782057,
 -0.58564234,
 -0.72679806,
 -0.83691144,
 -0.91098344,
 -0.94382614,
 -0.93206704,
 -0.8759735]

# 梯度爆炸的处理方法

In [14]:
# Print the module tree of the model to see which sub-modules hold parameters.
print(model)

MyRNN(
  (rnn): RNN(1, 20, batch_first=True)
  (linear): Linear(in_features=20, out_features=1, bias=True)
)


In [17]:
# Recipe against exploding gradients:
#   zero the gradients -> backpropagate the error -> inspect the gradients
#   -> clip them to a bounded range -> update the network parameters.
# This cell covers the last three stages and assumes every parameter already
# holds a .grad from an earlier backward() call.
for p in model.parameters():
    print(p.grad.norm())
# Clip over ALL parameters. Passing the leftover loop variable `p` would clip
# only the last parameter tensor and leave every other gradient unbounded.
torch.nn.utils.clip_grad_norm_(model.parameters(), 10)  # keep the total gradient norm within 10
optimizer.step()  # apply the parameter update

tensor(0.0035)
tensor(0.0336)
tensor(0.0180)
tensor(0.0180)
tensor(0.0434)
tensor(0.0242)


# LSTM神经网络

In [18]:
# A 4-layer LSTM mapping 100-dim inputs to a 20-dim hidden state.
lstm = nn.LSTM(
    input_size=100,
    hidden_size=20,
    num_layers=4,
)
print(lstm)
x = torch.randn(10, 3, 100)
# No initial (h, c) is passed, so the module falls back to its default state.
out, state = lstm(x)
h, c = state
out.shape, h.shape, c.shape

LSTM(100, 20, num_layers=4)


(torch.Size([10, 3, 20]), torch.Size([4, 3, 20]), torch.Size([4, 3, 20]))

# LSTMCell

In [19]:
# Step a single LSTM cell through the sequence by hand.
cell = nn.LSTMCell(input_size=100, hidden_size=20)
# Hidden state and cell state both start at zero: [batch, hidden_size].
h, c = torch.zeros(3, 20), torch.zeros(3, 20)
for step_input in x.unbind(dim=0):
    h, c = cell(step_input, (h, c))
print(h.shape, c.shape)


torch.Size([3, 20]) torch.Size([3, 20])


## 多层

In [20]:
# Two LSTM cells stacked by hand: 100 -> 30 -> 20.
cell1 = nn.LSTMCell(input_size=100, hidden_size=30)
cell2 = nn.LSTMCell(input_size=30, hidden_size=20)
# Hidden and cell state of each layer start at zero: [batch, hidden_size].
h1 = torch.zeros(3, 30)
c1 = torch.zeros(3, 30)
h2 = torch.zeros(3, 20)
c2 = torch.zeros(3, 20)
for xt in x:
    # Tuple-unpack into h1 AND c1. The previous `h1. c1 = ...` was an attribute
    # assignment on the tensor, so layer 1's state was never updated and layer 2
    # was fed zeros at every step.
    h1, c1 = cell1(xt, (h1, c1))
    h2, c2 = cell2(h1, (h2, c2))
h2.shape, c2.shape

(torch.Size([3, 20]), torch.Size([3, 20]))

# embedding layer

In [None]:
# nn.Embedding(num_words, dim_per_word) is a lookup table from word indices to
# dense vectors. The two sizes were previously undefined names (one misspelled
# as `vacab_size`), so the cell raised NameError; the values here are
# illustrative only.
vocab_size = 1000     # how many distinct words the table holds
embedding_dim = 100   # size of each word vector (same as input_size=100 used for the RNN)
embedding = nn.Embedding(vocab_size, embedding_dim)
embedding
