# 循环神经网络从零开始实现

In [1]:
%matplotlib inline
import math
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l


In [None]:

# Mini-batch size and number of time steps per sample, both forwarded to the loader below.
batch_size, num_steps = 32, 35
# d2l helper returning a data iterator and the vocabulary for the "time machine" dataset.
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

In [6]:
# Build a 1-D integer tensor from a Python list; the notebook echoes the value.
torch.tensor([0,2])

tensor([0, 2])

In [None]:
# One-hot encoding: indices 0 and 1 each become a length-28 row with a single 1.
F.one_hot(torch.tensor([0,1]),28)

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])

In [13]:
# X holds the integers 0..9 laid out as 2 rows by 5 columns.
X = torch.arange(10).reshape(2,5)
# One-hot encode the transpose (5, 2); the result has shape (5, 2, 28).
F.one_hot(X.T,28)

tensor([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0]],

        [[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0

# 单层循环神经网络

In [10]:
import torch
from torch import nn
import time

In [11]:
# Toy text-like input with a sequence length of 3.
inputs=torch.randn((3,50,10)) # (seq_len, batch_size, input_size) -- nn.RNN's default layout (batch_first=False)
# Single-layer RNN: 10 input features per step, 20 hidden units.
rnn1=nn.RNN(input_size=10,hidden_size=20)

In [12]:
# Run the whole sequence; nn.RNN returns the per-step outputs and the final hidden state.
outputs1,hn1=rnn1(inputs)
print(outputs1.shape) # (3, 50, 20): one 20-dim output per time step and batch item
print(hn1.shape) # (1, 50, 20): final hidden state of the single layer

torch.Size([3, 50, 20])
torch.Size([1, 50, 20])


# 深度循环神经网络

In [14]:
inputs3=torch.randn((3,50,10)) # (seq_len, batch_size, input_size) -- nn.RNN's default layout (batch_first=False)
# Four stacked RNN layers, each with 20 hidden units.
drnn1=nn.RNN(input_size=10,num_layers=4,hidden_size=20)
# outputs3: per-step outputs; hn3: final hidden state(s) returned by nn.RNN.
outputs3,hn3=drnn1(inputs3)

In [None]:
# NOTE: a notebook cell displays only its last expression; the first three
# lines are evaluated and discarded unless run one at a time.
outputs3.shape
outputs3[-1,:,:]
hn3.shape 
hn3[-1,:,:]
# outputs3 and hn3 overlap: per the nn.RNN docs, outputs3[-1] (top layer at the
# last time step) holds the same values as hn3[-1] (final state of the top layer).

## nn.RNN的简单实现

- 输入层有100个神经元，输出层有3个（简单情感分析的三分类任务）
- 每个隐藏层有256个神经元

### 对最终结果进行分析


In [None]:
class myRNN(nn.Module):
    """Stacked RNN with a linear head applied to the last time step only.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width shared by every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size=100, hidden_size=256, num_layers=4, output_size=3):
        super().__init__()
        # Recurrent trunk.
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )
        # The output layer is kept separate from the recurrent stack.
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, X):
        """Return one prediction per batch item from the final time step.

        X is fed to nn.RNN unchanged, so it follows that module's default
        (seq_len, batch, input_size) layout; the result is (batch, output_size).
        """
        per_step, _final_state = self.rnn(X)
        last_step = per_step[-1, :, :]
        return self.fc(last_step)

In [25]:
# Instantiate the model with its default sizes and print its module structure.
my_module=myRNN()
print(my_module)

myRNN(
  (rnn): RNN(100, 256, num_layers=4)
  (fc): Linear(in_features=256, out_features=3, bias=True)
)


### 假设对每个时间步进行分析

In [None]:
class myRNN(nn.Module):
    """Stacked RNN with a linear head applied to every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width shared by every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of values predicted per time step.
    """

    def __init__(self,input_size=100,hidden_size=256,num_layers=4,output_size=3):
        super(myRNN,self).__init__()
        # forward() needs the hidden width to flatten the RNN output; without
        # this attribute it raised AttributeError on the first call.
        self.hidden_size=hidden_size
        # Recurrent trunk.
        self.rnn=nn.RNN(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers)
        # The output layer is created separately from the recurrent stack.
        self.fc=nn.Linear(in_features=hidden_size,out_features=output_size)

    def forward(self,X):
        """Return a prediction for every (time step, batch item) pair.

        X follows nn.RNN's default (seq_len, batch, input_size) layout.
        The result has shape (seq_len * batch, output_size).
        """
        output,_=self.rnn(X)  # the final hidden state is not needed here
        # Collapse the time and batch axes so the linear layer sees a 2-D matrix.
        flat=output.reshape(output.shape[0]*output.shape[1],self.hidden_size)
        predict=self.fc(flat)
        return predict

## 初始化参数h0

In [None]:
class myRNN3(nn.Module):
    """Stacked RNN with an explicit zero initial hidden state and a per-step head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width shared by every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of values predicted per time step.
    """

    def __init__(self,input_size=100,hidden_size=256,num_layers=4,output_size=3):
        super(myRNN3,self).__init__()
        # Both values are needed in forward() to build h0.
        self.num_layers=num_layers
        self.hidden_size=hidden_size
        # Recurrent trunk.
        self.rnn=nn.RNN(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers)
        # The output layer is created separately from the recurrent stack.
        self.fc=nn.Linear(in_features=hidden_size,out_features=output_size)

    def forward(self,X):
        """Return a prediction for every (time step, batch item) pair.

        X follows nn.RNN's default (seq_len, batch, input_size) layout.
        The result has shape (seq_len * batch, output_size).
        """
        # h0.shape = (num_layers, batch, hidden_size). The batch axis is dim 1
        # because the RNN is built with the default batch_first=False.
        # new_zeros keeps h0 on the same device and dtype as the input.
        h0=X.new_zeros(self.num_layers,X.size(1),self.hidden_size)
        # Pass h0 explicitly; previously it was built but never used.
        output,_=self.rnn(X,h0)  # the final hidden state is not needed here
        # Collapse the time and batch axes so the linear layer sees a 2-D matrix.
        flat=output.reshape(output.shape[0]*output.shape[1],self.hidden_size)
        predict=self.fc(flat)
        return predict

## 实现每个隐藏层上神经元数目不一致的DRNN

In [39]:
# Four stacked hidden layers whose widths may differ (defaults: 256, 156, 512, 512).
# NOTE(review): the original note said the first two layers have 256 units, so
# the 156 default may be a typo -- left unchanged to preserve behavior.
class myRNN4(nn.Module):
    """Deep RNN built from four single-layer RNNs of independent widths.

    Args:
        input_size: Number of features per time step.
        hidden_size: Sequence of four hidden widths, one per recurrent layer.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self,input_size=100,hidden_size=[256,156,512,512],output_size=3):
        super(myRNN4,self).__init__()
        # Copy so that neither the shared default list nor the caller's list
        # can be mutated through this instance (or vice versa).
        self.hidden_size=list(hidden_size)

        # Four separate RNN layers; each consumes the previous layer's width.
        self.rnn1=nn.RNN(input_size,hidden_size[0])
        self.rnn2=nn.RNN(hidden_size[0],hidden_size[1])
        self.rnn3=nn.RNN(hidden_size[1],hidden_size[2])
        self.rnn4=nn.RNN(hidden_size[2],hidden_size[3])

        self.linear=nn.Linear(hidden_size[3],output_size)

    def forward(self,X):
        """Return one prediction per batch item from the final time step.

        X follows nn.RNN's default (seq_len, batch, input_size) layout.
        The result has shape (batch, output_size).
        """
        # Each layer gets its own zero state of shape (1, batch, width).
        # The batch axis is dim 1 with the default batch_first=False; using
        # dim 0 here broke every input whose seq_len differed from its batch.
        batch=X.size(1)
        layers=(self.rnn1,self.rnn2,self.rnn3,self.rnn4)

        # Feed each layer's per-step output into the next layer.
        out=X
        for layer,width in zip(layers,self.hidden_size):
            # new_zeros keeps the state on the input's device and dtype.
            out,_=layer(out,X.new_zeros(1,batch,width))

        # Keep only the last time step for the prediction.
        return self.linear(out[-1,:,:])

In [40]:
# Instantiate the model with its default layer widths and print its module structure.
model=myRNN4()
print(model)

myRNN4(
  (rnn1): RNN(100, 256)
  (rnn2): RNN(256, 156)
  (rnn3): RNN(156, 512)
  (rnn4): RNN(512, 512)
  (linear): Linear(in_features=512, out_features=3, bias=True)
)
