In [5]:
# 文本预处理

import collections
import re  #python中的正则模块
import numpy
from  numpy import *
#from d2l import torch as d2l



In [25]:
### 8.3 ###

# Generate minibatches of subsequences from the corpus using random sampling.
def seq_data_iter_random(corpus,batch_size,num_steps):
    """Yield (X, Y) minibatches of randomly ordered subsequences of `corpus`.

    A random prefix of 0..num_steps-1 elements (both ends inclusive) is
    dropped, the remainder is cut into non-overlapping windows of
    `num_steps` elements, and the window start positions are shuffled.

    Args:
        corpus: Sliceable sequence whose slices `torch.tensor` can convert
            (presumably a list of token indices -- confirm with the caller).
        batch_size: Number of subsequences per yielded minibatch.
        num_steps: Length of every subsequence.

    Yields:
        Tuple `(X, Y)` of tensors built from `batch_size` windows each;
        every row of `Y` is the matching row of `X` shifted right by one
        position in `corpus`.
    """
    # Function-scope imports on purpose: the notebook's `from numpy import *`
    # makes the global name `random` refer to numpy.random, whose randint()
    # excludes the upper bound (so offset num_steps-1 was never drawn and
    # num_steps == 1 raised). The stdlib randint() includes both endpoints.
    # `torch` is imported here because this cell precedes the notebook's
    # first `import torch`.
    import random
    import torch

    # Start from a random offset so different passes see different windows.
    corpus = corpus[random.randint(0, num_steps - 1):]
    # Subtract 1 because each window needs one extra element for its label.
    num_subseqs = (len(corpus) - 1) // num_steps
    initial_indices = list(range(0, num_subseqs * num_steps, num_steps))
    # After shuffling, windows in one minibatch need not be adjacent
    # in the original sequence.
    random.shuffle(initial_indices)

    def data(pos):
        # Window of length `num_steps` starting at `pos`.
        return corpus[pos:pos + num_steps]

    num_batches = num_subseqs // batch_size  # leftover windows are dropped
    for i in range(0, batch_size * num_batches, batch_size):
        indices_per_batch = initial_indices[i:i + batch_size]
        X = [data(j) for j in indices_per_batch]
        Y = [data(j + 1) for j in indices_per_batch]
        yield torch.tensor(X), torch.tensor(Y)



In [None]:
### 8.5 RNN模型-不使用框架###

#RNN模型---不使用框架&使用one-hot encoding

import math 
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

# Values handed to the d2l time-machine data loader below.
batch_size = 32
num_steps = 35
# The loader returns a minibatch iterator together with the vocabulary.
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# 1. Initialize the model parameters.
# num_hiddens is a tunable hyperparameter; inputs and outputs both use
# vocab_size as their dimension.
def get_params(vocab_size,num_hiddens,device):
    """Create the trainable tensors of the from-scratch RNN.

    Args:
        vocab_size: Used as both the input and the output dimension.
        num_hiddens: Size of the hidden state.
        device: Device passed to `torch.randn` / `torch.zeros`.

    Returns:
        List `[W_xh, W_hh, b_h, W_hq, b_q]`; weights are standard-normal
        samples scaled by 0.1, biases are zeros, and every tensor has
        `requires_grad` switched on.
    """
    num_inputs = vocab_size
    num_outputs = vocab_size

    def scaled_randn(*dims):
        # Standard-normal samples of the requested shape, scaled by 0.1.
        return torch.randn(size=dims, device=device) * 0.1

    # Weights are drawn in a fixed order so seeded runs stay reproducible.
    W_xh = scaled_randn(num_inputs, num_hiddens)    # input  -> hidden
    W_hh = scaled_randn(num_hiddens, num_hiddens)   # hidden -> hidden
    W_hq = scaled_randn(num_hiddens, num_outputs)   # hidden -> output
    b_h = torch.zeros(num_hiddens, device=device)
    b_q = torch.zeros(num_outputs, device=device)

    # requires_grad_ works in place and returns the same tensor.
    return [p.requires_grad_(True) for p in (W_xh, W_hh, b_h, W_hq, b_q)]
    
# 2. RNN model.
# State initializer: builds the initial hidden state.
def init_rnn_state(batch_size,num_hiddens,device):
    """Return a 1-tuple holding a zero tensor of shape (batch_size, num_hiddens)."""
    initial_hidden = torch.zeros((batch_size, num_hiddens), device=device)
    return (initial_hidden,)

# RNN forward computation: updates the hidden state and computes the output
# for every time step found in `inputs`.
def rnn(inputs,state,params):
    """Run the from-scratch RNN over all time steps.

    Args:
        inputs: Iterated along its first axis; each slice is multiplied with
            W_xh via `torch.mm`, so it must be 2-D (expected overall shape:
            (num_steps, batch_size, vocab_size)).
        state: 1-tuple containing the current hidden state.
        params: `[W_xh, W_hh, b_h, W_hq, b_q]`.

    Returns:
        `(outputs, (H,))` where `outputs` is a list with one output tensor
        per time step and `H` is the hidden state after the last step.
    """
    W_xh, W_hh, b_h, W_hq, b_q = params
    (hidden,) = state
    per_step_outputs = []
    for step_input in inputs:  # one 2-D slice per time step
        pre_activation = torch.mm(step_input, W_xh) + torch.mm(hidden, W_hh) + b_h
        hidden = torch.tanh(pre_activation)
        per_step_outputs.append(torch.mm(hidden, W_hq) + b_q)
    return per_step_outputs, (hidden,)

# 3. RNN model class.
class RNNModelScratch:
    """Bundles the parameters, state initializer and forward function of an
    RNN that is implemented without the framework's high-level layers."""

    def __init__(self,vocab_size,num_hiddens,device,
                get_params,init_state,forward_fn):
        """Store the sizes and callables and create the parameters.

        Args:
            vocab_size: Number of one-hot classes; also passed to `get_params`.
            num_hiddens: Hidden size; passed to `get_params` and `init_state`.
            device: Passed to `get_params`.
            get_params: Callable `(vocab_size, num_hiddens, device) -> params`.
            init_state: Callable `(batch_size, num_hiddens, device) -> state`.
            forward_fn: Callable `(inputs, state, params) -> result`.
        """
        self.vocab_size = vocab_size
        self.num_hiddens = num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state = init_state
        self.forward_fn = forward_fn

    def __call__(self,X,state):
        """One-hot encode the transpose of `X` as float32 and run `forward_fn`."""
        one_hot_steps = F.one_hot(X.T, self.vocab_size)
        model_input = one_hot_steps.type(torch.float32)
        return self.forward_fn(model_input, state, self.params)

    def begin_state(self,batch_size,device):
        """Return the initial state produced by `init_state`."""
        return self.init_state(batch_size, self.num_hiddens, device)
    
# Instantiate the network and run a single forward pass.
num_hiddens=512
net=RNNModelScratch(len(vocab),num_hiddens,d2l.try_gpu(),get_params,
                   init_rnn_state,rnn)
# Sample minibatch of token indices with shape (batch_size=2, num_steps=5).
# No earlier cell defines X, so without this line the cell fails on a fresh
# kernel. NOTE(review): assumes len(vocab) >= 10 so the indices 0..9 are
# valid one-hot classes -- confirm for the loaded vocabulary.
X=torch.arange(10).reshape((2,5))
state=net.begin_state(X.shape[0],d2l.try_gpu())
Y,new_state=net(X.to(d2l.try_gpu()),state)

# Prediction function: generates new characters following `prefix`.
def predict_ch8(prefix,num_preds,net,vocab,device):
    """Generate `num_preds` tokens after `prefix` and return the full string.

    Args:
        prefix: Non-empty sequence of tokens; each one is looked up with
            `vocab[token]`.
        num_preds: Number of tokens to generate after the prefix.
        net: Model providing `begin_state(batch_size=..., device=...)` and
            `net(X, state) -> (output, state)`. `output` may be a 2-D tensor
            or a list/tuple of 2-D tensors (one per time step).
        vocab: Supports `vocab[token] -> index` and `vocab.idx_to_token`.
        device: Device on which the input tensors are created.

    Returns:
        The prefix followed by the generated tokens, joined into one string.
    """
    state = net.begin_state(batch_size=1, device=device)
    outputs = [vocab[prefix[0]]]

    def get_input():
        # Most recent token index as a (1, 1) tensor. The original line read
        # `[outputs[-1]].device=device`, which is a syntax error.
        return torch.tensor([outputs[-1]], device=device).reshape((1, 1))

    for y in prefix[1:]:  # warm-up: feed the prefix, ignore the predictions
        _, state = net(get_input(), state)
        outputs.append(vocab[y])
    for _ in range(num_preds):  # predict `num_preds` steps
        y, state = net(get_input(), state)
        if isinstance(y, (list, tuple)):
            # The from-scratch `rnn` in this notebook returns a list of
            # per-step outputs; merge them so argmax can be applied.
            y = torch.cat(y, dim=0)
        outputs.append(int(y.argmax(dim=1).reshape(1)))
    return ''.join([vocab.idx_to_token[i] for i in outputs])
        
# 4. Gradient clipping, to avoid exploding gradients.
def grad_clipping(net,theta):
    """Rescale all gradients in place so their global L2 norm is at most `theta`.

    Args:
        net: Either an `nn.Module` (its parameters with `requires_grad` are
            used) or any object exposing a `params` iterable of tensors.
        theta: Upper bound for the L2 norm taken over all gradients together.

    Parameters whose `.grad` is None are skipped; if no parameter has a
    gradient the function returns without doing anything.
    """
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    # Skip parameters that have not received a gradient (e.g. unused in the
    # last backward pass); squaring None would raise a TypeError.
    grads = [p.grad for p in params if p.grad is not None]
    if not grads:
        return
    # torch.stack(...).sum() instead of the builtin sum(): the notebook's
    # `from numpy import *` may rebind the name `sum` to numpy's version.
    norm = torch.sqrt(torch.stack([torch.sum(g ** 2) for g in grads]).sum())
    if norm > theta:
        for g in grads:
            g.mul_(theta / norm)  # in-place, so param.grad itself is updated
            

In [None]:
### 8.5 RNN模型-使用深度学习框架高级API###

import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
    
# Load the data.
batch_size = 32
num_steps = 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# Define the model: a single-hidden-layer recurrent layer with 256 hidden units.
num_hiddens = 256
rnn_layer = nn.RNN(len(vocab), num_hiddens)
# Zero tensor used as the initial hidden state.
state = torch.zeros((1, batch_size, num_hiddens))
X = torch.rand(size=(num_steps, batch_size, len(vocab)))
# Y holds the hidden state of every time step; no output layer is involved
# here, Y is what a later output layer would consume.
Y, state_new = rnn_layer(X, state)

# RNN model.
class RNNModel(nn.Module):
    """Wraps a recurrent layer and adds a linear output layer over the vocabulary."""

    def __init__(self,rnn_layer,vocab_size,**kwargs):
        """Store the recurrent layer and build the matching output layer.

        Args:
            rnn_layer: Recurrent layer exposing `hidden_size`, `bidirectional`
                and `num_layers` (e.g. `nn.RNN`, `nn.LSTM`).
            vocab_size: Number of one-hot classes and of output features.
            **kwargs: Forwarded to `nn.Module.__init__`.
        """
        super(RNNModel,self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # A bidirectional layer has two directions, so the linear layer
        # takes twice as many input features.
        if not self.rnn.bidirectional:
            self.num_directions = 1
            self.linear = nn.Linear(self.num_hiddens, self.vocab_size)
        else:
            self.num_directions = 2
            self.linear = nn.Linear(self.num_hiddens * 2, self.vocab_size)

    def forward(self,inputs,state):
        """One-hot encode `inputs`, run the recurrent layer, apply the output layer.

        Args:
            inputs: Tensor of token indices; it is transposed before encoding.
            state: Hidden state accepted by the wrapped recurrent layer.

        Returns:
            `(output, state)`; `output` is 2-D with `vocab_size` columns, all
            leading dimensions of the recurrent output flattened into rows.
        """
        # The original referenced the builtin `input` here instead of the
        # `inputs` argument, which raised an AttributeError.
        X = F.one_hot(inputs.T.long(), self.vocab_size)
        X = X.to(torch.float32)
        Y, state = self.rnn(X, state)
        # Y.shape[-1] is the size of the last dimension of Y.
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self,device,batch_size=1):
        """Return a zero initial state on `device`.

        A single tensor for non-LSTM layers, a tuple of two tensors for
        `nn.LSTM`; each has shape
        (num_directions * num_layers, batch_size, num_hiddens).
        """
        shape = (self.num_directions * self.rnn.num_layers,
                 batch_size, self.num_hiddens)
        if not isinstance(self.rnn, nn.LSTM):
            # Non-LSTM layers use one tensor as the hidden state.
            return torch.zeros(shape, device=device)
        # nn.LSTM uses a tuple of two tensors as its state.
        return (torch.zeros(shape, device=device),
                torch.zeros(shape, device=device))

# Predict with a model whose weights are still random (untrained).
device = d2l.try_gpu()
# Module.to() returns the module itself, so construction and move are chained.
net = RNNModel(rnn_layer, vocab_size=len(vocab)).to(device)
d2l.predict_ch8('time traveller', 10, net, vocab, device)

# Train with the chosen hyperparameters.
num_epochs = 500
lr = 1
d2l.train_ch8(net, train_iter, vocab, lr, num_epochs, device)


In [17]:
# Scratch check of reshape: -1 lets NumPy infer that dimension, so a
# 4-element vector becomes a 4x1 column.
# (x.shape[-1] would be 4 here; x.reshape((2, 2)) would give a 2x2 array.)
x = numpy.array([1, 2, 3, 4])
x.reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4]])

In [7]:
# Scratch check: pair every element of x except the last with every element
# of y except the first.
x = [1, 2, 3, 4, 3]
y = [3, 4, 5, 7, 8]

print(x[:-1])
print(y[1:])

z = zip(x[:-1], y[1:])
seq = list(z)  # consumes the zip iterator
print(seq)

[1, 2, 3, 4]
[4, 5, 7, 8]
[(1, 4), (2, 5), (3, 7), (4, 8)]


In [26]:
# Scratch check: start positions 0, 4, ..., 16 and an in-place shuffle of them.
x = [4 * k for k in range(5)]
print(x)

random.shuffle(x)  # reorders x in place
x

[0, 4, 8, 12, 16]


[8, 12, 16, 4, 0]

In [10]:
# Floor division discards the remainder: 10 // 4 evaluates to 2.
10//4

2