In [13]:
from torch.utils.data import Dataset,DataLoader
import os
from torch.utils.tensorboard import  SummaryWriter
import numpy as np
import torch
import torch.nn.functional as F
import re
import collections
import random
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [14]:
def read_text(address):    
    with open(address,'r') as f:
        lines=f.readlines()
    return [re.sub('[^A-Za-z]+',' ',line).strip().lower() for line in lines]

In [15]:
def token(lines):
    return [line.split() for line in lines]

In [16]:
def count(tokens):
    token=[token for line in tokens for token in line]
    return collections.Counter(token)

In [17]:
class Vocab():
    def __init__(self,tokens,min_fraq=0):
        counter=count(tokens)
        self.unique_token_counter=sorted(counter.items(),key=lambda x:x[1],reverse=True)
        self.densed_unique_token=['<unknown>']
        self.token2idx=collections.defaultdict(lambda:0)  # 未知词返回0
        for i in self.unique_token_counter:
            if i[1]>=min_fraq:
                self.densed_unique_token.append(i[0])
                self.token2idx[i[0]]=len(self.densed_unique_token)-1
    def __len__(self):
        return len(self.densed_unique_token)
    def __getitem__(self,idx):
        if not isinstance(idx,(list,tuple)):
            return self.densed_unique_token[idx]
        return [self.__getitem__(i) for i in idx]

In [18]:
def load_text_by_word(address,min_freq=0):
    lines=read_text(address)
    lines=[i for i in lines if i]
    vocab=Vocab(token(lines),min_fraq=min_freq)
    corpus=[j for i in lines for j in i.split()]
    return corpus,vocab
corpus,vocab=load_text_by_word('Educated.txt',min_freq=1)

In [19]:
def generate_random_material(corpus,batch_size,num_steps,vocab):
    print(corpus)
    #corpus=corpus[random.randint(0,num_steps-1):]
    corpus=[vocab.token2idx[i] for i in corpus]
    print(corpus)
    batch=(len(corpus)-1)//(batch_size*num_steps)
    trainset=[]
    testset=[]
    while True:
        for i in range(batch):
            trainset.append(torch.tensor([corpus[i*batch_size*num_steps+j*batch_size:i*batch_size*num_steps+j*batch_size+batch_size] for j in range(num_steps)]))
            testset.append(torch.tensor([corpus[i*batch_size*num_steps+j*batch_size+1:1+i*batch_size*num_steps+j*batch_size+batch_size] for j in range(num_steps)]))
        yield trainset,testset
t=generate_random_material(['educated','is','the','work','of','nonfiction','some','names','and','identifying','a','the','how'],2,3,vocab)
x,y=next(t)
print(x)

print([[[vocab[j] for j in i] for i in t] for t in x])
print([[[vocab[j] for j in i] for i in t] for t in y])

['educated', 'is', 'the', 'work', 'of', 'nonfiction', 'some', 'names', 'and', 'identifying', 'a', 'the', 'how']
[1332, 58, 1, 191, 7, 5084, 150, 1201, 4, 5085, 5, 1, 66]
[tensor([[1332,   58],
        [   1,  191],
        [   7, 5084]]), tensor([[ 150, 1201],
        [   4, 5085],
        [   5,    1]])]
[[['educated', 'is'], ['the', 'work'], ['of', 'nonfiction']], [['some', 'names'], ['and', 'identifying'], ['a', 'the']]]
[[['is', 'the'], ['work', 'of'], ['nonfiction', 'some']], [['names', 'and'], ['identifying', 'a'], ['the', 'how']]]


In [20]:
class rnn(torch.nn.Module):
    def __init__(self,rnn_layer,vocab_size,**kwargs):
        super(rnn,self).__init__(**kwargs)
        self.rnn_layer=rnn_layer
        self.vocab_size=vocab_size
        self.num_hiddens=self.rnn_layer.hidden_size
        self.linear=torch.nn.Linear(self.num_hiddens, self.vocab_size)
    def forward(self,inputs,state):
        X=F.one_hot(inputs.T.long(),self.vocab_size).to(torch.float32)
        X=X.to(device)
        state=state.to(device)
        Y,state=self.rnn_layer(X,state)
        output=self.linear(Y.reshape((-1,Y.shape[-1])))
        return output,state
    def begin_state(self,batch_size=1):
        return torch.zeros((self.rnn_layer.num_layers,batch_size,self.num_hiddens),device=device)
    
rnn_layer=torch.nn.GRU(len(vocab),512,2)
net=rnn(rnn_layer,len(vocab))
net=net.to(device)
t=net.begin_state()
d=net(torch.randint(low=1,high=10,size=(1,2),device=device),t)
print(d)

(tensor([[-0.0409, -0.0163, -0.0362,  ..., -0.0431,  0.0133,  0.0077],
        [-0.0433, -0.0064, -0.0402,  ..., -0.0430,  0.0153,  0.0146]],
       device='cuda:0', grad_fn=<AddmmBackward0>), tensor([[[ 0.0063,  0.0314, -0.0185,  ..., -0.0030, -0.0376, -0.0023]],

        [[-0.0064, -0.0175, -0.0516,  ..., -0.0472,  0.0406, -0.0065]]],
       device='cuda:0', grad_fn=<CudnnRnnBackward0>))


In [None]:
def predict(prefix,number,vocab,net):
    initial_state=net.begin_state()
    prefix=prefix.lower().split()
    print(prefix)
    output=[vocab.token2idx[prefix[0]]]
    state=initial_state
    for i in range(1,len(prefix)):
        #print(i,output[-1])
        _,state=net(torch.tensor([output[-1]],device=device).reshape(1,1),state)
        output.append(vocab.token2idx[prefix[i]])
    for i in range(number):
        out,state=net(torch.tensor([output[-1]],device=device).reshape(1,1),state)
        output.append((torch.argmax(out)))
    return [vocab[i] for i in output]
predict('i love you',10,vocab,net)

['i', 'love', 'you']
1 2
2 535


['i',
 'love',
 'you',
 'profits',
 'profits',
 'profits',
 'profits',
 'profits',
 'profits',
 'ornery',
 'profits',
 'profits',
 'profits']

In [22]:
def grad_clipping(net, theta):
    """裁剪梯度。"""
    # 如果 net 是 nn.Module 的实例（即使用 PyTorch 构建的模型）
    if isinstance(net, torch.nn.Module):
        # 获取所有需要计算梯度的参数列表
        params = [p for p in net.parameters() if p.requires_grad]
    # 如果 net 是自定义的模型（例如上述的 RNNModelScratch）
    else:
        # 获取自定义模型的参数列表
        params = net.params
    # 计算参数梯度的范数，即所有参数梯度平方和的平方根
    norm = torch.sqrt(sum(torch.sum((p.grad**2)) for p in params))
    # 如果梯度范数超过指定阈值 theta
    if norm > theta:
        # 对于每个参数
        for param in params:
            # 将参数的梯度值裁剪至指定范围内，保持梯度范数不超过 theta
            param.grad[:] *= theta / norm
def train_epoch(net,trainset,testset,loss,optimizer,batch_size):
    state=net.begin_state(batch_size=batch_size)
    for i in range(len(trainset)):
        state.detach_()
        X=trainset[i]
        Y=testset[i]
        Y=Y.T.reshape(-1).to(device)
        X,state=net(X,state)
        testloss=loss(X,Y.long().to(device))
        optimizer.zero_grad()
        testloss.backward()
        grad_clipping(net,1)
        optimizer.step()

In [23]:
def train_mul_epoch(net,learningrate,corpus,batch_size,num_steps,max_epoch,vocab):
    optimizer=torch.optim.Adam(net.parameters(),lr=learningrate)
    loss=torch.nn.CrossEntropyLoss()
    loss=loss.to(device)
    t=generate_random_material(corpus,batch_size,num_steps,vocab)
    for i in range(max_epoch):
        print(i)
        trainset,testset=next(t)
        #for i in range(len(trainset)):
        train_epoch(net,trainset,testset,loss,optimizer,num_steps)
        print(predict('time is',10,vocab,net))

In [24]:
lr=0.01
batch_size=36
num_steps=30
train_mul_epoch(net,learningrate=lr,corpus=corpus,batch_size=batch_size,num_steps=num_steps,max_epoch=10,vocab=vocab)

0
[1332, 58, 5, 191, 7, 5084, 150, 1201, 4, 5085, 1202, 55, 49, 430, 3539, 54, 284, 5086, 5087, 63, 2259, 3540, 3541, 11, 1, 2734, 3542, 54, 1333, 116, 56, 5088, 4, 3543, 7, 3544, 1333, 116, 3545, 220, 2735, 1333, 116, 4, 1, 116, 5089, 123, 2260, 5090, 7, 3544, 1333, 116, 3545, 1027, 7, 5091, 5092, 11, 5093, 5094, 1201, 1334, 442, 1665, 2261, 1332, 5, 2736, 442, 1334, 1920, 220, 2735, 1333, 116, 5095, 3546, 2262, 2262, 2737, 2738, 5096, 1334, 442, 139, 272, 322, 1666, 5097, 322, 1666, 100, 2739, 322, 2740, 272, 400, 618, 2734, 3542, 1666, 5098, 7, 139, 1921, 322, 1666, 5099, 322, 5100, 1666, 322, 2741, 5101, 2740, 322, 1666, 5102, 5103, 5104, 2, 1335, 5105, 2263, 5106, 5107, 2264, 2265, 26, 3546, 5108, 5109, 3547, 2262, 2737, 2262, 5110, 2742, 285, 5111, 54, 5112, 146, 5113, 5114, 18, 2737, 1028, 5115, 5116, 5117, 2743, 5118, 3548, 1028, 2261, 698, 3539, 5119, 1665, 21, 2266, 5120, 307, 48, 298, 1029, 1, 347, 298, 1, 332, 298, 739, 740, 298, 1667, 272, 298, 2744, 459, 298, 1668, 4, 512

In [None]:
rnn=torch.nn.RNN(10,12)
inpu=torch.randn(19,1,10)
h0=torch.randn(1,1,12)
output,hn=rnn(inpu,h0)
print(output.shape)