In [22]:
import time
import math
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

import d2l_pytorch as d2l

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

In [4]:
hidden_size = 256

In [18]:
class RnnModel(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(RnnModel, self).__init__()

        self.rnn = nn.RNN(input_size=vocab_size, hidden_size=hidden_size)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

        self.dense = nn.Linear(hidden_size, vocab_size)
        self.state = None

    def forward(self, X, state):
        X = d2l.to_onehot(X, self.vocab_size)
        Y, self.state = self.rnn(torch.stack(X), state)
        output = self.dense(Y.view(-1, Y.shape[-1]))
        return output, self.state

In [8]:
def predict_prefix_pytorch(prefix, seq_len, model, device, word2idx, idx2word):
    state = None
    output = [word2idx[prefix[0]]]
    for t in range(seq_len + int(len(prefix)) - 1):
        X = torch.tensor([[output[-1]]], device=device).view(1, 1)
        Y, state = model(X, state)
        if t < int(len(prefix)) - 1:
            output.append(word2idx[prefix[t + 1]])
        else:
            output.append(int(Y.argmax(dim=1).item()))
    return ''.join([idx2word[idx] for idx in output])

In [23]:
seq_len = 10
model = RnnModel(vocab_size, hidden_size)
result = predict_prefix_pytorch('遇见', seq_len, model,device, char_to_idx, idx_to_char)
print(result)

type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
type of x: <class 'list'>
type of torch stack(x): torch.Size([1, 1, 1027])
遇见睡乐奈格奈窝坦奈奈格


In [33]:
import d2l_pytorch as d2l

def train_and_predict_rnn_pytorch(model, hidden_size, vocab_size, device,
                                  corpus_indices, idx2word, word2idx, num_epochs, num_steps,
                                  lr, clipping_theta, batch_size, pred_period, pred_len, prefixes):
    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    state = None
    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(corpus_indices, batch_size, num_steps, device)
        for X, Y in data_iter:
            # if
            output, state = model(X, state)
            y = torch.transpose(Y, 0, 1).contiguous().view(-1)
            l = loss(output, y.long())
            optimizer.zero_grad()
            l.backward()

            d2l.grad_clipping(model.parameters(), clipping_theta, device)
            optimizer.step()
            l_sum += l.item() * y.shape[0]
            n += y.shape[0]

            try:
                perplexity = math.exp(l_sum / n)
            except OverflowError:
                perplexity = float('inf')
            if (epoch + 1) % pred_period == 0:
                print('epoch %d, perplexity %f, time %.2f sec' % (
                    epoch + 1, perplexity, time.time() - start))
                for prefix in prefixes:
                    print(' -', predict_prefix_pytorch( prefix, pred_len, model, device,
                        char_to_idx, idx_to_char))

In [34]:
num_epochs, batch_size, lr, clipping_theta = 250, 32, 1e-3, 1e-2 # 注意这里的学习率设置
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']
train_and_predict_rnn_pytorch(model, hidden_size, vocab_size, device,
                            corpus_indices, idx_to_char, char_to_idx,
                            num_epochs, seq_len, lr, clipping_theta,
                            batch_size, pred_period, pred_len, prefixes)

type of x: <class 'list'>
type of torch stack(x): torch.Size([10, 32, 1027])


AttributeError: module 'd2l_pytorch' has no attribute 'grad_clipping'