In [1]:
import torch
import torch.nn as nn
import random 
import zipfile

In [4]:
with zipfile.ZipFile("./jaychou_lyrics.txt.zip") as zin:
    with zin.open("jaychou_lyrics.txt") as f:
        corpus_chars = f.read().decode("utf-8")
print(corpus_chars[:10])

想要有直升机
想要和


In [5]:
corpus_chars = corpus_chars.replace("\n", " ").replace("\r", " ")
corpus_chars = corpus_chars[0:10000]

In [6]:
idx_to_char = list(set(corpus_chars))
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
vocab_size = len(char_to_idx)
print("vocab size: ", vocab_size)

vocab size:  1027


In [7]:
corpus_indices = [char_to_idx[char] for char in corpus_chars]
sample = corpus_indices[:10]
print("chars: ", ''.join([idx_to_char[idx] for idx in sample]))
print("indices: ", sample)

chars:  想要有直升机 想要和
indices:  [921, 114, 392, 432, 347, 775, 308, 921, 114, 481]


In [8]:
def data_iter_random(corpus_chars,batch_size,num_steps,device=None):
    num_examples = (len(corpus_chars) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    def _data(pos):
        return corpus_chars[pos:pos+num_steps]
    
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for i in range(epoch_size):
        i = i * batch_size
        batch_indices = example_indices[i:i+batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device)

In [9]:
my_seq = list(range(30))
for X, Y in data_iter_random(my_seq, batch_size=2, num_steps=6):
    print("X: ", X, "\nY: ", Y)

X:  tensor([[ 6.,  7.,  8.,  9., 10., 11.],
        [18., 19., 20., 21., 22., 23.]]) 
Y:  tensor([[ 7.,  8.,  9., 10., 11., 12.],
        [19., 20., 21., 22., 23., 24.]])
X:  tensor([[12., 13., 14., 15., 16., 17.],
        [ 0.,  1.,  2.,  3.,  4.,  5.]]) 
Y:  tensor([[13., 14., 15., 16., 17., 18.],
        [ 1.,  2.,  3.,  4.,  5.,  6.]])


In [26]:
import time
import numpy as np
import math
import torch
from torch import nn,optim
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(corpus_indices[:10])
#print(char_to_idx)
#print(idx_to_char[:10],vocab_size[:10])
print(vocab_size)

[921, 114, 392, 432, 347, 775, 308, 921, 114, 481]
1027


In [31]:
num_hiddens = 256
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)
#rnn_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens)
num_steps = 35
batch_size = 2
state = None
X = torch.rand(batch_size, num_steps, vocab_size)
Y, state_new = rnn_layer(X, state)
print(Y.shape, len(state_new),state_new[0].shape)

torch.Size([2, 35, 256]) 1 torch.Size([35, 256])


In [32]:
def one_hot(x, n_class, dtype=torch.float32):
    x = x.long()
    res = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    res.scatter_(1, x.view(-1, 1), 1)
    return res

def to_onehot(X, n_class):
    return [one_hot(X[:,i], n_class) for i in range(X.shape[1])]


In [48]:
class RNNModel(nn.Module):
    def __init__(self, rnn_layer, vocab_size, num_hiddens):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(num_hiddens, vocab_size)
        self.state = None
    
    def forward(self, inputs, state):
        X = to_onehot(inputs, self.vocab_size)
        Y, self.state = self.rnn(torch.stack(X), state)
        output = self.dense(Y.view(-1, Y.shape[-1]))
        return output, self.state

In [50]:
print(RNNModel())

TypeError: __init__() missing 3 required positional arguments: 'rnn_layer', 'vocab_size', and 'num_hiddens'

In [34]:
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, num_hiddens, device, idx_to_char, char_to_idx):
    state = None
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        X = torch.tensor([output[-1]], device=device).view(1, 1)
        if state is not None:
            if isinstance(state,tuple):
                state = (state[0].to(device), state[1].to(device))
            else:
                state = state.to(device)
        Y, state = model(X, state)
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(Y.argmax(dim=1).item())
    return ''.join([idx_to_char[i] for i in output])

In [36]:
model = RNNModel(rnn_layer, vocab_size, num_hiddens)
predict_rnn_pytorch('分开', 10, model, vocab_size, num_hiddens, device, idx_to_char, char_to_idx)

'分开倒宣倒力倒搞倒宣倒力'

In [37]:
def grad_clipping(params, theta, device):
    norm = torch.tensor([0.0], device=device)
    for param in params:
        norm += (param.grad.data ** 2).sum()
    norm = norm.sqrt().item()
    if norm > theta:
        for param in params:
            param.grad.data *= (theta / norm)

In [46]:
def train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device, 
                            corpus_indices, idx_to_char, char_to_idx, 
                            num_epochs, num_steps, lr, clipping_theta, 
                            batch_size, pred_period, pred_len, prefixes):

    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    state = None
    for epoch in range(num_epochs):    
        l_sum, n, start = 0.0, 0, time.time()

        data_iter = data_iter_random(corpus_indices, batch_size, num_steps, device)
        
        for X, Y in data_iter:
            if state is not None:
                if isinstance(state, tuple):
                    state = (state[0].detach(), state[1].detach())
                else:
                    state = state.detach()

            (output, state)= model(X, state)
            y = torch.transpose(Y,0,1).contiguous().view(-1)
            l = loss(output, y.long())

            optimizer.zero_grad()
            l.backward()
            grad_clipping(model.parameters(), clipping_theta, device)
            optimizer.step()
            l_sum += l.item() * y.shape[0]
            n += y.shape[0]
        try:
            perplexity = math.exp(l_sum / n)
        except OverflowError:
            perplexity = float('inf')
        if (epoch+1)%pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (epoch+1, math.exp(l_sum/n), time.time()-start))
            for prefix in prefixes:
                print('-', predict_rnn_pytorch(prefix, num_steps, model, vocab_size, num_hiddens, device, idx_to_char, char_to_idx))

In [47]:
num_epochs, batch_size, lr, clipping_theta = 250, 32, 1e-3, 1e-2 
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']
train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                            corpus_indices, idx_to_char, char_to_idx,
                            num_epochs, num_steps, lr, clipping_theta,
                            batch_size, pred_period, pred_len, prefixes)

epoch 50, perplexity 1.597870, time 0.86 sec
- 分开的我有 你知道 就是我听想要 你只想要 说陪我妈妈妈 但一直在多 就后
- 不分开不 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风 不能承受我
epoch 100, perplexity 1.238502, time 0.92 sec
- 分开的我有多难熬  没有你在我有多难熬多烦恼  没有你烦 我有多烦恼  没
- 不分开不 我不能再想 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风
epoch 150, perplexity 1.137098, time 0.87 sec
- 分开的我面红的可爱女人 温柔的让我心疼的可爱女人 透明的让我感动的可爱女人
- 不分开不 我不能再想 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风
epoch 200, perplexity 1.108896, time 0.91 sec
- 分开的我都会骑扫把的胖女巫 用拉丁文念咒语啦啦呜 她养的黑猫笑起来像哭 啦
- 不分开不 我不能再想 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风
epoch 250, perplexity 1.083577, time 0.94 sec
- 分开的我不要再想 我不要再想 我不 我不 我不要再想你 不知不觉 你已经离
- 不分开不 我不能再想 我不能再想 我不 我不 我不能 爱情走的太快就像龙卷风
