In [2]:
import torch
import torch.nn as nn
import random 
import zipfile

In [3]:
with zipfile.ZipFile("./jaychou_lyrics.txt.zip") as zin:
    with zin.open("jaychou_lyrics.txt") as f:
        corpus_chars = f.read().decode("utf-8")
print(corpus_chars[:100])

想要有直升机
想要和你飞到宇宙去
想要和你融化在一起
融化在宇宙里
我每天每天每天在想想想想著你
这样的甜蜜
让我开始乡相信命运
感谢地心引力
让我碰到你
漂亮的让我面红的可爱女人
温柔的让我心疼的可


In [4]:
corpus_chars = corpus_chars.replace("\n", " ").replace("\r", " ")
corpus_chars = corpus_chars[0:10000]

In [5]:
idx_to_char = list(set(corpus_chars))
char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
vocab_size = len(char_to_idx)
print("vocab size: ", vocab_size)

vocab size:  1027


In [7]:
corpus_indices = [char_to_idx[char] for char in corpus_chars]
sample = corpus_indices[:10]
print("chars: ", ''.join([idx_to_char[idx] for idx in sample]))
print("indices: ", sample)

chars:  想要有直升机 想要和
indices:  [5, 86, 547, 203, 565, 633, 399, 5, 86, 308]


In [48]:
def data_iter_random(corpus_chars,batch_size,num_steps,device=None):
    num_examples = (len(corpus_chars) - 1) // num_steps
    epoch_size = num_examples // batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    def _data(pos):
        return corpus_chars[pos:pos+num_steps]
    
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for i in range(epoch_size):
        i = i * batch_size
        batch_indices = example_indices[i:i+batch_size]
        X = [_data(j * num_steps) for j in batch_indices]
        Y = [_data(j * num_steps + 1) for j in batch_indices]
        yield torch.tensor(X, dtype=torch.float32, device=device), torch.tensor(Y, dtype=torch.float32, device=device)

In [10]:
my_seq = list(range(30))
for X, Y in data_iter_random(my_seq, batch_size=2, num_steps=6):
    print("X: ", X, "\nY: ", Y)

X:  tensor([[ 6.,  7.,  8.,  9., 10., 11.],
        [ 0.,  1.,  2.,  3.,  4.,  5.]]) 
Y:  tensor([[ 7.,  8.,  9., 10., 11., 12.],
        [ 1.,  2.,  3.,  4.,  5.,  6.]])
X:  tensor([[18., 19., 20., 21., 22., 23.],
        [12., 13., 14., 15., 16., 17.]]) 
Y:  tensor([[19., 20., 21., 22., 23., 24.],
        [13., 14., 15., 16., 17., 18.]])


In [13]:
import time
import math
import numpy as np
import torch 
from torch import nn,optim
import torch.nn.functional as F

import sys
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [65]:
#print(corpus_chars),
#print(char_to_idx)
#print(idx_to_char)
print(vocab_size)

1027


In [66]:
def one_hot(x, n_class, dtype=torch.float32):
    x = x.long()
    res = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    res.scatter_(1, x.view(-1, 1), 1)
    return res

x = torch.tensor([0,2])
one_hot(x, vocab_size)

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.]])

In [67]:
def to_onehot(X, n_class):
    return [one_hot(X[:,i], n_class) for i in range(X.shape[1])]

X = torch.arange(10).view(2,5)
inputs = to_onehot(X, vocab_size)
print(len(inputs),inputs[0].shape)

5 torch.Size([2, 1027])


In [68]:
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
print("will use", device)

def get_params():
    def _one(shape):
        ts = torch.tensor(np.random.normal(0, 0.01, size=shape), device=device, dtype=torch.float32)
        return torch.nn.Parameter(ts, requires_grad=True)

    W_xh = _one((num_inputs, num_hiddens))
    W_hh = _one((num_hiddens, num_hiddens))
    b_h = torch.nn.Parameter(torch.zeros(num_hiddens, device=device, requires_grad=True))

    W_hq = _one((num_hiddens, num_outputs))
    b_q = torch.nn.Parameter(torch.zeros(num_outputs, device=device, requires_grad=True))
    return nn.ParameterList([W_xh, W_hh, b_h, W_hq, b_q])

will use cpu


In [69]:
def init_rnn_state(batch_size, num_hiddens, device):
    return (torch.zeros(batch_size, num_hiddens, device=device),)


In [70]:

def rnn(inputs, state, params):
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = torch.tanh(torch.matmul(X, W_xh) + torch.matmul(H, W_hh) + b_h)
        Y = torch.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H,)

In [71]:
state = init_rnn_state(X.shape[0], num_hiddens, device)
inputs = to_onehot(X.to(device), vocab_size)
params = get_params()
outputs, state_new = rnn(inputs, state, params)
print(len(outputs), outputs[0].shape,state_new[0].shape)

5 torch.Size([2, 1027]) torch.Size([2, 256])


In [72]:
def predict_rnn(prefix,num_chars,rnn,params,init_rnn_state,num_hiddens,vocab_size,device,idx_to_char,char_to_idx):
    state = init_rnn_state(1, num_hiddens, device)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars+len(prefix)-1):
        X = to_onehot(torch.tensor([[output[-1]]], device=device), vocab_size)
        Y, state = rnn(X, state, params)
        if t < len(prefix)-1:
            output.append(char_to_idx[prefix[t+1]])
        else:
            output.append(Y[0].argmax(dim=1).item())
    return ''.join([idx_to_char[i] for i in output])

In [73]:
predict_rnn("分开", 10, rnn, params, init_rnn_state, num_hiddens, vocab_size, device, idx_to_char, char_to_idx)

'分开只啸废载杵游索潮沟象'

In [74]:
def grad_clipping(params, theta, device):
    norm = torch.tensor([0.0], device=device)
    for param in params:
        norm += (param.grad.data ** 2).sum()
    norm = norm.sqrt().item()
    if norm > theta:
        for param in params:
            param.grad.data *= (theta / norm)

In [75]:
def sgd(params, lr, batch_size):  
    for param in params:
        param.data -= lr * param.grad / batch_size

In [76]:
def train_and_predict_rnn(rnn,get_params,init_rnn_state,num_hiddens,
                            vocab_size,device,corpus_indices,idx_to_char,
                            char_to_idx,is_random_iter,num_epochs,num_steps,
                            lr,clipping_theta,batch_size,pred_period,
                            pre_len,prefixes):
    if is_random_iter:
        data_iter_fn = data_iter_random(corpus_chars,batch_size,num_steps)
    else:
        data_iter_fn = data_iter_consecutive()

    loss = nn.CrossEntropyLoss()
    params = get_params()

    for epoch in range(num_epochs):
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, device)
        
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_random(corpus_indices, batch_size, num_steps, device)
        
        for X,Y in data_iter:
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, device)
            else:
                for s in state:
                    s.detach_()
            
            inputs = to_onehot(X, vocab_size)
            (outputs, state)= rnn(inputs, state, params)
            outputs = torch.cat(outputs,dim=0)
            y = torch.transpose(Y,0,1).contiguous().view(-1)
            l = loss(outputs, y.long())

            if params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            grad_clipping(params, clipping_theta, device)
            sgd(params, lr, 1)
            l_sum += l.item() * y.shape[0]
            n += y.shape[0]
        
        if (epoch+1)%pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (epoch+1, math.exp(l_sum/n), time.time()-start))
            for prefix in prefixes:
                print('-', predict_rnn(prefix, num_steps, rnn, params, init_rnn_state, num_hiddens, vocab_size, device, idx_to_char, char_to_idx))

In [77]:
num_epochs, num_steps, batch_size, lr, clipping_theta = 250, 35, 32, 1e2, 1e-2
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']

In [78]:
train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                      vocab_size, device, corpus_indices, idx_to_char,
                      char_to_idx, True, num_epochs, num_steps, lr,
                      clipping_theta, batch_size, pred_period, pred_len,
                      prefixes)

epoch 50, perplexity 68.352656, time 1.36 sec
- 分开 我想要再想 我不能再想 我不要再想 我不要再想 我不要再想 我不要再
- 不分开  我有你有 我的可空女人 透坏的让我疯狂的可爱女人 坏坏的让我疯狂的
epoch 100, perplexity 9.108562, time 1.33 sec
- 分开 有使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈兮 快使用双截棍 哼哼哈
- 不分开久 我不能再想 我不能再想 我不 我不 我不要再想 我不 我不 我不要
epoch 150, perplexity 2.664879, time 1.36 sec
- 分开 有什么不妥 有唱都在说喝 它下拥有对手我进攻 我的伤口被你拆封 誓言
- 不分开吗 我后能再想 我不能再想 我不 我不 我不要再想你 不知不觉 你已经
epoch 200, perplexity 1.553587, time 1.38 sec
- 分开 有什么不留在的母都鸠 牛铅我猫女画被B 心教太午重边谁Bab 印地安
- 不分开吗 我叫你爸 你打我的 如一了吗 你给的梦 你给放空 是一场痛 你在操
epoch 250, perplexity 1.270069, time 2.09 sec
- 分开 一只两不留 二唱它停留的 为什么我女朋友场外加油 你却还让我出糗 看
- 不分开吗 我叫你爸 你打我妈 这样对吗干嘛这样 何必让酒牵鼻子走 瞎 说沉你
