# LSTM Language Model

In [1]:
import time
import math
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

import d2lzh_pytorch as d2dl
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

import random

In [2]:
# Obtain Data
def load_jaychou_lyrics(path='jaychou_lyrics.txt', max_chars=20000, encoding='utf-8'):
    """Read a lyrics corpus and build a character-level vocabulary.

    Args:
        path: Text file to read. Defaults to the file name the notebook uses.
        max_chars: Only the first ``max_chars`` characters (after newline
            replacement) are kept.
        encoding: Text encoding of the file.
            NOTE(review): assumes the corpus is stored as UTF-8 -- confirm.

    Returns:
        A tuple ``(corpus_indices, char_dict, char_list, vocab_size)``:
        the corpus as a list of integer ids, the char -> id mapping, the
        id -> char list, and the number of distinct characters.
    """
    # Decode explicitly: the locale's default encoding is not guaranteed to
    # handle non-ASCII lyrics on every platform.
    with open(path, encoding=encoding) as f:
        corpus_char = f.read()
    # Line breaks are treated as plain separators.
    corpus_char = corpus_char.replace('\n', ' ').replace('\r', ' ')
    corpus_char = corpus_char[:max_chars]
    # Sort so the char <-> id mapping is identical on every run; bare set
    # iteration order varies with string hash randomization.
    char_list = sorted(set(corpus_char))
    vocab_size = len(char_list)
    char_dict = {char: idx for idx, char in enumerate(char_list)}
    corpus_indices = [char_dict[char] for char in corpus_char]
    return corpus_indices, char_dict, char_list, vocab_size

# Build the corpus and vocabulary once; the model's input width is the
# vocabulary size and the LSTM uses 256 hidden units.
corpus_indices, char_dict, char_list, vocab_size = load_jaychou_lyrics()
input_size = vocab_size
hidden_size = 256

# Load Data
def data_iter_consecutive(corpus_indices, batch_size, num_steps, device=None):
    """Yield consecutive ``(X, Y)`` mini-batches for language-model training.

    The corpus is cut into ``batch_size`` equal-length rows; each yielded
    ``X`` is a ``(batch_size, num_steps)`` window over those rows and ``Y``
    is the same window shifted one position to the right (the next-token
    targets). Successive windows are adjacent, so row ``r`` of one batch
    continues row ``r`` of the previous batch.

    Args:
        corpus_indices: Sequence of integer token ids.
        batch_size: Number of parallel rows.
        num_steps: Window length (time steps per batch).
        device: Target device. When ``None`` it is resolved at call time to
            CUDA if available, else CPU, rather than being captured from a
            module-level global when the function is defined.

    Yields:
        ``(X, Y)`` pairs of ``float32`` tensors of shape
        ``(batch_size, num_steps)``.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus = torch.tensor(corpus_indices, dtype=torch.float32, device=device)
    num_batch = len(corpus) // batch_size
    # Drop the tail that does not fill a whole row, then lay rows out side by side.
    indices = corpus[:num_batch * batch_size].view(batch_size, num_batch)
    # "- 1" leaves room for the one-step-shifted targets of the last window.
    num_batch_example = (num_batch - 1) // num_steps
    for start in range(0, num_batch_example * num_steps, num_steps):
        X = indices[:, start:start + num_steps]
        Y = indices[:, start + 1:start + 1 + num_steps]
        yield X, Y

# Define Model
class lstmModel(nn.Module):
    """Character-level language model: one-hot input -> LSTM -> linear logits.

    Args:
        input_size: Vocabulary size; used both as the one-hot width fed to
            the LSTM and as the number of output logits.
        hidden_size: Number of LSTM hidden units.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.lstm_layer = nn.LSTM(input_size, hidden_size, bidirectional=False)
        # A bidirectional LSTM concatenates both directions on the feature axis.
        num_directions = 2 if self.lstm_layer.bidirectional else 1
        self.hidden_size = self.lstm_layer.hidden_size * num_directions
        self.dense_layer = nn.Linear(self.hidden_size, self.input_size)
        self.state = None  # most recent LSTM state, a tuple (h, c)

    def forward(self, inputs, state):
        """Run one forward pass.

        Args:
            inputs: ``(batch, num_steps)`` tensor of token ids (any numeric
                dtype; values are cast to int64 before encoding).
            state: Previous ``(h, c)`` tuple, or ``None`` for a zero state.

        Returns:
            ``(output, state)`` where ``output`` has shape
            ``(num_steps * batch, input_size)`` with rows ordered time-major
            (all batch entries of step 0, then step 1, ...), and ``state``
            is the new ``(h, c)`` tuple.
        """
        # nn.LSTM defaults to time-major input, so transpose to
        # (num_steps, batch) before encoding; a single vectorized one_hot
        # call replaces building and stacking one tensor per time step.
        token_ids = inputs.long().transpose(0, 1)
        x = F.one_hot(token_ids, self.input_size).to(torch.float32)
        y, self.state = self.lstm_layer(x, state)
        # Flatten (num_steps, batch, hidden) -> (num_steps * batch, hidden).
        output = self.dense_layer(y.view(-1, y.shape[-1]))
        return output, self.state

# Build the model, move it to the selected device and show its layers.
net = lstmModel(input_size, hidden_size)
net = net.to(device)
print(net)
        
# Predict Model
def predict_lstm_lm(prefix, net, pred_len, char_dict, char_list, vocab_size, device):
    """Greedily generate ``pred_len`` characters that continue ``prefix``.

    The prefix is fed one character at a time to warm up the recurrent
    state; after that, the arg-max prediction of each step becomes the next
    input.

    Args:
        prefix: Non-empty seed string; every character must be a key of
            ``char_dict``.
        net: Callable ``net(inputs, state) -> (logits, state)`` taking a
            ``(1, 1)`` tensor of token ids.
        pred_len: Number of characters to generate after the prefix.
        char_dict: Mapping from character to token id.
        char_list: Sequence mapping token id back to character.
        vocab_size: Unused; kept for interface compatibility.
        device: Device on which input and state tensors are placed.

    Returns:
        The prefix followed by the generated characters.
    """
    outputs = [char_dict[prefix[0]]]
    state = None
    # Inference only: without no_grad every step would extend an autograd
    # graph through the carried state.
    with torch.no_grad():
        for i in range(pred_len + len(prefix) - 1):
            inputs = torch.tensor([outputs[-1]], device=device).view(1, 1)
            if isinstance(state, tuple):  # LSTM state is (h, c)
                state = tuple(s.to(device) for s in state)
            elif state is not None:
                state = state.to(device)
            Y, state = net(inputs, state)
            if i < len(prefix) - 1:
                # Still consuming the prefix: ignore the prediction.
                outputs.append(char_dict[prefix[i + 1]])
            else:
                outputs.append(int(Y.argmax(1).item()))
    return ''.join(char_list[idx] for idx in outputs)
    

# Train Model using consecutive dataloader
def train_lstm_lm(net, lr, num_epochs, corpus_indices, batch_size, num_steps, device,
                  grad_clipping_theta, pred_period, prefixes, pred_len, vocab_size, char_dict,
                  char_list):
    """Train a recurrent language model on consecutive mini-batches.

    Every ``pred_period`` epochs the epoch's perplexity and elapsed time are
    printed, followed by one generated sample per entry of ``prefixes``.

    Args:
        net: Model called as ``net(X, state) -> (logits, state)``.
        lr: Learning rate for Adam.
        num_epochs: Number of passes over the corpus.
        corpus_indices: Corpus as a sequence of integer token ids.
        batch_size: Rows per mini-batch.
        num_steps: Time steps per mini-batch.
        device: Device used for the model and the data.
        grad_clipping_theta: Maximum global L2 norm of the gradients.
        pred_period: Report/sample every this many epochs.
        prefixes: Seed strings for the generated samples.
        pred_len: Number of characters to generate per sample.
        vocab_size: Forwarded to ``predict_lstm_lm``.
        char_dict: Mapping from character to token id.
        char_list: Sequence mapping token id back to character.

    Raises:
        ValueError: If the corpus is too short to produce a single batch
            for the given ``batch_size`` and ``num_steps``.
    """
    loss = nn.CrossEntropyLoss()
    # Move the model first so the optimizer is built over the on-device parameters.
    net = net.to(device)
    optimizer = optim.Adam(net.parameters(), lr)

    for epoch in range(num_epochs):
        state = None
        l_sum, n, start_time = 0.0, 0, time.time()
        data_iter = data_iter_consecutive(corpus_indices, batch_size, num_steps, device)
        for X, Y in data_iter:
            # Consecutive batches continue each other, so the state is
            # carried over but detached to stop back-propagation at the
            # batch boundary.
            if isinstance(state, tuple):
                state = tuple(s.detach() for s in state)
            elif state is not None:
                state = state.detach()

            # modeling
            outputs, state = net(X, state)
            # Flatten the targets in time-major order, the same order in
            # which the labels were paired with the logits before.
            y = torch.transpose(Y, 0, 1).reshape(-1)
            l = loss(outputs, y.long())

            # bp
            optimizer.zero_grad()
            l.backward()
            # Built-in global-norm clipping; it is safe to hand it the
            # one-shot generator returned by net.parameters().
            nn.utils.clip_grad_norm_(net.parameters(), grad_clipping_theta)
            optimizer.step()

            # stats
            l_sum += l.item()
            n += 1

        if n == 0:
            raise ValueError(
                'corpus is too short to form a batch with batch_size=%d and num_steps=%d'
                % (batch_size, num_steps))

        try:
            perplexity = math.exp(l_sum / n)
        except OverflowError:
            perplexity = float('inf')

        if (epoch + 1) % pred_period == 0:
            print('Epoch: %d, Perplexity: %f, Time: %.2f sec' %
                  (epoch + 1, perplexity, time.time() - start_time))

            for prefix in prefixes:
                print('-', predict_lstm_lm(prefix, net, pred_len, char_dict, char_list, vocab_size, device))
            
            
            
# Hyperparameters and train
# Training configuration, one setting per line.
num_epochs = 250
batch_size = 32
lr = 0.01
grad_clipping_theta = 0.01
num_steps = 32
pred_period = 50   # report and sample every 50 epochs
pred_len = 50      # characters generated per sample
prefixes = ['分开', '不分开']

train_lstm_lm(
    net, lr, num_epochs, corpus_indices, batch_size, num_steps, device,
    grad_clipping_theta, pred_period, prefixes, pred_len, vocab_size, char_dict,
    char_list,
)


lstmModel(
  (lstm_layer): LSTM(1447, 256)
  (dense_layer): Linear(in_features=256, out_features=1447, bias=True)
)
Epoch: 50, Perplexity: 1.044233, Time: 2.97 sec
- 分开始没人注意到你我 等雨变强之前 我们将会分化软弱 趁时间没发觉 让我带着你离开 没有了证明 没有了空
- 不分开 为什么还要我用微笑来带过 我没有这种天份 包容你也接受他 不用担心的太多 我会一直好好过 你已经远
Epoch: 100, Perplexity: 1.575040, Time: 3.43 sec
- 分开始脸太多难过 你比我好想就这样牵着你 微笑起天都能回到  为什么这么简单你做不到  但是一个人之后 
- 不分开 为什么这种速度你追不要问 我就是那条龙 渴望着血脉相通 无限个千万弟兄 我把天 挑落好不再也能说你
Epoch: 150, Perplexity: 1.016938, Time: 3.09 sec
- 分开都迁就着你 我真的没有天份 安静的没这么快 我会学着放弃你 是因为我太爱你 是因为我太爱你你要我说多
- 不分开 它在空中停留 所有人看着我 抛物线进球 单手过人运球 篮下妙传出手 漂亮的假动作 帅呆了我 全场盯
Epoch: 200, Perplexity: 1.019268, Time: 3.76 sec
- 分开 爱像一阵风 吹完它就走 这样的节奏 谁都无可奈何 没有你以后 我灵魂失控 黑云在降落 我被它拖着走
- 不分开 为什么还要我用微笑来带过 我没有这种天份 包容你也接受他 不用担心的太多 我会一直好好过 你已经远
Epoch: 250, Perplexity: 1.015867, Time: 3.41 sec
- 分开都迁就着你 我真的没有天份 安静的没这么快 我会学着放弃你 是因为我太爱你 是因为我太爱你 该不该搁
- 不分开 为什么这样子 你看着我说你已经决定 我拉不住你 他的手应该比我更暖 铁盒的序变成了日记 变成了空气
Epoch: 300, Perplexity: 1.025433, Time: 3.31 sec
- 分开时间 所以你弃权 所以你弃权 我想要的想做的你比谁都了  你想说

In [16]:
# Scratch cell: extend a two-element list in place and show the new last item.
res = [0, 1]
res += [1]
res[2]

1