# 詞神林夕養成計畫(pytorch)

![md_images](../Images/charrnn.png)

In [1]:
import os
import io
import sys
import math
import codecs
import numpy as np
import random
import time
import torch
import torch.nn as nn
from torch.autograd import Variable

# Run on the GPU only when CUDA is actually available, so the same notebook
# also works on CPU-only machines instead of failing at the first .cuda() call.
use_cuda = torch.cuda.is_available()


In [2]:
# Read the whole lyrics file (the utf-8-sig codec drops a leading BOM),
# lower-case it and keep it as a list of single characters.
with io.open('lingxi.txt', encoding='utf-8-sig') as f:
    corpus = [*f.read().lower()]
print('corpus length:', len(corpus))



corpus length: 52647


In [3]:

# Deduplicate the characters, then build the two lookup tables
# (character -> index and index -> character) over the sorted vocabulary.
chars = sorted(set(corpus))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

total chars: 2114


In [4]:

# Batch geometry: n_seqs parallel sequences, each n_steps characters long.
n_seqs = 16
n_steps = 32
curr_idx = 0
# Train on a window covering 100 full batches' worth of characters.
text = corpus[curr_idx:curr_idx + n_seqs * n_steps * 100]

# Integer-encode the training window with the character -> index table.
encoded = np.array([char_indices[symbol] for symbol in text])
def one_hot_encode(arr, n_labels):
    """Return a float32 one-hot encoding of an integer array.

    Args:
        arr: integer array (or array-like) of any shape whose values are
            valid indices into an axis of length ``n_labels``.
        n_labels: length of the one-hot axis appended to ``arr.shape``.

    Returns:
        A float32 numpy array of shape ``arr.shape + (n_labels,)`` holding a
        single 1.0 per position along the last axis.
    """
    arr = np.asarray(arr)
    # arr.size works for any rank; np.multiply(*arr.shape) only handled 2-D.
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(arr.size), arr.ravel()] = 1.
    return one_hot.reshape(arr.shape + (n_labels,))


def get_batches(arr, n_seqs, n_steps):
    """Yield (inputs, targets) mini-batches of shape n_seqs x n_steps.

    Trailing elements of ``arr`` that do not fill a complete batch are
    dropped. The remainder is laid out as ``n_seqs`` rows and walked in
    windows of ``n_steps`` columns. Targets are the inputs shifted one
    position to the left; the last target column is the column right after
    the window, or column 0 of the rows for the final window.
    """
    chars_per_batch = n_seqs * n_steps
    usable = (len(arr) // chars_per_batch) * chars_per_batch

    # One row per parallel sequence
    rows = arr[:usable].reshape((n_seqs, -1))
    width = rows.shape[1]

    for start in range(0, width, n_steps):
        stop = start + n_steps
        # Inputs for this window
        x = rows[:, start:stop]
        # Targets: same window shifted by one character
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        if stop < width:
            y[:, -1] = rows[:, stop]
        else:
            # Final window: wrap around to the first column
            y[:, -1] = rows[:, 0]
        yield x, y

In [5]:

class CharRNN(nn.Module):
    """Character-level language model: a stacked LSTM followed by a linear layer.

    The network consumes one-hot encoded characters and produces one row of
    unnormalised scores over the vocabulary per input character.

    Args:
        vocabs: sequence of characters; a character's position is its index.
        n_steps: accepted for backward compatibility; not used by the class.
        n_hidden: size of the LSTM hidden state.
        n_layers: number of stacked LSTM layers.
        drop_prob: dropout probability, used both between LSTM layers and on
            the LSTM output.
        lr: stored as ``self.lr``; not used inside the class.
    """

    def __init__(self, vocabs, n_steps=100, n_hidden=256, n_layers=2,
                 drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        # Index <-> character lookup tables derived from the vocabulary order
        self.vocabs = vocabs
        self.idx2char = dict(enumerate(self.vocabs))
        self.char2idx = {ch: ii for ii, ch in self.idx2char.items()}

        self.dropout = nn.Dropout(drop_prob)
        self.lstm = nn.LSTM(len(self.vocabs), n_hidden, n_layers,
                            dropout=drop_prob, batch_first=True)
        self.fc = nn.Linear(n_hidden, len(self.vocabs))

        self.init_weights()

    def forward(self, x, hc):
        """Run the LSTM and the output layer.

        Args:
            x: one-hot input of shape (batch, seq, vocab); the LSTM is built
                with ``batch_first=True``.
            hc: tuple ``(h, c)`` of LSTM hidden and cell states.

        Returns:
            ``(scores, (h, c))`` where ``scores`` has shape
            (batch * seq, vocab) and ``(h, c)`` is the updated LSTM state.
        """
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)

        # reshape instead of view: also works when the LSTM output is not
        # contiguous in memory.
        x = x.reshape(x.size(0) * x.size(1), self.n_hidden)

        x = self.fc(x)

        return x, (h, c)

    def predict(self, char, h=None, cuda=False, top_k=None):
        """Feed one character and sample the next one.

        The module is moved to the GPU when ``cuda`` is true and to the CPU
        otherwise. The train/eval mode is left untouched, so call ``eval()``
        beforehand if dropout should be disabled while sampling.

        Args:
            char: input character; must be in the vocabulary.
            h: LSTM state tuple from a previous call, or None to start from
                an all-zero state.
            cuda: whether to run on the GPU.
            top_k: if given, sample only among the ``top_k`` most probable
                characters; otherwise sample over the whole vocabulary.

        Returns:
            ``(next_char, h)``: the sampled character and the new LSTM state.
        """
        if cuda:
            self.cuda()
        else:
            self.cpu()

        if h is None:
            h = self.init_hidden(1)

        # One-hot encode the character as a (1, 1, vocab) tensor that lives
        # on the same device and has the same dtype as the parameters.
        weight = next(self.parameters()).detach()
        inputs = weight.new_zeros(1, 1, len(self.vocabs))
        inputs[0, 0, self.char2idx[char]] = 1.

        # Cut the state off from any earlier graph and keep it next to the model
        h = tuple(each.detach().to(weight.device) for each in h)
        with torch.no_grad():
            out, h = self(inputs, h)
            p = torch.softmax(out, dim=1)
        p = p.cpu()

        if top_k is None:
            top_ch = np.arange(len(self.vocabs))
        else:
            p, top_ch = p.topk(top_k)
            # reshape(-1) keeps a 1-D array even when top_k == 1
            top_ch = top_ch.numpy().reshape(-1)

        p = p.numpy().reshape(-1).astype(np.float64)
        char = np.random.choice(top_ch, p=p / p.sum())

        return self.idx2char[int(char)], h

    def init_weights(self):
        """Initialise the output layer: zero bias, weights uniform in [-1, 1]."""
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)

    def init_hidden(self, n_seqs):
        """Return an all-zero ``(h, c)`` state for a batch of ``n_seqs`` sequences.

        Both tensors have shape (n_layers, n_seqs, n_hidden) and share the
        dtype and device of the model parameters.
        """
        weight = next(self.parameters()).detach()
        shape = (self.n_layers, n_seqs, self.n_hidden)
        return (weight.new_zeros(shape), weight.new_zeros(shape))


In [6]:
def sample(preds, temperature=1.0):
    """Draw one index from ``preds`` after temperature rescaling.

    The probabilities are taken to the power 1/temperature (via log/exp),
    renormalised, and a single multinomial draw selects the returned index.
    A small constant keeps the log and the division away from zero.
    """
    eps = 10e-14
    probs = np.asarray(preds).astype('float64')
    rescaled = np.exp(np.log(probs + eps) / temperature)
    normalised = rescaled / (np.sum(rescaled) + eps)
    draw = np.random.multinomial(1, normalised, 1)
    return np.argmax(draw)


In [7]:

def sample(preds, temperature=1.0):
    """Sample an index from a probability vector reshaped by ``temperature``.

    Log-probabilities are divided by ``temperature``, exponentiated and
    renormalised; one multinomial trial over the result picks the index
    that is returned.
    """
    preds = np.asarray(preds).astype('float64')
    # Temperature scaling in log space; the constant avoids log(0)
    tempered = np.exp(np.log(preds + 10e-14) / temperature)
    total = np.sum(tempered) + 10e-14
    one_hot_draw = np.random.multinomial(1, tempered / total, 1)
    return np.argmax(one_hot_draw)


def write_something(epoch,model):
    """Checkpoint ``model`` and print a 400-character greedy text sample.

    The model is saved twice under ``Models/`` (an epoch-tagged file and a
    "latest" file). A 3-character seed is then taken at a random position of
    the module-level ``text`` and the model continues it, always choosing the
    highest-scoring next character.

    Generation runs in eval mode (dropout off) and without gradient tracking;
    the model's previous train/eval mode is restored afterwards. The seed is
    fed once to prime the hidden state, after which only the newly generated
    character is fed, so no character is consumed twice by the carried state.

    Args:
        epoch: epoch number, used in the banner and the checkpoint file name.
        model: network exposing ``init_hidden(n)`` and returning
            ``(scores, state)`` when called with a one-hot batch and a state.
    """
    print()
    print('----- 第Epoch: %d後自動寫詞' % epoch)
    # torch.save does not create missing directories
    os.makedirs('Models', exist_ok=True)
    torch.save(model, 'Models/LingXi_pytorch_{0}.lstm'.format(epoch))
    torch.save(model, 'Models/LingXi_pytorch.lstm')
    start_index = random.randint(0, len(text) - n_steps - 1)

    # Follow the model's device instead of assuming CUDA
    device = next(model.parameters()).device
    was_training = model.training
    model.eval()
    try:
        for diversity in [1.0]:
            # NOTE: decoding is greedy (argmax); diversity is only reported.
            print('----- diversity:', diversity)
            h = model.init_hidden(1)
            sentence = ''.join(text[start_index: start_index +3])
            print('----- 根據以下詞彙發想: "' + sentence + '"')
            sys.stdout.write(sentence)

            try:
                with torch.no_grad():
                    pending = sentence  # characters not yet fed to the model
                    for _ in range(400):
                        arr = np.zeros((1, len(pending), len(chars)), dtype=np.float32)
                        for pos, ch in enumerate(pending):
                            arr[0, pos, char_indices[ch]] = 1
                        output, h = model(torch.from_numpy(arr).to(device), h)

                        # The last row holds the scores for the next character
                        next_index = int(output[-1].argmax().item())
                        next_char = indices_char[next_index]

                        sys.stdout.write(next_char)
                        sys.stdout.flush()
                        pending = next_char
            except Exception as e:
                # Best effort: sampling must never abort the training run,
                # but report the failure once instead of once per character.
                print(e)
            print()
    finally:
        model.train(was_training)

In [8]:
model = CharRNN(chars, n_hidden=512, n_layers=2)

# Resume from the latest checkpoint when one exists.
# map_location='cpu' lets a checkpoint written on a GPU machine load anywhere;
# the model is moved to the GPU below when requested.
# NOTE(review): the checkpoint is a fully pickled module, so only load files
# you trust. Recent PyTorch releases may require weights_only=False for such
# files; confirm against the installed version.
if os.path.exists('Models/LingXi_pytorch.lstm'):
    model = torch.load('Models/LingXi_pytorch.lstm', map_location='cpu')
    print('recovered!!')

# Move the model to its final device before building the optimizer, as the
# torch.optim documentation recommends.
if use_cuda:
    model.cuda()

model.train()
model_optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()



recovered!!


In [9]:
counter = 0
epochs=30
n_chars = len(chars)
# torch.save does not create missing directories
os.makedirs('Models', exist_ok=True)
print('start epoch!')
for epoch in range(epochs):
    # Start every pass over the data from an all-zero LSTM state
    h = model.init_hidden(n_seqs)
    for x, y in get_batches(encoded, n_seqs, n_steps):
        counter += 1
        inputs = torch.from_numpy(
            one_hot_encode(x, n_chars).astype(np.float32, copy=False))
        # CrossEntropyLoss expects int64 class indices
        targets = torch.from_numpy(y.astype(np.int64))
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # Carry the state values across batches but cut the graph, so
        # backpropagation stops at the batch boundary.
        h = tuple(each.detach() for each in h)

        model.zero_grad()

        # Call the module itself (not .forward) so registered hooks run
        output, h = model(inputs, h)

        loss = criterion(output, targets.view(n_seqs*n_steps))

        loss.backward()

        # Gradient clipping is currently disabled. To enable it, call
        # nn.utils.clip_grad_norm_(model.parameters(), 5) here.

        model_optimizer.step()
        if counter % 2 == 0:
            # Print the training status
            print("Epoch: {}/{}...".format(epoch+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.4f}...".format(loss.item()))
        if counter % 20 == 0:
            torch.save(model, 'Models/LingXi_pytorch_{0}.lstm'.format(epoch))
            torch.save(model, 'Models/LingXi_pytorch.lstm')
        if counter % 100 == 0:
            write_something(epoch,model)
    
  

start epoch!
Epoch: 1/30... Step: 2... Loss: 0.5658...
Epoch: 1/30... Step: 4... Loss: 0.5332...
Epoch: 1/30... Step: 6... Loss: 0.5801...
Epoch: 1/30... Step: 8... Loss: 0.5973...
Epoch: 1/30... Step: 10... Loss: 0.5798...
Epoch: 1/30... Step: 12... Loss: 0.5330...
Epoch: 1/30... Step: 14... Loss: 0.5789...
Epoch: 1/30... Step: 16... Loss: 0.5112...
Epoch: 1/30... Step: 18... Loss: 0.4942...
Epoch: 1/30... Step: 20... Loss: 0.5820...


  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1/30... Step: 22... Loss: 0.6691...
Epoch: 1/30... Step: 24... Loss: 0.5271...
Epoch: 1/30... Step: 26... Loss: 0.5200...
Epoch: 1/30... Step: 28... Loss: 0.5670...
Epoch: 1/30... Step: 30... Loss: 0.6451...
Epoch: 1/30... Step: 32... Loss: 0.4715...
Epoch: 1/30... Step: 34... Loss: 0.4633...
Epoch: 1/30... Step: 36... Loss: 0.5635...
Epoch: 1/30... Step: 38... Loss: 0.5947...
Epoch: 1/30... Step: 40... Loss: 0.6192...
Epoch: 1/30... Step: 42... Loss: 0.5105...
Epoch: 1/30... Step: 44... Loss: 0.6119...
Epoch: 1/30... Step: 46... Loss: 0.5187...
Epoch: 1/30... Step: 48... Loss: 0.6245...
Epoch: 1/30... Step: 50... Loss: 0.5786...
Epoch: 1/30... Step: 52... Loss: 0.5799...
Epoch: 1/30... Step: 54... Loss: 0.6755...
Epoch: 1/30... Step: 56... Loss: 0.6664...
Epoch: 1/30... Step: 58... Loss: 0.5973...
Epoch: 1/30... Step: 60... Loss: 0.5467...
Epoch: 1/30... Step: 62... Loss: 0.5841...
Epoch: 1/30... Step: 64... Loss: 0.5810...
Epoch: 1/30... Step: 66... Loss: 0.5821...
Epoch: 1/30