In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [35]:
from hidden_others.ch06.rnnlm import Rnnlm
from hidden_others.ch06.better_rnnlm import BetterRnnlm
from common.functions import softmax
from common.base_model import BaseModel

In [3]:
class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based generation of word-id sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample word ids one at a time, starting from ``start_id``.

        Args:
            start_id: Id of the first word; it is also the first element of the result.
            skip_ids: Optional collection of ids that must not appear in the result.
                A draw that lands on one of them is discarded and sampling is retried.
            sample_size: Length of the returned list, ``start_id`` included.

        Returns:
            A list that starts with ``start_id`` and continues with the sampled ids
            as plain Python ints. If ``sample_size`` is 1 or less, only
            ``[start_id]`` is returned.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # Draw a scalar instead of a 1-element array: calling int() on an array
            # with ndim > 0 is deprecated since NumPy 1.25.
            sampled = int(np.random.choice(len(p), p=p))

            # A rejected draw leaves x untouched, so the same input is fed to
            # predict() again on the next iteration.
            if skip_ids is None or sampled not in skip_ids:
                x = sampled
                word_ids.append(sampled)

        return word_ids

In [4]:
# 実際に文書生成してみる
from hidden_others.dataset import ptb

# Load the PTB training split; the two vocabulary maps are needed to encode the
# start word and to decode the sampled ids.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# The model is used as constructed: no saved parameters are loaded in this cell.
model = RnnlmGen()

# Start word and the tokens that must never be emitted.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Text generation: sample ids, decode them, and break lines at '<eos>'.
word_ids = model.generate(start_id, skip_ids=skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace('<eos>', '.\n')
print(txt)


you trespass wis. cat communities poured cowboys sony rtc materials weighing fla. arkansas remains including refusing angry corners shipped burnham employ difficult lobbying organic congressman foothills consumed nearby doldrums heritage inventories bullet justifies studio burden most protest burns approve banponce inappropriate preparation slowly shaky smaller critical led knowledge electoral lucrative caution petco detailed south jeff aoun occurs per gaf stick appropriate programming scenario seven-year fairfield deliberately carbon grants investor initiated resistance requires homeowners ohbayashi mural cover stadium towers annuities bikers camera statute conflicts chip identifying planners pegged cheaper cities relationship neat tables aviation convenient bullets gathering figures o. futures attracting


In [5]:
# 学習済みモデルで生成
# 実際に文書生成してみる
from hidden_others.dataset import ptb

# Load the PTB training split; the two vocabulary maps are needed to encode the
# start word and to decode the sampled ids.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the model and overwrite its parameters with the ones saved in Rnnlm.pkl.
model = RnnlmGen()
model.load_params(file_name='Rnnlm.pkl')

# Start word and the tokens that must never be emitted.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Text generation: sample ids, decode them, and break lines at '<eos>'.
word_ids = model.generate(start_id, skip_ids=skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace('<eos>', '.\n')
print(txt)

you also do n't day .
 in earnings .
 leaping into those who gauge bound injection of government loans from the federal court .
 joseph 's of counting improving it in a short significantly financial move and that it will take some base corn .
 using charts will be able to form a tumultuous much of the total of infiniti daily shares as well as well unload cleaning up about thursday copies of an rebuild slide .
 while they can get a heavy treasurys of rival naked growth during the pop however is almost signs of people explaining time out


In [6]:
# lstmモデルでもやってみる
from hidden_others.ch06.better_rnnlm import BetterRnnlm

class betterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based generation of word-id sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample word ids one at a time, starting from ``start_id``.

        Args:
            start_id: Id of the first word; it is also the first element of the result.
            skip_ids: Optional collection of ids that must not appear in the result.
                A draw that lands on one of them is discarded and sampling is retried.
            sample_size: Length of the returned list, ``start_id`` included.

        Returns:
            A list that starts with ``start_id`` and continues with the sampled ids
            as plain Python ints. If ``sample_size`` is 1 or less, only
            ``[start_id]`` is returned.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # Draw a scalar instead of a 1-element array: calling int() on an array
            # with ndim > 0 is deprecated since NumPy 1.25.
            sampled = int(np.random.choice(len(p), p=p))

            # A rejected draw leaves x untouched, so the same input is fed to
            # predict() again on the next iteration.
            if skip_ids is None or sampled not in skip_ids:
                x = sampled
                word_ids.append(sampled)

        return word_ids



# Load the PTB training split; the two vocabulary maps are needed to encode the
# start word and to decode the sampled ids.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the model and overwrite its parameters with the saved BetterRnnlm ones.
model = betterRnnlmGen()
model.load_params(file_name='hidden_others/BetterRnnlm.pkl')

# Start word and the tokens that must never be emitted.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Text generation: sample ids, decode them, and break lines at '<eos>'.
word_ids = model.generate(start_id, skip_ids=skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace('<eos>', '.\n')
print(txt)

you project the store in a red from the thinking of me last year .
 he unit 's most lucrative worker learned it goupil never attempted because his hepatitis 's business was made .
 the team 's team change a side note we morning under the port of toronto .
 by beginning a four-game area may seek left from the new project in the southwest .
 this is a theater for a while and look like it or all the show and huge stuff at us like is .
 many speak watch it in a country like that makes panels


In [7]:
# Generate text that starts with "the meaning of life is".
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[word] for word in start_words.split(' ')]

# Feed every prompt word except the last through predict(), one at a time, so the
# model has seen the prompt before sampling starts.
for prompt_id in start_ids[:-1]:
    x = np.array(prompt_id).reshape(1, 1)
    model.predict(x)

# Sampling starts from the last prompt word; the words fed above are put back in
# front of the generated ids.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print('-' * 50)
print(txt)

--------------------------------------------------
the meaning of life is n't really consistent but added that bells swing with weakest and boomers are well off.
 in business news on integrated 's tumbled the ski system 's health operations were turned steadily.
 in a number of occasions and said they are indeed very favorite.
 the conviction of a first reference to the field 's punitive damages among the installations was 's merit.
 a lot of fraud has and talked to people.
 we alike are trying to condemn the project.
 for instance that hope.
 what sent my pictures to be sure i did n't


In [9]:
# 足し算データセット

from hidden_others.dataset import sequence

# Load the addition dataset with a fixed seed, then fetch its vocabulary maps.
dataset = sequence.load_data('addition.txt', seed=1984)
(x_train, t_train), (x_test, t_test) = dataset
char_to_id, id_to_char = sequence.get_vocab()

# Print the input/target shapes of the train split, then of the test split.
for inputs, targets in ((x_train, t_train), (x_test, t_test)):
    print(inputs.shape, targets.shape)

(45000, 7) (45000, 5)
(5000, 7) (5000, 5)


In [111]:
# Encoder 
from common.time_layers import *

class Encoder:
    """Runs an id sequence through TimeEmbedding and TimeLSTM and returns the last output slice."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the embedding and LSTM layers with randomly initialised weights.

        Args:
            vocab_size: Number of rows of the embedding matrix.
            wordvec_size: Number of columns of the embedding matrix / LSTM input width.
            hidden_size: LSTM hidden width (the weight matrices are 4x this wide).
        """
        randn = np.random.randn

        # The draw order (embedding, Wx, Wh) is kept as-is so that runs with a
        # seeded np.random give the same weights.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype('f')
        lstm_Wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(4 * hidden_size, dtype='f')

        self.embed = TimeEmbedding(embed_W)
        # stateful=False: the input is handed to the decoder as one block, so there
        # is no reason to keep the LSTM state between calls.
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        self.hs = None  # LSTM output of the latest forward(); backward() needs its shape

    def forward(self, xs):
        """Encode ``xs`` and return the LSTM output at the last index of axis 1.

        The full LSTM output is stored in ``self.hs``.
        """
        self.hs = self.lstm.forward(self.embed.forward(xs))
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate ``dh``, the gradient of the value returned by forward().

        Returns:
            Whatever the embedding layer's backward() returns.
        """
        # Only the last time step was passed on by forward(); every other step was
        # discarded, so its gradient stays zero.
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh
        return self.embed.backward(self.lstm.backward(dhs))
        

In [168]:
# decoder
# 注意 -> 学習時と生成時の最後の層は異なるので注意　　学習時：　softmax, 生成時：argmax

class Decoder:
    """TimeEmbedding -> TimeLSTM -> TimeAffine stack whose LSTM state is set from outside.

    forward() returns raw scores (the loss layer is applied by the caller), while
    generate() picks the argmax of the scores at every step.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the three layers with randomly initialised weights.

        Args:
            vocab_size: Number of rows of the embedding matrix and width of the scores.
            wordvec_size: Number of columns of the embedding matrix / LSTM input width.
            hidden_size: LSTM hidden width (the weight matrices are 4x this wide).
        """
        randn = np.random.randn

        # The draw order (embedding, Wx, Wh, affine) is kept as-is so that runs
        # with a seeded np.random give the same weights.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(wordvec_size, 4 * hidden_size) / np.sqrt(wordvec_size)).astype('f')
        lstm_Wh = (randn(hidden_size, 4 * hidden_size) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(4 * hidden_size, dtype='f')
        affine_W = (randn(hidden_size, vocab_size) / np.sqrt(hidden_size)).astype('f')
        affine_b = np.zeros(vocab_size, dtype='f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        # Flat parameter/gradient lists in layer order: embed, lstm, affine.
        layers = (self.embed, self.lstm, self.affine)
        self.params = [param for layer in layers for param in layer.params]
        self.grads = [grad for layer in layers for grad in layer.grads]

    def forward(self, xs, h):
        """Return the scores for ``xs`` after setting the LSTM state to ``h``.

        ``h`` comes from the encoder and has to be set before the LSTM runs.
        """
        self.lstm.set_state(h)
        embedded = self.embed.forward(xs)
        hidden = self.lstm.forward(embedded)
        return self.affine.forward(hidden)

    def backward(self, dscore):
        """Back-propagate ``dscore`` through all three layers.

        Returns:
            ``self.lstm.dh`` as it is after the LSTM's backward pass.
        """
        dhidden = self.affine.backward(dscore)
        dembedded = self.lstm.backward(dhidden)
        self.embed.backward(dembedded)
        return self.lstm.dh

    def generate(self, h, start_id, sample_size):
        """Greedily generate ``sample_size`` ids, starting from ``start_id``.

        The LSTM state is set to ``h`` once; each step feeds the previous id as a
        (1, 1) array and keeps the argmax of the resulting scores.

        Returns:
            A list of Python ints (``start_id`` itself is not included).
        """
        self.lstm.set_state(h)

        current_id = start_id
        generated = []
        for _ in range(sample_size):
            step_input = np.array(current_id).reshape((1, 1))
            hidden = self.lstm.forward(self.embed.forward(step_input))
            score = self.affine.forward(hidden)

            current_id = np.argmax(score.flatten())
            generated.append(int(current_id))

        return generated

In [162]:
# class Decoder:
#     def __init__(self, vocab_size, wordvec_size, hidden_size):
#         V, D, H = vocab_size, wordvec_size, hidden_size
#         rn = np.random.randn

#         embed_W = (rn(V, D) / 100).astype('f')
#         lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
#         lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
#         lstm_b = np.zeros(4 * H).astype('f')
#         affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
#         affine_b = np.zeros(V).astype('f')

#         self.embed = TimeEmbedding(embed_W)
#         self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
#         self.affine = TimeAffine(affine_W, affine_b)

#         self.params, self.grads = [], []
#         for layer in (self.embed, self.lstm, self.affine):
#             self.params += layer.params
#             self.grads += layer.grads

#     def forward(self, xs, h):
#         self.lstm.set_state(h)

#         out = self.embed.forward(xs)
#         out = self.lstm.forward(out)
#         score = self.affine.forward(out)
#         return score

#     def backward(self, dscore):
#         dout = self.affine.backward(dscore)
#         dout = self.lstm.backward(dout)
#         dout = self.embed.backward(dout)
#         dh = self.lstm.dh
#         return dh

#     def generate(self, h, start_id, sample_size):
#         sampled = []
#         sample_id = start_id
#         self.lstm.set_state(h)

#         for _ in range(sample_size):
#             x = np.array(sample_id).reshape((1, 1))
#             out = self.embed.forward(x)
#             out = self.lstm.forward(out)
#             score = self.affine.forward(out)

#             sample_id = np.argmax(score.flatten())
#             sampled.append(int(sample_id))

#         return sampled


In [163]:
# # Seq2seqの実装

# class Seq2seq(BaseModel):
#     def __init__(self, vocab_size, wordvec_size, hidden_size):
#         V, D, H = vocab_size, wordvec_size, hidden_size
#         self.encoder = Encoder(V, D, H)
#         self.decoder = Decoder(V, D, H)
#         self.softmax = TimeSoftmaxWithLoss()


#         self.params = self.encoder.params + self.decoder.params
#         self.grads = self.encoder.grads + self.decoder.grads


#     def forward(self, xs, ts):
#         decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:] # decoderとencoderで入力が異なるので注意

#         h = self.encoder.forward(xs)
#         score = self.decoder.forward(decoder_xs, h)
#         loss = self.softmax.forward(score, decoder_ts)
#         return loss


#     def backward(self, dout=1):
#         dout = self.softmax.backward(dout)
#         dh = self.decoder.backward(dout)
#         dout = self.encoder.backward(dh)
#         return dout

#     def generate(self, xs, start_id, sample_size):
#         h = self.encoder.forward(xs)
#         sampled = self.decoder.generate(h, start_id, sample_size)
#         return sampled

In [169]:
class Seq2seq(BaseModel):
    """Encoder-decoder model: the Encoder's output seeds the Decoder's LSTM state."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, the decoder and the loss layer from the same three sizes."""
        sizes = (vocab_size, wordvec_size, hidden_size)
        self.encoder = Encoder(*sizes)
        self.decoder = Decoder(*sizes)
        self.softmax = TimeSoftmaxWithLoss()

        # Flat parameter/gradient lists, encoder entries first.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Return the loss for inputs ``xs`` and target sequences ``ts``.

        The decoder is fed ``ts`` without its last column and is scored against
        ``ts`` without its first column, i.e. the targets shifted by one position.
        """
        decoder_xs = ts[:, :-1]
        decoder_ts = ts[:, 1:]

        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        """Back-propagate from the loss through the decoder and then the encoder."""
        dscore = self.softmax.backward(dout)
        dh = self.decoder.backward(dscore)
        return self.encoder.backward(dh)

    def generate(self, xs, start_id, sample_size):
        """Encode ``xs`` and let the decoder generate ``sample_size`` ids from ``start_id``."""
        h = self.encoder.forward(xs)
        return self.decoder.generate(h, start_id, sample_size)

## seq2seqの評価

In [170]:
# coding: utf-8
from common.time_layers import *


class PeekyDecoder:
    """Decoder that hands the encoder state ``h`` to the LSTM and Affine layers at every step.

    ``h`` is concatenated in front of the input of both layers, which is why
    their weight matrices have ``hidden_size`` extra input rows.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the embedding, LSTM and affine layers with random initial weights.

        Args:
            vocab_size: Number of rows of the embedding matrix and width of the scores.
            wordvec_size: Number of columns of the embedding matrix.
            hidden_size: LSTM hidden width.
        """
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        # LSTM input is [h, embedding] -> H + D rows.
        lstm_Wx = (rn(H + D, 4 * H) / np.sqrt(H + D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        # Affine input is [h, lstm output] -> H + H rows.
        affine_W = (rn(H + H, V) / np.sqrt(H + H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        self.params, self.grads = [], []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads
        self.cache = None  # hidden width H seen by the latest forward()

    def forward(self, xs, h):
        """Return the scores for ``xs`` (2-D, N x T) given the encoder state ``h`` (2-D, N x H)."""
        N, T = xs.shape
        N, H = h.shape

        self.lstm.set_state(h)

        out = self.embed.forward(xs)
        # Copy each sample's h to every one of the T steps: (N, H) -> (N, T, H).
        hs = np.repeat(h, T, axis=0).reshape(N, T, H)
        out = np.concatenate((hs, out), axis=2)

        out = self.lstm.forward(out)
        out = np.concatenate((hs, out), axis=2)

        score = self.affine.forward(out)
        self.cache = H
        return score

    def backward(self, dscore):
        """Back-propagate ``dscore`` and return the gradient with respect to ``h``.

        ``h`` was used in three places (initial LSTM state, LSTM input, affine
        input), so the three gradients are added up.
        """
        H = self.cache

        dout = self.affine.backward(dscore)
        # The first H channels belong to the concatenated h, the rest to the LSTM output.
        dout, dhs0 = dout[:, :, H:], dout[:, :, :H]
        dout = self.lstm.backward(dout)
        dembed, dhs1 = dout[:, :, H:], dout[:, :, :H]
        self.embed.backward(dembed)

        dhs = dhs0 + dhs1
        dh = self.lstm.dh + np.sum(dhs, axis=1)
        return dh

    def generate(self, h, start_id, sample_size):
        """Greedily generate ``sample_size`` ids, starting from ``start_id``.

        Returns:
            A list of Python ints (``start_id`` itself is not included).
        """
        sampled = []
        char_id = start_id
        self.lstm.set_state(h)

        H = h.shape[1]
        peeky_h = h.reshape(1, 1, H)
        for _ in range(sample_size):
            x = np.array([char_id]).reshape((1, 1))
            out = self.embed.forward(x)

            out = np.concatenate((peeky_h, out), axis=2)
            out = self.lstm.forward(out)
            out = np.concatenate((peeky_h, out), axis=2)
            score = self.affine.forward(out)

            char_id = np.argmax(score.flatten())
            # Store plain ints, matching Decoder.generate, rather than np.int64 scalars.
            sampled.append(int(char_id))

        return sampled


class PeekySeq2seq(Seq2seq):
    """Seq2seq whose decoder is a PeekyDecoder; forward/backward/generate are inherited."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, the peeky decoder and the loss layer from the same three sizes."""
        sizes = (vocab_size, wordvec_size, hidden_size)
        self.encoder = Encoder(*sizes)
        self.decoder = PeekyDecoder(*sizes)
        self.softmax = TimeSoftmaxWithLoss()

        # Flat parameter/gradient lists, encoder entries first.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

In [171]:
from hidden_others.dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq

# Data preparation
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters
# NOTE: this name used to be misspelled `vacab_size`, so the model below was built
# from the `vocab_size` left over from the PTB cells instead of the character
# vocabulary of this dataset.
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
max_grad = 5.0

# Model, optimizer and trainer
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    # Train one epoch at a time so the test set can be evaluated in between.
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size, max_grad=max_grad)

    # Evaluation after every epoch
    correct_num = 0
    for i in range(len(x_test)):
        question, correct = x_test[[i]], t_test[[i]]  # list index keeps the arrays 2-D
        verbose = i < 10  # verbose is True only for the first 10 test samples
        correct_num += eval_seq2seq(model, question, correct, id_to_char, verbose)

    acc = float(correct_num) / len(x_test)
    acc_list.append(acc)
    print(f'val acc {acc * 100}')

| epoch 1 |  iter 1 / 351 | time 0[s] | loss 2.56
| epoch 1 |  iter 21 / 351 | time 0[s] | loss 2.53
| epoch 1 |  iter 41 / 351 | time 1[s] | loss 2.17
| epoch 1 |  iter 61 / 351 | time 2[s] | loss 1.96
| epoch 1 |  iter 81 / 351 | time 2[s] | loss 1.92
| epoch 1 |  iter 101 / 351 | time 3[s] | loss 1.87
| epoch 1 |  iter 121 / 351 | time 4[s] | loss 1.85
| epoch 1 |  iter 141 / 351 | time 5[s] | loss 1.83
| epoch 1 |  iter 161 / 351 | time 5[s] | loss 1.79
| epoch 1 |  iter 181 / 351 | time 6[s] | loss 1.77
| epoch 1 |  iter 201 / 351 | time 7[s] | loss 1.77
| epoch 1 |  iter 221 / 351 | time 8[s] | loss 1.76
| epoch 1 |  iter 241 / 351 | time 8[s] | loss 1.76
| epoch 1 |  iter 261 / 351 | time 9[s] | loss 1.76
| epoch 1 |  iter 281 / 351 | time 10[s] | loss 1.75
| epoch 1 |  iter 301 / 351 | time 11[s] | loss 1.74
| epoch 1 |  iter 321 / 351 | time 12[s] | loss 1.75
| epoch 1 |  iter 341 / 351 | time 12[s] | loss 1.74
Q 77+85  
T 162 
[91m☒[0m 100 
---
Q 975+164
T 1139
[91m☒[0m 1

In [185]:
# Scratch input: a random (2, 5) array for trying out np.repeat in the cells below.
test = np.random.randn(2, 5)

In [186]:
# Repeat every row twice along axis 0, then regroup so that axis 1 holds the copies
# of one row (the same repeat/reshape pattern as `hs` in PeekyDecoder.forward).
np.repeat(test, 2, axis=0).reshape(2, 2, 5)

array([[[ 0.34388132, -1.08087195, -0.90257912,  0.42967783,
          0.71144932],
        [ 0.34388132, -1.08087195, -0.90257912,  0.42967783,
          0.71144932]],

       [[-0.3335634 ,  1.0436824 ,  1.42282492, -1.14696863,
          0.8775764 ],
        [-0.3335634 ,  1.0436824 ,  1.42282492, -1.14696863,
          0.8775764 ]]])

In [187]:
# Without the reshape: the copies of each row sit next to each other along axis 0.
np.repeat(test, 2, axis=0)

array([[ 0.34388132, -1.08087195, -0.90257912,  0.42967783,  0.71144932],
       [ 0.34388132, -1.08087195, -0.90257912,  0.42967783,  0.71144932],
       [-0.3335634 ,  1.0436824 ,  1.42282492, -1.14696863,  0.8775764 ],
       [-0.3335634 ,  1.0436824 ,  1.42282492, -1.14696863,  0.8775764 ]])

In [189]:
# Shape of the training inputs.
x_train.shape

(45000, 7)

In [190]:
# Shape of the training targets.
t_train.shape

(45000, 5)

In [191]:
# Peek at the training target id sequences.
t_train

array([[ 6,  0, 11,  7,  5],
       [ 6,  3, 10, 10,  5],
       [ 6,  3,  1,  3,  5],
       ...,
       [ 6,  7, 11,  9,  5],
       [ 6,  8,  3,  3,  5],
       [ 6,  4,  1,  4,  5]])