In [2]:
import common.time_layers as TL
import pickle
import numpy as np


class Rnnlm:
    def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100):
        V, H, D = vocab_size, wordvec_size, hidden_size
        normal = np.random.normal
        mu = 0
        root = np.sqrt
        embed_W = normal(mu, 0.01, (V, D))
        lstm_Wx = normal(mu, 1/root(D), (D, 4*H))
        lstm_Wh = normal(mu, 1/root(H), (H, 4*H))
        lstm_b = np.zeros(4*H, dtype=np.float_)
        affine_W = normal(mu, 1/root(H), (H, V))
        affine_b = np.zeros(V, dtype=np.float_)

        self.layers = (
            TL.TimeEmbedding(embed_W),
            TL.TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True),
            TL.TimeAffine(embed_W.T, affine_b)
        )
        self.loss_layer = TL.TimeSoftmaxWithLoss()
        self.lstm_layer = self.layers[1]

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

    def predict(self, xs: np.ndarray):
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts):
        score = self.predict(xs)
        loss = self.loss_layer.forward(score, ts)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        return dout

    def reset_state(self):
        self.lstm_layer.reset_state()

    def save_params(self, fname="Rnnlm.pkl"):
        with open(fname, "wb")as f:
            pickle.dump(self.params, f)

    def load_params(self, fname="Rnnlm.pkl"):
        with open(fname, "rb") as f:
            self.params = pickle.load(f)


In [None]:
# coding: utf-8
from common.optimizers import SGD
from common.trainer import RnnlmTrainer
from common.util import eval_perplexity
from dataset import ptb


# ハイパーパラメータの設定
batch_size = 20
wordvec_size = 100
hidden_size = 100  # RNNの隠れ状態ベクトルの要素数
time_size = 35  # RNNを展開するサイズ
lr = 20.0
max_epoch = 4
max_grad = 0.25

# 学習データの読み込み
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# モデルの生成
model = Rnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# 勾配クリッピングを適用して学習
trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad,
            eval_interval=20)
trainer.plot(ylim=(0, 500))



In [3]:
# テストデータで評価
model.rest_state()
ppl_test = eval_perplexity(model, corpus_test)
print('test perplexity: ', ppl_test)

# パラメータの保存
model.save_params()

evaluating perplexity ...
234 / 235
test perplexity:  134.51278397479436


In [3]:
from common.functions import softmax

class RnnlmGen(Rnnlm):
    def generate(self, start_id: int, skip_ids=None, sample_size=100):
        word_ids = [start_id]
        x = start_id

        while (len(word_ids) < sample_size):
            x = np.array(x).reshape(1,1)
            score = self.predict(x)
            p = softmax(score.flatten())
            sampleId = np.random.choice(len(p), size=1, p=p)

            if (skip_ids is None) or (sampleId not in skip_ids):
                x = sampleId
                word_ids.append(int(x))

        return word_ids


In [6]:
from dataset import ptb
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)
corpus_size = len(corpus)

model = RnnlmGen()
model.load_params('./Rnnlm.pkl')

# start文字とskip文字の設定
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[w] for w in skip_words]
# 文章生成
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join([id_to_word[i] for i in word_ids])
txt = txt.replace(' <eos>', '.\n')
print(txt)

you allows buckle pursue christies adopting aerospace pinpoint eventual equivalents audits scams cray-3 shop reducing deprived carol or 's efficient respectability arms-control trudeau purpose agrees bringing regulate certificates buy dover boring basir hitachi ordinary leval rep fares teller packages balance advise charter highway beef consolidating ordinarily conner malignant subscribe selected einhorn lease capped opening latter dislike competitive guerrilla fashion hire contras erased creation arrangement running strict turns living wonderful impressive snapped ease covered averaging impose technical brushed instead french reasonable viewpoint b pont freeways trans anticipation tours compelling guaranty grant shuttle manufacturers shed cameras takeovers canceled relief rica sick shamir
