In [1]:
from pathlib import Path
from common.utils import *
import numpy as np
import matplotlib.pyplot as plt
import import_ipynb
import csv

from dataset.gillam import load_data
from common.time_layers import *
from common.base_model import BaseModel

# Load the training split. Later cells read `corpus` and `word_to_id`, so this
# cell must run first on a fresh kernel.
# NOTE(review): judging by the names, the other two values are presumably the
# word->id and id->word vocabulary mappings — confirm against dataset.gillam.
corpus, word_to_id, id_to_word = load_data('train')

In [2]:
# Dataset overview: number of sequences and vocabulary size.
print('children num:', len(corpus))
print('vocab size:', len(word_to_id))

# Final element of three sample sequences, printed one per line.
print(*(corpus[i][-1] for i in (0, 100, 200)), sep='\n')

# Index of the longest sequence, reported together with its length.
max_index = np.argmax(list(map(len, corpus)))
print('max length and index:', len(corpus[max_index]), max_index)

children num: 540
vocab size: 3721
0
0
1
max length and index: 1718 467


In [3]:
class Rnnlm(BaseModel):
    """Two-layer LSTM encoder followed by a single-output classifier head.

    The layer stack is Embedding -> Dropout -> LSTM -> Dropout -> LSTM -> Dropout.
    Only the hidden state of the last time step is fed to the affine
    classifier, whose single output is scored by a sigmoid loss layer.

    Args:
        vocab_size: Number of rows of the embedding matrix (V).
        wordvec_size: Embedding dimension (D).
        hidden_size: LSTM hidden dimension (H).
        dropout_ratio: Ratio passed to every TimeDropout layer.
    """

    def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100, dropout_ratio=0.5):
        V, D, H = vocab_size, wordvec_size, hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx1 = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh1 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b1 = np.zeros(4 * H).astype('f')
        lstm_Wx2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_Wh2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b2 = np.zeros(4 * H).astype('f')
        # The classifier consumes the last LSTM hidden state, whose width is H
        # (not the embedding width D), so the weight must be (H, 1).
        affine_W = (rn(H, 1) / 100).astype('f')
        affine_b = np.zeros(1).astype('f')

        self.layers = [
            TimeEmbedding(embed_W),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
            TimeDropout(dropout_ratio),
            TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
            TimeDropout(dropout_ratio)
        ]
        # NOTE(review): the classifier and loss layers below receive 2-D
        # (N, H) / (N, 1) arrays — confirm that the project's TimeAffine and
        # TimeSigmoidWithLoss accept inputs without a time axis.
        self.clf_layer = TimeAffine(affine_W, affine_b)
        self.loss_layer = TimeSigmoidWithLoss()
        self.lstm_layers = [self.layers[2], self.layers[4]]
        self.drop_layers = [self.layers[1], self.layers[3], self.layers[5]]

        # Register the classifier head together with the encoder layers so that
        # its weights are exposed through self.params / self.grads as well.
        self.params, self.grads = [], []
        for layer in self.layers + [self.clf_layer]:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs, train_flg=False):
        """Run the encoder stack and return the output of the last time step.

        Args:
            xs: Input passed to the first layer of the stack.
            train_flg: Value assigned to ``train_flg`` of every dropout layer.

        Returns:
            The slice ``[:, -1, :]`` of the encoder output.
        """
        for layer in self.drop_layers:
            layer.train_flg = train_flg

        for layer in self.layers:
            xs = layer.forward(xs)

        # Remember the sequence length so backward() can rebuild the time axis.
        self.last_T = xs.shape[1]
        return xs[:, -1, :]

    def forward(self, xs, label, train_flg=True):
        """Compute the loss of the classifier output against ``label``."""
        xs = self.predict(xs, train_flg)
        score = self.clf_layer.forward(xs)
        loss = self.loss_layer.forward(score, label)
        return loss

    def backward(self, dout=1):
        """Back-propagate from the loss through the classifier and the encoder.

        Must be called after forward(), which records the sequence length.

        Returns:
            The gradient returned by the first layer of the stack.
        """
        dout = self.loss_layer.backward(dout)
        dout = self.clf_layer.backward(dout)     # gradient w.r.t. the last hidden state, (N, H)

        # Expand (N, H) to (N, T, H): only the last time step contributed to
        # the score, so every other time step receives a zero gradient.
        N, H = dout.shape
        dxs = np.zeros((N, self.last_T, H), dtype=dout.dtype)
        dxs[:, -1, :] = dout

        # Propagate the expanded gradient (not the 2-D `dout`) through the stack.
        for layer in reversed(self.layers):
            dxs = layer.backward(dxs)
        return dxs

    def reset_state(self):
        """Reset the state of both LSTM layers."""
        for layer in self.lstm_layers:
            layer.reset_state()

batch_size = 미니배치 크기  
max_epoch = 학습 수행하는 에폭 수  
eval_interval = 결과 출력 간격  
max_grad = 기울기 최대 노름  
time_size = RNN이 한번에 펼쳐서 보는 time step 길이  

#### 첫 번째 모델  
- batch_size = 20  
- wordvec_size = 256  
- hidden_size = 650  
- time_size = 35  
- lr = 20.0  
- max_epoch = 40  
- max_grad = 0.25  
- dropout = 0.5  

In [6]:
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from common.utils import eval_perplexity

# Training script for the first model configuration.
# Requires `corpus`, `word_to_id`, `load_data` and `np` from the setup cell, and
# the `Rnnlm` class defined above; running it on a fresh kernel without those
# cells raises NameError.

# Hyperparameters.
batch_size = 20
max_epoch = 40
max_grad = 0.25
wordvec_size = 256
hidden_size = 650
time_size = 35
lr = 20.0
dropout = 0.5

# Load the other two splits; only the first returned value of each is kept.
# NOTE(review): corpus_test is not used anywhere in this cell.
corpus_val, _, _ = load_data("dev")
corpus_test, _, _ = load_data("test")

vocab_size = len(word_to_id)
# The last element of every training sequence is used as that sequence's label.
# NOTE(review): `corpus` is passed to fit() below with that last element still
# in place — confirm the trainer strips it, otherwise the label is part of the
# model input.
label = np.array([corpus[i][-1] for i in range(len(corpus))])

model = Rnnlm(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# Train one epoch at a time so the model can be evaluated after each epoch.
best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(corpus, label, max_epoch=1, batch_size=batch_size, time_size=time_size, max_grad=max_grad)

    # Clear the LSTM state carried over from training before evaluating.
    model.reset_state()
    # NOTE(review): the model is trained with a sigmoid loss on per-sequence
    # labels — confirm that perplexity is the intended validation metric.
    ppl = eval_perplexity(model, corpus_val)
    print('검증 퍼플렉서티: ', ppl)

    # Save the parameters when validation improves; otherwise divide the
    # learning rate by 4 and push the new value into the optimizer.
    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()
    else:
        lr /= 4.0
        optimizer.lr = lr

    # Clear the state left by evaluation before the next training epoch.
    model.reset_state()
    print('-' * 50)



NameError: name 'corpus' is not defined

In [55]:
# Total number of elements across all sequences in the corpus.
# Built with sum() so no loop variable is left behind in the kernel namespace.
data_size = sum(len(seq) for seq in corpus)
print(data_size)

204143
