In [1]:
# -*- coding: utf-8 -*-

import os
# Make only the CUDA device with index 3 visible to this process. The variable
# is set before TensorFlow is imported below so that it is already present in
# the environment when TensorFlow loads.
os.environ['CUDA_VISIBLE_DEVICES'] = "3"

import tensorflow as tf
import numpy as np
import time

# Project-local modules: the network definition and the dataset wrapper.
from dnn_model import Model
from data_loader import text_data
# Dataset object used by the rest of the script (vocab_size, w2idx / idx2w
# lookups, test_ids, get_train / get_test). The path suggests Penn Treebank
# data -- TODO confirm against data_loader.
data = text_data("./dataset/ptb/")


def initialize_session():
    """Create a TensorFlow session with a capped GPU memory fraction.

    The session is configured with ``per_process_gpu_memory_fraction=0.4``.

    Returns:
        A new ``tf.Session``. The caller is responsible for closing it.
    """
    # Alternative that was left disabled: allow_growth=True instead of a
    # fixed memory fraction.
    gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))

##################################################
# Hyperparameters
BATCH_SIZE = 5          # number of sequences per batch
num_k = 7               # number of preceding words used as context
emb_dim = 64            # word embedding dimension
learning_rate = 0.0005  # learning rate
use_clip = True         # whether to use gradient clipping
##################################################

# Build the network from the hyperparameters above. The use_clip flag is
# forwarded (it used to be shadowed by a hard-coded True), so changing the
# setting in the block above actually takes effect.
model = Model(num_k=num_k, emb_dim=emb_dim, vocab_size=data.vocab_size,
              use_clip=use_clip, learning_rate=learning_rate)

# Open the session and initialize every variable created by the model.
sess = initialize_session()
sess.run(tf.global_variables_initializer())


def sample_test(test_input=""):
    """Print the model's prediction for the word following ``test_input``.

    The text is split on whitespace and its last ``num_k`` words are looked up
    in ``data.w2idx``. The resulting ids are right-aligned in a ``(1, num_k)``
    int32 array; slots with no corresponding word stay 0. The array is fed to
    ``model.out_y`` and the first output is printed together with its
    ``data.idx2w`` entry.

    Args:
        test_input: Context text, words separated by whitespace.

    Note:
        Every word is indexed directly into ``data.w2idx``; a word that is not
        present there will fail the lookup (KeyError if w2idx is a plain
        dict -- TODO confirm).
    """
    tokens = test_input.split()
    context = np.zeros((1, num_k), dtype=np.int32)
    n_used = min(num_k, len(tokens))

    # Walk backwards from the last word, filling the window right-to-left.
    for offset in range(1, n_used + 1):
        context[0, -offset] = data.w2idx[tokens[-offset]]

    prediction = sess.run(model.out_y, feed_dict={model.x: context})
    top = prediction[0]
    print(top, data.idx2w[top])

def test_model():
    """Evaluate the model on the test data and print the mean loss per step.

    Runs ``len(data.test_ids) // BATCH_SIZE`` batches obtained from
    ``data.get_test``. For each batch, a window of ``num_k`` token ids ending
    at position ``i`` is fed as input and the token at ``i + 1`` as target,
    for ``i`` in ``range(num_k - 1, max_len - 2)``. Rows whose sequence has no
    token at ``i + 1`` are masked out with 0.

    Prints ``test loss: <mean>`` where the mean is taken over all evaluated
    steps, or a notice when no step could be evaluated (e.g. every test
    sequence is too short), instead of dividing by zero.
    """
    num_it = len(data.test_ids) // BATCH_SIZE
    test_x = np.zeros((BATCH_SIZE, num_k), dtype=np.int32)
    # Targets live in a local buffer. Writing them into the module-level
    # training buffer would make this function depend on a global defined
    # later in the script and overwrite training state on every evaluation.
    test_y = np.zeros(BATCH_SIZE, dtype=np.int32)
    mask = np.zeros(BATCH_SIZE, dtype=np.int32)
    test_loss, test_cnt = 0, 0

    for _ in range(num_it):
        test_ids, length = data.get_test(BATCH_SIZE)
        max_len = max(length)

        # Reset the buffers so rows not covered by this batch stay masked out.
        test_x.fill(0)
        test_y.fill(0)
        mask.fill(0)

        for i in range(num_k - 1, max_len - 2):
            for batch in range(len(test_ids)):
                # Fill the context window right-to-left: slot num_k-1 holds
                # token i, slot num_k-2 holds token i-1, and so on.
                for j in range(0, num_k):
                    if i < j or i - j >= length[batch]:
                        break
                    test_x[batch][num_k - j - 1] = test_ids[batch][i - j]

                # A row contributes only if it has a target token at i + 1.
                has_target = length[batch] > i + 1
                mask[batch] = 1 if has_target else 0
                if has_target:
                    test_y[batch] = test_ids[batch][i + 1]

            loss = sess.run(model.loss,
                            feed_dict={model.x: test_x, model.y: test_y, model.mask: mask})
            test_loss += loss
            test_cnt += 1

    if test_cnt == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError.
        print("test loss: n/a (no evaluation steps)")
        return
    print("test loss: {:.3f}".format(test_loss / test_cnt))


# Feed buffers reused across all training steps.
input_x = np.zeros((BATCH_SIZE, num_k), dtype=np.int32)   # context windows
input_y = np.zeros(BATCH_SIZE, dtype=np.int32)            # target token ids
input_mask = np.zeros(BATCH_SIZE, dtype=np.int32)         # 1 = row has a target
length = np.zeros(BATCH_SIZE, dtype=np.int32)             # replaced by get_train below

# Running loss accumulator for the log line, and the reporting intervals
# (in outer iterations) for logging, evaluation, checkpointing and sampling.
avg_loss, it_cnt = 0, 0
it_log, it_test, it_save, it_sample = 50, 250, 1000, 250
start_time = time.time()

try:
    for it in range(0, 10000):
        train_ids, length = data.get_train(BATCH_SIZE)
        max_len = max(length)
        input_x.fill(0)
        # Reset the mask as well (test_model does the same): otherwise rows
        # beyond len(train_ids) would keep the previous batch's mask and
        # stale inputs/targets would be trained on.
        input_mask.fill(0)

        # One update per position i: the window of num_k tokens ending at i
        # is the input and the token at i + 1 is the target.
        for i in range(num_k-1, max_len-2):
            for batch in range(len(train_ids)):
                # Fill the context window right-to-left.
                for j in range(0, num_k):
                    if i < j or i-j >= length[batch]:
                        break
                    input_x[batch][num_k-j-1] = train_ids[batch][i-j]

                # A row contributes only if it has a target token at i + 1.
                has_target = length[batch] > i+1
                input_mask[batch] = 1 if has_target else 0
                if has_target:
                    input_y[batch] = train_ids[batch][i+1]

            loss, _ = sess.run([model.loss, model.update],
                               feed_dict={model.x: input_x, model.y: input_y, model.mask: input_mask})
            avg_loss += loss
            it_cnt += 1

        if it % it_log == 0:
            # it_cnt is 0 when no update ran since the last log line (all
            # sequences too short); report nan rather than dividing by zero.
            mean_loss = avg_loss / it_cnt if it_cnt else float("nan")
            print(" it: {:4d} | loss: {:.3f} - {:.2f}s".format(it, mean_loss, time.time() - start_time))
            avg_loss, it_cnt = 0, 0

        if it % it_test == 0:
            test_model()
        if it % it_save == 0 and it > 0:
            model.save(sess)
        if it % it_sample == 0 and it > 0:
            sample_test(test_input="again the specialists were not able to")
finally:
    # Release the session even if training is interrupted or raises.
    sess.close()

  from ._conv import register_converters as _register_converters


 it:    0 | loss: 8.839 - 0.27s
test loss: 8.824
 it:   50 | loss: 7.046 - 25.45s
 it:  100 | loss: 6.875 - 27.56s
 it:  150 | loss: 7.042 - 29.68s
 it:  200 | loss: 6.880 - 32.03s
 it:  250 | loss: 6.674 - 34.34s
test loss: 6.637
32 the
 it:  300 | loss: 6.480 - 54.18s
 it:  350 | loss: 6.818 - 56.41s
 it:  400 | loss: 6.661 - 58.77s
 it:  450 | loss: 6.467 - 61.09s
 it:  500 | loss: 6.627 - 63.60s
test loss: 6.271
873 take
 it:  550 | loss: 6.584 - 83.50s
 it:  600 | loss: 6.249 - 85.73s
 it:  650 | loss: 6.156 - 87.88s
 it:  700 | loss: 5.743 - 90.20s
 it:  750 | loss: 6.433 - 92.16s
test loss: 6.033
32 the
 it:  800 | loss: 6.407 - 112.28s
 it:  850 | loss: 6.244 - 114.47s
 it:  900 | loss: 6.292 - 116.64s
 it:  950 | loss: 6.391 - 118.93s
 it: 1000 | loss: 6.239 - 121.38s
test loss: 6.018
Instructions for updating:
Please use tf.global_variables instead.
 * model saved at 'models/dnn'
32 the
 it: 1050 | loss: 5.403 - 141.57s
 it: 1100 | loss: 5.604 - 143.67s
 it: 1150 | loss: 5.91

 it: 9750 | loss: 2.910 - 964.48s
test loss: 2.723
409 continue
 it: 9800 | loss: 3.098 - 979.68s
 it: 9850 | loss: 2.831 - 981.59s
 it: 9900 | loss: 2.843 - 983.36s
 it: 9950 | loss: 3.152 - 985.29s
