In [1]:
import tensorflow as tf
from tensorflow.contrib import seq2seq
import helper
from data_generator import CoupletsDataGenerator
import numpy as np

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
# Step up one directory; the path printed below is the resulting working directory.
%cd ..

/Users/zhangshulin_work/Desktop/AI-Play/Git/couplets


In [3]:
# Load the vocabulary size, the two vocabulary lookup tables and the
# train/dev/test splits from the project helper (it prints the sizes shown below).
vocab_size, index2word, word2index, train_set, dev_set, test_set = helper.process_dataset()

vocab_size: 729587
train_set shape: (766491, 52)
dev_set shape: (4000, 40)
test_set shape: (4000, 43)


In [4]:
# One batch generator per split. train() below pulls its batches from
# train_gen.get_batch(...); dev_gen and test_gen are not used in the cells shown here.
train_gen = CoupletsDataGenerator(train_set)
dev_gen = CoupletsDataGenerator(dev_set)
test_gen = CoupletsDataGenerator(test_set)

In [5]:
def build_inputs():
    """Create the placeholders through which data enters the graph.

    Returns:
        A 3-tuple ``(input, label, keep_prob)``:
        * ``input``  - int32 placeholder of shape ``(None, None)`` named 'input'
        * ``label``  - int32 placeholder of shape ``(None, None)`` named 'label'
        * ``keep_prob`` - float32 placeholder named 'keep_prob' (no shape declared)
    """
    with tf.name_scope('input_placeholders'):
        # The two id placeholders share shape and dtype; only the name differs.
        token_ids, target_ids = [
            tf.placeholder(shape=(None, None), dtype=tf.int32, name=ph_name)
            for ph_name in ('input', 'label')
        ]
        dropout_keep = tf.placeholder(dtype=tf.float32, name='keep_prob')

    return token_ids, target_ids, dropout_keep

In [6]:
def build_embeding_layer(input, vocab_size, embeding_dim):
    """Look up a trainable embedding vector for every id in ``input``.

    Args:
        input: Tensor of ids used to index the embedding matrix.
        vocab_size: Number of rows in the embedding matrix.
        embeding_dim: Number of columns in the embedding matrix.

    Returns:
        The result of ``tf.nn.embedding_lookup`` on the new matrix.
    """
    with tf.name_scope('embeding_layer'):
        # Initial values are drawn uniformly between -1 and 1.
        initial_values = tf.random_uniform(
            shape=(vocab_size, embeding_dim),
            minval=-1,
            maxval=1,
            dtype=tf.float32,
        )
        table = tf.Variable(initial_values, name='embeding_matrix')
        looked_up = tf.nn.embedding_lookup(table, input, name='embed_lookup')

    return looked_up

In [24]:
def build_lstm_cell(num_units, num_layers, keep_prob, batch_size):
    """Stack ``num_layers`` dropout-wrapped LSTM cells and create their zero state.

    Args:
        num_units: Unit count given to each ``BasicLSTMCell``.
        num_layers: How many cells to stack in the ``MultiRNNCell``.
        keep_prob: Passed as ``output_keep_prob`` to each ``DropoutWrapper``.
        batch_size: Batch size used to build the float32 zero state.

    Returns:
        ``(cell, init_zero_state)`` - the stacked cell and its zero state.
    """
    rnn = tf.nn.rnn_cell

    with tf.name_scope('lstm_cell'):
        layers = []
        for _ in range(num_layers):
            # Every layer gets its own cell object, wrapped with output dropout.
            base_cell = rnn.BasicLSTMCell(num_units)
            layers.append(rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob))

        stacked = rnn.MultiRNNCell(layers)
        zero_state = stacked.zero_state(batch_size, tf.float32)

    return stacked, zero_state

In [25]:
def build_lstm_layer(cell, embed_input, init_state):
    """Unroll ``cell`` over ``embed_input`` with ``tf.nn.dynamic_rnn``.

    Args:
        cell: RNN cell to unroll.
        embed_input: Input tensor fed to the RNN.
        init_state: State the RNN starts from.

    Returns:
        ``(outputs, final_state)`` exactly as produced by ``tf.nn.dynamic_rnn``.
    """
    with tf.name_scope('lstm_layer'):
        sequence_outputs, last_state = tf.nn.dynamic_rnn(
            cell,
            embed_input,
            initial_state=init_state,
        )

    return sequence_outputs, last_state

### Hyper-parameters

In [26]:
# Module-level settings; train() uses several of them as default argument values.
embeding_dim = 10  # width of the embedding matrix (read by build_forward)
lstm_units = 16    # units per LSTM cell (read by train)
lstm_layers = 1    # number of stacked LSTM cells (read by train)
keep_prob = 0.7    # dropout keep probability for the LSTM outputs
batch_size = 8     # rows per batch; also sizes the LSTM zero state
lr = 0.01          # learning rate given to the Adam optimizer
epochs = 2         # forwarded to train_gen.get_batch
grad_clip = 5      # global-norm clipping threshold used in build_optimizer

In [27]:
def build_forward(cell, input, init_state):
    """Assemble the forward pass: embedding -> LSTM -> dense -> softmax.

    Reads the module-level ``vocab_size`` and ``embeding_dim``.

    Args:
        cell: RNN cell passed on to ``build_lstm_layer``.
        input: Id tensor passed on to ``build_embeding_layer``.
        init_state: Initial RNN state passed on to ``build_lstm_layer``.

    Returns:
        ``(outputs, logits, final_state)`` where ``outputs`` is the softmax of
        ``logits`` and ``final_state`` is the state returned by the LSTM layer.
    """
    embedded = build_embeding_layer(input, vocab_size, embeding_dim)
    rnn_outputs, last_state = build_lstm_layer(cell, embedded, init_state)

    # Dense projection to one score per vocabulary entry.
    logits = tf.layers.dense(rnn_outputs, vocab_size, name='fc_layer')

    return tf.nn.softmax(logits), logits, last_state

In [28]:
def build_optimizer(cost, lr, grad_clip):
    """Create an Adam update op whose gradients are clipped by global norm.

    Args:
        cost: Tensor to differentiate with respect to all trainable variables.
        lr: Learning rate for ``tf.train.AdamOptimizer``.
        grad_clip: Clip norm passed to ``tf.clip_by_global_norm``.

    Returns:
        The op returned by ``AdamOptimizer.apply_gradients``.
    """
    variables = tf.trainable_variables()
    raw_gradients = tf.gradients(cost, variables)
    # The second return value (the global norm) is not needed here.
    clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients, grad_clip)

    adam = tf.train.AdamOptimizer(lr)
    return adam.apply_gradients(zip(clipped_gradients, variables))

In [29]:
def train(lr=lr, batch_size=batch_size, epochs=epochs, keep_prob=keep_prob, print_step=50):
    """Rebuild the graph from scratch and run the training loop.

    Args:
        lr: Learning rate handed to ``build_optimizer``.
        batch_size: Rows per batch; sizes the LSTM zero state and is passed to
            ``train_gen.get_batch``.
        epochs: Passed to ``train_gen.get_batch``.
        keep_prob: Dropout keep probability, fed to the ``keep_prob``
            placeholder on every training step.
        print_step: Print the cost every ``print_step`` steps. A falsy value
            (``0`` / ``None``) disables the progress output.

    Returns:
        None. Progress is reported with ``print``.
    """
    tf.reset_default_graph()

    input_pl, label_pl, keep_prob_pl = build_inputs()
    # Wire the *placeholder* into the dropout wrappers. Passing the Python
    # float here would freeze the dropout rate into the graph and leave the
    # fed placeholder value without any effect.
    cell_op, init_zero_state_op = build_lstm_cell(lstm_units, lstm_layers,
                                                  keep_prob_pl, batch_size)

    # The zero-state tensors double as the feed point for the carried state.
    init_state_op = init_zero_state_op

    _, logits_op, final_state_op = build_forward(cell_op, input_pl, init_state_op)

    # Every position gets weight 1 in the sequence loss.
    cost_op = seq2seq.sequence_loss(logits_op, label_pl,
                                    tf.ones_like(input_pl, dtype=tf.float32))

    optimizer_op = build_optimizer(cost_op, lr, grad_clip)

    # The context manager closes the session even if the loop is interrupted
    # (e.g. KeyboardInterrupt from stopping the cell).
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        init_state = sess.run(init_zero_state_op)

        # NOTE(review): the state is carried from one batch to the next, which
        # presumably requires every batch to have exactly `batch_size` rows -
        # confirm against CoupletsDataGenerator.get_batch.
        for step, (X, Y) in enumerate(train_gen.get_batch(sess, batch_size, epochs)):
            feed_dict = {
                input_pl: X,
                label_pl: Y,
                init_state_op: init_state,
                keep_prob_pl: keep_prob
            }

            _, cost, final_state = sess.run([optimizer_op, cost_op, final_state_op],
                                            feed_dict=feed_dict)

            # Guard against print_step == 0, which would raise ZeroDivisionError.
            if print_step and step % print_step == 0:
                print('step:{0}--cost:{1:.3f}'.format(step, cost))

            init_state = final_state

In [30]:
# Smoke run with the default hyper-parameters, printing the cost every 2 steps.
train(print_step=2)

step:0--cost:13.50
step:2--cost:13.41


KeyboardInterrupt: 