In [1]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.layers import core as layers_core

In [2]:
# Record the TensorFlow build this notebook was executed with.
print("TensorFlow Version {}".format(tf.__version__))

TensorFlow Version 1.6.0


# Data Generation

In [3]:
# Generating data

def generating_data(input_len=41, output_len=71, n_samples=52999, seed=None):
    """Create a random binary toy dataset.

    Args:
        input_len: Width of every input vector (number of columns of ``x``).
        output_len: Width of every target vector (number of columns of ``y``).
        n_samples: Number of rows generated for both arrays.
        seed: Optional integer seed. When given, a private ``RandomState`` is
            used so the result is reproducible; when ``None`` the global
            NumPy generator is used, exactly as before.

    Returns:
        Tuple ``(x, y)`` of integer arrays holding only 0/1 values, with shapes
        ``(n_samples, input_len)`` and ``(n_samples, output_len)``.
    """
    # The previous version ignored input_len/output_len and always produced
    # 41/71 columns; the arguments now actually drive the shapes.
    rng = np.random if seed is None else np.random.RandomState(seed)
    x = rng.randint(2, size=(n_samples, input_len))
    y = rng.randint(2, size=(n_samples, output_len))
    return x, y
# Build the toy dataset once; later cells slice it into batches.
x_input, y_label = generating_data()
print(
    "input shape:{} , target shape:{} ".format(
        *(array.shape for array in (x_input, y_label))
    )
)

input shape:(52999, 41) , target shape:(52999, 71) 


# Batch Data

In [4]:
def _slice_batches(data, first_batch, n_batches, batch_size, seq_len):
    """Cut ``n_batches`` consecutive [batch_size, seq_len, width] blocks out of ``data``."""
    rows_per_batch = batch_size * seq_len
    width = data.shape[1]
    batches = []
    for k in range(first_batch, first_batch + n_batches):
        rows = data[k * rows_per_batch:(k + 1) * rows_per_batch]
        # Row-major reshape puts rows [b*seq_len, (b+1)*seq_len) into sequence b,
        # which is the same layout the original nested loops produced.
        # astype() copies, so the batches never alias the source array.
        batches.append(rows.reshape(batch_size, seq_len, width).astype(np.float64))
    return batches


def batch_generator(x_input,
                    y_label,
                    batch_size=5,
                    seq_len=10,
                    test_ratio=0.1):
    """Split row-aligned 2-D arrays into lists of fixed-size sequence batches.

    Rows are consumed in order: every ``seq_len`` consecutive rows form one
    sequence and every ``batch_size`` consecutive sequences form one batch.
    The leading batches become the training set and the trailing
    ``int(total_batches * test_ratio)`` batches the test set. Rows that do
    not fill a complete batch are dropped (their number is printed).

    Args:
        x_input: 2-D array-like of inputs, shape (n_rows, input_width).
        y_label: 2-D array-like of targets, shape (n_rows, target_width).
        batch_size: Number of sequences per batch (must be >= 1).
        seq_len: Number of rows per sequence (must be >= 1).
        test_ratio: Fraction of the batches reserved for testing.

    Returns:
        ``(train_input, train_target, test_input, test_target)``; each is a
        list of float64 arrays shaped (batch_size, seq_len, width).

    Raises:
        AssertionError: if ``x_input`` and ``y_label`` have different row counts.
        ValueError: if ``batch_size`` or ``seq_len`` is smaller than 1.
    """
    x_input = np.asarray(x_input)
    y_label = np.asarray(y_label)
    assert y_label.shape[0] == x_input.shape[0], "no. of x & y are different "  # Check data size

    if batch_size < 1 or seq_len < 1:
        raise ValueError("batch_size and seq_len must both be >= 1")

    n_rows = y_label.shape[0]
    rows_per_batch = batch_size * seq_len
    # Integer floor division: the count is exact and never goes through floats.
    total_batchs = n_rows // rows_per_batch
    test_batchs = int(total_batchs * test_ratio)
    train_batchs = total_batchs - test_batchs

    # Rows left over after the last complete batch. The old expression was
    # (used - total), which reported the discarded rows as a negative number.
    print('unuse data', n_rows - total_batchs * rows_per_batch)
    print('Train Batch: {} ; Test Batch: {}'.format(train_batchs, test_batchs))

    train_input = _slice_batches(x_input, 0, train_batchs, batch_size, seq_len)
    train_target = _slice_batches(y_label, 0, train_batchs, batch_size, seq_len)
    test_input = _slice_batches(x_input, train_batchs, test_batchs, batch_size, seq_len)
    test_target = _slice_batches(y_label, train_batchs, test_batchs, batch_size, seq_len)
    return train_input, train_target, test_input, test_target

# Seq2Seq with Attention

In [7]:
class Seq2Seq:
    """Stacked-LSTM encoder/decoder with Luong attention for binary vector sequences.

    The model maps a fixed-length sequence of ``input_len``-wide vectors to a
    sequence of ``output_len``-wide vectors whose elements are treated as
    independent binary labels (sigmoid cross-entropy loss).

    Two decoders share the same cell and projection weights:

    * the training decoder is teacher-forced with the targets shifted right
      by one step behind a GO frame;
    * the inference decoder starts from the GO frame and feeds its own
      thresholded prediction back as the next input.

    The decoder starts from a zero state; encoder information reaches it
    only through the attention mechanism over the encoder outputs.
    """

    # Checkpoint prefix used when train()/predict() are not given one.
    DEFAULT_CHECKPOINT = './seq2seq_(y_to_x)/model'
    # Number of stacked LSTM layers in the decoder.
    DECODER_LAYERS = 4

    def __init__(self,
                 seq_max_len=1.,
                 input_len=41.,
                 output_len=71.,
                 batch_size=1,
                 lstm_size=[128., 128., 128.],
                 learning_rate=0.005,
                 grad_clip=2.,
                 keep_prob=0.8,
                 forward_only= None):
        """Store the hyper-parameters and build the whole graph.

        Args:
            seq_max_len: Number of time steps in every sequence.
            input_len: Width of each encoder input vector.
            output_len: Width of each target / decoder vector.
            batch_size: Fixed number of sequences per batch.
            lstm_size: Units per encoder layer; the last entry is also the
                width of every decoder layer and of the attention layer.
            learning_rate: Adam learning rate.
            grad_clip: Global-norm threshold for gradient clipping.
            keep_prob: Dropout keep probability applied while training.
            forward_only: Unused; accepted for backward compatibility.
        """
        # Sizes are counts. The float defaults are normalised to ints once so
        # every shape derived from them is integral.
        self.seq_max_len = int(seq_max_len)
        self.batch_size = int(batch_size)
        self.batch_len = np.array([])
        self.input_len = int(input_len)
        self.output_len = int(output_len)
        self.lstm_size = [int(size) for size in lstm_size]
        self.num_units = self.lstm_size[-1]
        self.num_layers = len(self.lstm_size)
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.train_keep_prob = keep_prob
        # Value filling the start-of-sequence frame given to the decoder.
        self.go_token = 9.
        # All sequences in a batch have the same, full length.
        self.batch_seq_len = np.full(self.batch_size, self.seq_max_len, dtype=np.int32)

        # Build the model in a fresh default graph and remember that graph so
        # train()/predict() keep using it even if the default changes later.
        tf.reset_default_graph()
        self.graph = tf.get_default_graph()
        self.build_inputs()
        self.build_encoder()
        self.build_decoder()
        self.build_loss()
        self.build_optimizer()
        # Created after the optimizer so Adam's slot variables are included.
        self.init_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver()  # Saves and restores variables.

    def build_inputs(self):
        """Create the feed placeholders (all batches are fixed-shape)."""
        self.encoder_inputs = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, self.seq_max_len, self.input_len),
            name='inputs')
        self.targets = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, self.seq_max_len, self.output_len),
            name='targets')
        self.decoder_inputs = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, self.seq_max_len, self.output_len),
            name='decoder_inputs')
        # Scalar dropout keep probability; fed per run so that inference can
        # switch dropout off by feeding 1.0.
        self.keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

    def _make_cell(self, num_units):
        """Return one LSTM layer whose output dropout is driven by the keep_prob placeholder."""
        lstm = tf.nn.rnn_cell.BasicLSTMCell(num_units)
        return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=self.keep_prob)

    def _make_decoder_inputs(self, targets):
        """Build teacher-forcing inputs: a GO frame followed by targets[:, :-1].

        Feeding the unshifted targets would hand the decoder, at step t, the
        very vector it is asked to predict at step t.

        Args:
            targets: Array-like shaped (batch, time, output_len).

        Returns:
            float32 array of the same shape as ``targets``.
        """
        targets = np.asarray(targets, dtype=np.float32)
        go_frame = np.full((targets.shape[0], 1, targets.shape[2]),
                           self.go_token, dtype=np.float32)
        return np.concatenate([go_frame, targets[:, :-1, :]], axis=1)

    # Encoder Model==========================================================================
    def build_encoder(self):
        """Build the stacked-LSTM encoder and unroll it over the input sequence."""
        with tf.variable_scope('encoder', initializer=tf.orthogonal_initializer()):
            encoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                [self._make_cell(size) for size in self.lstm_size])
            self.initial_state = encoder_cell.zero_state(self.batch_size, tf.float32)
            # dynamic_rnn unrolls the cell along the time dimension.
            self.encoder_outputs, self.encoder_state = tf.nn.dynamic_rnn(
                encoder_cell,
                self.encoder_inputs,
                initial_state=self.initial_state)

    # Decoder Model with Attention=========================================================
    def build_decoder(self):
        """Build the attention decoder in its training and inference variants."""
        decoder_units = self.lstm_size[-1]
        d_cell = tf.nn.rnn_cell.MultiRNNCell(
            [self._make_cell(decoder_units) for _ in range(self.DECODER_LAYERS)])

        # Wrap the decoder cell with attention over the encoder outputs,
        # which are shaped [batch_size, max_time, num_units].
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            decoder_units,
            self.encoder_outputs)
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            d_cell,
            attention_mechanism,
            attention_layer_size=decoder_units)

        # Output projection from the attention output to output_len logits.
        project_layer = layers_core.Dense(
            self.output_len,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.1, stddev=0.1),
            name="output_projection")

        # Use the instance's batch size; the previous code read a notebook
        # global named batch_size here.
        decoder_initial_state = decoder_cell.zero_state(self.batch_size, dtype=tf.float32)

        with tf.variable_scope("decode", initializer=tf.orthogonal_initializer()):
            # 1. Training decoder: teacher-forced with self.decoder_inputs.
            #    time_major=False means tensors are [batch, time, width].
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                self.decoder_inputs,
                self.batch_seq_len,
                time_major=False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=training_helper,
                initial_state=decoder_initial_state,
                output_layer=project_layer)
            train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                training_decoder, impute_finished=True)
            self.logits = train_outputs.rnn_output
            self.train_prediction = tf.sigmoid(self.logits, name='train_predictions')
            t_pred = tf.identity(self.train_prediction, name='t_pred')

        with tf.variable_scope("decode", reuse=True):
            # 2. Inference decoder: same cell and projection objects, but each
            #    step's input is the model's own previous prediction.
            def initial_fn():
                # Nothing is finished at t=0; the first input is the GO frame.
                finished = tf.zeros([self.batch_size], dtype=tf.bool)
                start_inputs = tf.fill([self.batch_size, self.output_len], self.go_token)
                return finished, start_inputs

            def sample_fn(time, outputs, state):
                # Outputs are multi-label vectors, so there is no single class
                # id to sample; emit a placeholder id per batch entry to match
                # the helper's default scalar int32 sample-id contract.
                return tf.zeros([self.batch_size], dtype=tf.int32)

            def next_inputs_fn(time, outputs, state, sample_ids):
                # Feed back hard 0/1 decisions so inference inputs look like
                # the binary target vectors used for teacher forcing.
                next_inputs = tf.to_float(tf.sigmoid(outputs) >= 0.5)
                finished = tf.greater_equal(time + 1, self.batch_seq_len)
                return finished, next_inputs, state

            predicting_helper = tf.contrib.seq2seq.CustomHelper(
                initial_fn, sample_fn, next_inputs_fn)
            predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=predicting_helper,
                initial_state=decoder_initial_state,
                output_layer=project_layer)
            # Decode with the inference decoder; the previous code decoded
            # training_decoder a second time, so "predictions" were really
            # teacher-forced outputs.
            predicting_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                predicting_decoder,
                impute_finished=True,
                maximum_iterations=self.seq_max_len)
            predict_logits = predicting_outputs.rnn_output
            self.final_prediction = tf.sigmoid(predict_logits, name='model_predictions')
            model_pred = tf.identity(self.final_prediction, name='model_pred')

    # Loss & Optimizer ==============================================================================
    def build_loss(self):
        """Mean element-wise sigmoid cross-entropy between training logits and targets."""
        with tf.name_scope('loss'):
            loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits,
                                                           labels=self.targets)
            self.loss = tf.reduce_mean(loss)

    def build_optimizer(self):
        """Adam update with gradients clipped by global norm."""
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), self.grad_clip)
        train_op = tf.train.AdamOptimizer(self.learning_rate)
        self.optimizer = train_op.apply_gradients(zip(grads, tvars))

    # Training===============================================================================
    def train(self, x, y, iters=10, save_every_n=200, log_every_n=200,
              checkpoint_path=None):
        """Train on pre-built batches and checkpoint the variables.

        Args:
            x: List of encoder input batches, each (batch, time, input_len).
            y: List of target batches, each (batch, time, output_len).
            iters: Number of passes over ``x``/``y``.
            save_every_n: Save a checkpoint every this many steps of a pass.
            log_every_n: Print the loss every this many steps of a pass.
            checkpoint_path: Checkpoint prefix; defaults to DEFAULT_CHECKPOINT.
        """
        checkpoint_path = checkpoint_path or self.DEFAULT_CHECKPOINT
        checkpoint_dir = os.path.dirname(checkpoint_path)
        if checkpoint_dir and not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        with tf.Session(graph=self.graph) as sess:
            sess.run(self.init_op)

            for ite in range(iters):
                step = 0
                print('iters: {}'.format(ite))
                for i in range(len(x)):
                    step += 1
                    start = time.time()

                    feed = {self.encoder_inputs: x[i],
                            self.targets: y[i],
                            self.decoder_inputs: self._make_decoder_inputs(y[i]),
                            self.keep_prob: self.train_keep_prob}

                    # Only the loss value is needed back; the optimizer op is
                    # run for its side effect.
                    batch_loss, _ = sess.run([self.loss, self.optimizer],
                                             feed_dict=feed)
                    end = time.time()

                    # control the print lines
                    if step % log_every_n == 0:
                        print("=======================================================\n")
                        print('step: {} in iter: {}/{}... '.format(step, ite+1, iters),
                              'loss: {:.15f}... '.format(batch_loss),
                              '{:.4f} sec/batch'.format((end - start)))

                    if (step % save_every_n == 0):
                        self.saver.save(sess, checkpoint_path)

            # Always leave a checkpoint of the final weights so predict() has
            # something to restore even when fewer than save_every_n steps ran.
            self.saver.save(sess, checkpoint_path)

    def predict(self, x, y, variable_name='model_pred', checkpoint_path=None,
                save_csv=True):
        """Restore the checkpoint, decode every batch and score it against ``y``.

        Args:
            x: List of encoder input batches.
            y: List of target batches aligned with ``x``; used for scoring
                only, never fed to the graph.
            variable_name: Unused; kept for backward compatibility.
            checkpoint_path: Checkpoint prefix; defaults to DEFAULT_CHECKPOINT.
            save_csv: Forwarded to :meth:`accuracy`.

        Returns:
            Mismatched elements divided by total elements over all batches,
            i.e. the element-wise error rate (0.0 means every bit matched).

        Raises:
            ValueError: if ``x`` contains no batches.
        """
        if len(x) == 0:
            raise ValueError("predict() needs at least one batch")
        checkpoint_path = checkpoint_path or self.DEFAULT_CHECKPOINT

        self.count = 0
        self.total = 0

        with tf.Session(graph=self.graph) as sess:
            # The graph already lives in this object, so restore straight
            # into it instead of importing a second copy from the .meta file.
            self.saver.restore(sess, checkpoint_path)

            for i in range(len(x)):
                # The inference decoder needs no decoder inputs or targets;
                # keep_prob=1 disables dropout.
                feed = {self.encoder_inputs: x[i],
                        self.keep_prob: 1.}
                self.answer = sess.run(self.final_prediction, feed_dict=feed)
                c, t = self.accuracy(self.answer, y[i], save_csv=save_csv)
                self.count += c
                self.total += t
        return self.count/self.total

    def accuracy(self, pred, target, save_csv=True):
        """Count element-wise mismatches between thresholded predictions and targets.

        Despite its name this returns the number of *wrong* elements.

        Args:
            pred: Predicted probabilities; values >= 0.5 count as 1.
            target: Array of 0/1 labels broadcastable against ``pred``.
            save_csv: When True, write the mismatch mask of every entry along
                the first axis to ``result_<i>.csv`` in the working directory,
                overwriting files from earlier calls.

        Returns:
            ``(count, total)``: number of mismatched elements and the number
            of elements compared.
        """
        pred = (np.asarray(pred) >= 0.5).astype(int)
        result = np.abs(pred - np.asarray(target))

        count = np.sum(result)
        total = result.size

        if save_csv:
            for i in range(result.shape[0]):
                np.savetxt("result_" + str(i) + ".csv", result[i], delimiter=',')
        return count, total


# Training Model

In [8]:
'''
seq_max_len=1.,  
input_len=41.,
output_len=71.,
batch_size=1,
lstm_size=[128., 128., 128.],
learning_rate=0.005,
grad_clip=2.,
keep_prob=0.8,
forward_only= None
'''

'\nseq_max_len=1.,  \ninput_len=41.,\noutput_len=71.,\nbatch_size=1,\nlstm_size=[128., 128., 128.],\nlearning_rate=0.005,\ngrad_clip=2.,\nkeep_prob=0.8,\nforward_only= None\n'

In [9]:
# Hyper-parameters for this run.
batch_size = 1
seq_len = 1
# Units per encoder layer. These are counts, so use ints rather than floats
# that only work through implicit coercion.
lstm_size = [70, 70, 70]
learning_rate = 0.1
# NOTE(review): keep_prob is not passed to Seq2Seq in the training cell, so the
# model runs with the class default instead. 0.005 would keep only 0.5% of the
# activations - confirm the intended value before wiring it in.
keep_prob = 0.005
iters = 1

# Directory created before training. Seq2Seq uses its own hard-coded checkpoint
# prefix inside this directory, so keep the two in sync.
save_path = './seq2seq_(y_to_x)'

In [10]:
# The checkpoint directory has to exist before the first save.
if not os.path.exists(save_path):
    os.makedirs(save_path)

# Slice the dataset into train/test batch lists.
train_input, train_target, test_input, test_target = batch_generator(
    x_input, y_label, batch_size=batch_size, seq_len=seq_len)

# Build the graph for this batch geometry, then train on the training batches.
model = Seq2Seq(seq_max_len=seq_len,
                batch_size=batch_size,
                lstm_size=lstm_size,
                learning_rate=learning_rate)

model.train(train_input, train_target,
            iters=iters, save_every_n=1000, log_every_n=200)

unuse data 0
Train Batch: 47700 ; Test Batch: 5299
iters: 0

step: 200 in iter: 1/1...  loss: 0.765205025672913...  0.0059 sec/batch

step: 400 in iter: 1/1...  loss: 0.687609374523163...  0.0058 sec/batch

step: 600 in iter: 1/1...  loss: 0.682296693325043...  0.0058 sec/batch

step: 800 in iter: 1/1...  loss: 0.726226687431335...  0.0058 sec/batch

step: 1000 in iter: 1/1...  loss: 0.711045563220978...  0.0058 sec/batch

step: 1200 in iter: 1/1...  loss: 0.705324172973633...  0.0057 sec/batch

step: 1400 in iter: 1/1...  loss: 0.708758771419525...  0.0057 sec/batch

step: 1600 in iter: 1/1...  loss: 0.728832960128784...  0.0057 sec/batch

step: 1800 in iter: 1/1...  loss: 0.721849620342255...  0.0057 sec/batch

step: 2000 in iter: 1/1...  loss: 0.701219141483307...  0.0057 sec/batch

step: 2200 in iter: 1/1...  loss: 0.697612166404724...  0.0058 sec/batch

step: 2400 in iter: 1/1...  loss: 0.698890984058380...  0.0066 sec/batch

step: 2600 in iter: 1/1...  loss: 0.728691279888153... 

KeyboardInterrupt: 

In [104]:
# Score the held-out batches. The value shown is mismatched elements / total
# elements after thresholding predictions at 0.5, so lower is better.
model.predict(test_input, test_target)

INFO:tensorflow:Restoring parameters from ./seq2seq_(y_to_x)/model


0.49845705673937946

seq2seq的NMT模型怎样训练-论文《Massive Exploration of Neural Machine Translation Architectures》
http://cairohy.github.io/2017/04/11/deeplearning/NLP-Hyperparams-train-arXiv2017-%E3%80%8AMassive%20Exploration%20of%20Neural%20Machine%20Translation%20Architectures%E3%80%8B/