In [59]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
#from tensorflow.contrib.rnn import GRUCell
from tensorflow.python.layers import core as layers_core

# __all__ = ["AttentionModel"]
print("TensorFlow Version", tf.__version__)

TensorFlow Version 1.8.0


In [None]:
"""tensorflow.python.layers:
layers 模塊提供用於深度學習的更高層次封裝的 API，

tf.layers 模塊提供的方法有：
    Input(…): 用於實例化一個輸入 Tensor，作為神經網絡的輸入。
    average_pooling1d(…): 一維平均池化層
    average_pooling2d(…): 二維平均池化層
    average_pooling3d(…): 三維平均池化層
    batch_normalization(…): 批量標準化層
    conv1d(…): 一維卷積層
    conv2d(…): 二維卷積層
    conv2d_transpose(…): 二維反捲積層
    conv3d(…): 三維卷積層
    conv3d_transpose(…): 三維反捲積層
    dense(…): 全連接層
    dropout(…): Dropout層
    flatten(…): Flatten層，即把一個 Tensor 展平
    max_pooling1d(…): 一維最大池化層
    max_pooling2d(…): 二維最大池化層
    max_pooling3d(…): 三維最大池化層
    separable_conv2d(…): 二維深度可分離卷積層
"""

# Data

In [2]:
x_input = pd.read_csv('plc_x_reduce.csv')
y_label = pd.read_csv('plc_y_reduce.csv')

In [34]:
def batch_generator(x_input,
                    y_label,
                    batch_size=1, 
                    seq_len=1):
    
    batchs = int(y_label.shape[0] / seq_len / batch_size)
    print(batchs)
    
    input_data = []
    target_data = []
    
    
    for i in range(batchs):
        x = np.zeros(shape=(batch_size, seq_len, x_input.shape[1]))
        y = np.zeros(shape=(batch_size, seq_len, y_label.shape[1]))
        for b in range(batch_size):

            x[b, :, :] = x_input[b*i : b*i+seq_len, :]
            y[b, :, :] = y_label[b*i : b*i+seq_len, :]

        input_data.append(x)
        target_data.append(y)              
    return input_data, target_data

# Model

In [80]:
class Seq2Seq:
    def __init__(self,
                 seq_max_len=1.,  
                 input_len=71.,
                 output_len=41.,
                 batch_size=1,
                 lstm_size=[71., 80., 41.],
                 num_layers=3.,
                 learning_rate=0.001,
                 grad_clip=2.,
                 keep_prob=1.,
                 forward_only= None):
        
        
        self.seq_max_len = seq_max_len
        self.batch_size = batch_size
        self.batch_len = np.array([])
        self.input_len = input_len
        self.output_len = output_len
        self.lstm_size = lstm_size
        self.num_units = self.lstm_size[-1]
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.train_keep_prob = keep_prob
        self.decoder_hidden_size = lstm_size[-1]
        
        self.batch_seq_len = np.int32(np.ones(shape=([self.batch_size])) * self.seq_max_len)
        
        
        
        
        tf.reset_default_graph() #Clears the default graph stack and resets the global default graph
        self.build_inputs()
        #self.build_embedding()
        self.build_encoder()
        self.build_atten_decoder()
        self.build_loss()
        self.build_optimizer()
        self.saver = tf.train.Saver() #Saves and restores variables.
 
        

    def build_inputs(self):
        self.encoder_inputs = tf.placeholder(tf.float32, 
                                         shape=(self.batch_size, self.seq_max_len, self.input_len),
                                         name='inputs')

        self.targets = tf.placeholder(tf.float32,
                                          shape=(self.batch_size, self.seq_max_len, self.output_len),
                                          name='targets')

        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        
        self.decoder_inputs = tf.placeholder(tf.float32,
                                                shape=(self.batch_size, self.seq_max_len, self.lstm_size[-1]),  
                                                name='decoder_inputs')

        
        #self.input_sequence_length = tf.placeholder(shape=([self.batch_size]), dtype=tf.int32, name='input_length')
        #self.decoder_sequence_length = tf.placeholder(shape=([self.batch_size]), dtype=tf.int32, name='decoder_inputs_length')
        #self.target_sequence_length = tf.placeholder(shape=([self.batch_size]), dtype=tf.float32, name='target_sequence_length')


    '''
    def build_embedding(self):
        # Embedding
        initializer = tf.random_uniform_initializer(-1, 1, dtype=tf.float32)
        embeddings = tf.get_variable(name='embedding',
                                     shape=[vocab_size, encoder_embedding_size],
                                     initializer=initializer, 
                                     dtype=tf.float32)
        # encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, self.encoder_inputs)
        self.decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, self.decoder_inputs)
    '''

    def build_encoder(self):
        def get_a_cell(lstm_size, keep_prop):
            lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
            drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=self.train_keep_prob)
            return drop

        with tf.variable_scope('encoder', initializer=tf.orthogonal_initializer()):
            encoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                                 [get_a_cell(size, self.keep_prob) for size in self.lstm_size]
                                                      )
            self.initial_state = encoder_cell.zero_state(self.batch_size, tf.float32)
            # 透過dynamic_rnn對cell展開時間維度
            self.encoder_outputs  = tf.nn.dynamic_rnn(
                                                      encoder_cell, 
                                                      self.encoder_inputs,                                                    
                                                      initial_state=self.initial_state
                                                      )


    # Decoder with Attention ----------------------------------------------------------------------
    def build_atten_decoder(self):
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.lstm_size[-1])
        #decoder_cell = tf.contrib.rnn.GRUCell(self.lstm_size[-1])
        


        # Training Helper
        helper = tf.contrib.seq2seq.TrainingHelper(
                    self.decoder_inputs, self.batch_seq_len, time_major=True)


        # Project layer (full connecting layers)
        project_layer = layers_core.Dense(self.output_len , use_bias=False, name="output_projection")
        
        self.d_initial_state = decoder_cell.zero_state(self.batch_size, tf.float32)
        decoder = tf.contrib.seq2seq.BasicDecoder(
                                           cell=decoder_cell,
                                           helper=helper,
                                           initial_state=self.d_initial_state,
                                           output_layer=project_layer
                                                 )

        # Attention
        #attenetion_states: [batch_size, max_time, num_units]
        #attention_states = tf.transpose(self.encoder_outputs, [1, 0, 2]) #why?
        '''
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                                          num_units = self.num_units, 
                                          memory=self.encoder_outputs)

        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                                          decoder_cell,
                                          attention_mechanism,         
                                          alignment_history=False,
                                          cell_input_fn=None,
                                          output_attention=True,
                                          initial_cell_state=None,
                                          name=None)
        '''

        # Outout
        final_outputs, _, _  = tf.contrib.seq2seq.dynamic_decode(decoder)
        self.logits = final_outputs.rnn_output
        self.proba_prediction = tf.tanh(self.logits, name='predictions')


    def build_loss(self):
        with tf.name_scope('loss'):
            #self.y_reshaped = tf.reshape(self.targets,  self.logits.get_shape())
            loss =tf.losses.mean_squared_error(predictions=self.proba_prediction, labels=self.targets)
            #loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.y_reshaped)
            self.loss = tf.reduce_mean(loss)



    def build_optimizer(self):
        # Using "clipping" gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), self.grad_clip)
        train_op = tf.train.AdamOptimizer(self.learning_rate)
        self.optimizer = train_op.apply_gradients(zip(grads, tvars))
        
        
        

    # Training===============================================================================    
    def train(self, x, y, iters=10, save_path='./models', save_every_n=200, log_every_n=200):
        self.session = tf.Session()
        with self.session as sess:
            sess.run(tf.global_variables_initializer())
            # Train network

            new_state = sess.run(self.initial_state)
            for ite in range(iters):
                step = 0
                print('iters',ite)
                for i in range(len(x)):
                    step += 1
                    start = time.time()
                    
                    feed = {self.encoder_inputs: x[i], 
                            self.targets: y[i],
                            self.decoder_inputs : y[i],
                            self.keep_prob: self.train_keep_prob,
                            self.initial_state: new_state}
                    
                    batch_loss, new_state = sess.run([self.loss,
                                                      self.optimizer],
                                                      feed_dict=feed)
                    # print result
                    #self.print_result(x[i], y[i])
                    end = time.time()

                    # control the print lines
                    if step % log_every_n == 0:
                        print("=======================================================\n")
                        print('step: {} in iter: {}/{}... '.format(step, ite+1, iters),
                              'loss: {:.4f}... '.format(batch_loss),
                              '{:.4f} sec/batch'.format((end - start)))

                    if (step % save_every_n == 0):
                        self.saver.save(sess, os.path.join(save_path, 'model'), global_step=step)
                        #self.jodge(pred, target)
                        #print("Target: \n", target)
                        #print("PRED: \n",pred)


    def jodge(self, pred, target):
        pred = np.array(pred)
        pred = np.array(pred >= 0.5).astype(int)

        result = np.abs(pred - target)

        for i in range(result.shape[0]):
            np.savetxt("result_" + str(i) + ".csv", result[i], delimiter=',')
        return pred            
        

In [81]:
model_path = './seq2seq_models'
if os.path.exists(model_path) is False:
    os.makedirs(model_path)
input_data, target_data = batch_generator(x_input.values, y_label.values)
model = Seq2Seq()
model.train(input_data, 
            target_data,
            iters=2,
            save_path=model_path,
            save_every_n=1000,
            log_every_n =200
            )

52999
iters 0


ValueError: Could not flatten dictionary. Key had 6 elements, but value had 1 elements. Key: [<tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(1, 71) dtype=float32>, <tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(1, 71) dtype=float32>, <tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(1, 80) dtype=float32>, <tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(1, 80) dtype=float32>, <tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState_2/BasicLSTMCellZeroState/zeros:0' shape=(1, 41) dtype=float32>, <tf.Tensor 'encoder/MultiRNNCellZeroState/DropoutWrapperZeroState_2/BasicLSTMCellZeroState/zeros_1:0' shape=(1, 41) dtype=float32>], value: [None].

In [None]:
'''
epochs = 50
batch_size = 100
learning_rate = 1e-3
grad_clip = 0.3

# Model parameter 
encoder_embedding_size = 128
decoder_hidden_size = encoder_output_size = 128



class trainer_class():
    def train(self, model, data_transformer):
        #Define some parameter and the optimizer here 
        n_iter = 0
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(model.train_loss, tvars), grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(0, epochs):
                input_batches, target_batches = data_transformer.mini_batches(batch_size=batch_size)
                for input_batch, target_batch in zip(input_batches, target_batches):    
                    input_length = np.array(input_batch[1])
                    target_length = np.array(target_batch[1])
                    input_batch = np.transpose(np.array(input_batch[0]))
                    target_batch = np.transpose(np.array(target_batch[0]))
                    n_iter +=1
                
                    if target_length.shape[0] == batch_size:
                        #Feed the batch into the training model 
                        output, loss, _ = sess.run([model.decoder_logits,model.train_loss,train_op],feed_dict={
                                model.encoder_inputs: input_batch,
                                model.decoder_targets: target_batch,
                                model.decoder_inputs: target_batch,
                                model.input_sequence_length: input_length,
                                model.decoder_sequence_length: target_length,
                                model.target_sequence_length: target_length
                            })
                        #Print loss and result after training 50 epoch
                        if n_iter % 50 == 0:
                            Predict_Words =[]
                            Input_words = []
                            # Two for loops are used to discard the "EOS"
                            for batch in output:
                                word = []
                                for prob in batch:
                                    Max=np.argmax(prob)
                                    if Max ==1 or Max ==2:
                                        continue
                                    char = data_transformer.vocab.idx2char[Max]
                                    word.append(char)
                                    tmp_word = ''.join(word)
                                Predict_Words.append(tmp_word)
                            for batch in np.transpose(input_batch):
                                word= data_transformer.vocab.indices_to_sequence(batch)
                                Input_words.append(word)
                            
                            print("-----{}epochs------------- ".format(n_iter))
                            print("loss: ",loss,"\n")
                            print("Input: ",Input_words,"\n")
                            print("Predict: ",Predict_Words,"")
                            print("--------------------------\n ")

                        

def main():
    data_transformer = DataTransformer('../dataset/Google-10000-English.txt', use_cuda=False)
    seq2seq_model = Model(encoder_embedding_size = encoder_embedding_size, 
                          encoder_output_size = encoder_output_size,
                          batch_size = batch_size,
                          vocab_size= data_transformer.vocab_size)
    trainer = trainer_class()
    trainer.train(seq2seq_model, data_transformer)

if __name__ == "__main__":
    main()



'''