In [2]:
import os
import time 
import numpy as np
import pandas as pd
from __future__ import print_function

import tensorflow as tf

  from ._conv import register_converters as _register_converters


# Read Data

In [3]:
x_input = pd.read_csv('plc_x_reduce.csv')
y_label = pd.read_csv('plc_y_reduce.csv')

In [4]:
print("x_input size:", x_input.shape)
x_input.head()

x_input size: (52999, 71)


Unnamed: 0,X*000000,X*000007,X*000009,X*00000B,X*000010,X*000011,X*000012,X*000013,X*000014,X*000015,...,X*0000A5,X*0000A6,X*0000A7,X*0000A8,X*0000A9,X*0000C4,X*0000C7,X*000110,X*000119,X*00011C
0,1,1,1,1,1,1,1,0,1,1,...,0,0,1,0,0,1,1,1,1,1
1,1,1,1,1,1,1,1,0,1,1,...,0,0,1,0,0,1,1,1,1,1
2,1,1,1,1,1,1,1,0,1,1,...,0,0,0,0,0,1,1,1,1,1
3,1,1,1,1,1,1,1,0,1,1,...,0,0,0,0,0,1,1,1,1,1
4,1,1,1,1,1,1,1,0,1,1,...,0,0,0,0,0,1,1,1,1,1


In [5]:
print("x_label size:", y_label.shape)
y_label.head()

x_label size: (52999, 41)


Unnamed: 0,Y*000040,Y*000044,Y*000047,Y*00004C,Y*000060,Y*000061,Y*000062,Y*000063,Y*000065,Y*000066,...,Y*0000B2,Y*0000B3,Y*0000B5,Y*0000BC,Y*0000BD,Y*0000DA,Y*0000F6,Y*0000FA,Y*000108,Y*00011A
0,1,0,1,1,0,0,0,0,0,0,...,0,0,1,0,1,1,1,1,1,1
1,1,0,1,1,0,1,1,0,0,0,...,0,0,1,0,1,1,0,1,1,1
2,1,0,1,1,0,1,1,0,0,0,...,0,0,1,0,1,1,0,1,1,1
3,1,0,1,1,0,1,1,0,0,0,...,0,0,1,0,1,1,1,1,1,1
4,1,0,1,1,0,1,1,0,0,0,...,0,0,1,0,1,1,0,1,1,1


In [6]:
def batch_generator(x_input,
                    y_label,
                    batch_size=5, 
                    seq_len=10):
    
    batchs = int(y_label.shape[0] / seq_len / batch_size)
    print(batchs)
    
    input_data = []
    target_data = []
    
    
    for i in range(batchs):
        x = np.zeros(shape=(batch_size, seq_len, x_input.shape[1]))
        y = np.zeros(shape=(batch_size, seq_len, y_label.shape[1]))
        for b in range(batch_size):

            x[b, :, :] = x_input[b*i : b*i+seq_len, :]
            y[b, :, :] = y_label[b*i : b*i+seq_len, :]

        input_data.append(x)
        target_data.append(y)              
    return input_data, target_data

# Model

In [7]:
class N2N_LSTM:
    def __init__(self,
                 seq_max_len=10,  
                 input_len=71,
                 output_len=41,
                 batch_size=5,
                 lstm_size=[71, 80, 80, 41],
                 num_layers=4,
                 learning_rate=0.001,
                 grad_clip=2,
                 keep_prob=1.,
                 ):
            
        self.seq_max_len = seq_max_len
        self.input_len = input_len
        self.output_len = output_len
        self.batch_size = batch_size
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.train_keep_prob = keep_prob
        
        tf.reset_default_graph() #Clears the default graph stack and resets the global default graph
        self.build_inputs()
        self.build_lstm()
        self.build_loss()
        self.build_optimizer()
        self.saver = tf.train.Saver() #Saves and restores variables.
 

    
    def build_inputs(self):
        with tf.name_scope('inputs'):
            self.inputs = tf.placeholder(tf.float32, 
                                         shape=(self.batch_size, self.seq_max_len, self.input_len),
                                         name='inputs')
            self.targets = tf.placeholder(tf.float32,
                                          shape=(self.batch_size, self.seq_max_len, self.output_len),
                                          name='targets')
            self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
            
            
            
    def build_lstm(self):
        def get_a_cell(lstm_size, keep_prop):
            lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
            drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=self.train_keep_prob)
            return drop
        
        # tf.orthogonal_initializer()
        # tf.truncated_normal_initializer()
        with tf.variable_scope('lstm', initializer=tf.orthogonal_initializer()):
            cell = tf.nn.rnn_cell.MultiRNNCell(
                         [get_a_cell(size, self.keep_prob) for size in self.lstm_size]
                                              )
            self.initial_state = cell.zero_state(self.batch_size, tf.float32)
            
            # 透過dynamic_rnn對cell展開時間維度
            self.lstm_outputs, self.final_state = tf.nn.dynamic_rnn(cell, 
                                                                    self.inputs, 
                                                                    initial_state=self.initial_state)
            #print(self.lstm_outputs)
            
            
            '''
            # 透過lstm_outputs得到機率
            seq_output = tf.concat(self.lstm_outputs, 1)
            x = tf.reshape(seq_output, [-1, self.lstm_size[-1]])

            with tf.variable_scope('sigmoid'):
                sigmoid_w = tf.Variable(tf.truncated_normal([self.lstm_size[-1],self.output_len], stddev=0.1))
                sigmoid_b = tf.Variable(tf.zeros(self.output_len))

            self.logits = tf.matmul(x, sigmoid_w) + sigmoid_b
            '''
            self.proba_prediction = tf.tanh(self.lstm_outputs, name='predictions')

            
            
            
    def build_loss(self):
        with tf.name_scope('loss'):
            self.y_reshaped = tf.reshape(self.targets, self.lstm_outputs.get_shape())
            loss =tf.losses.mean_squared_error(predictions=self.lstm_outputs, labels=self.y_reshaped)
            #loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.y_reshaped)
            self.loss = tf.reduce_mean(loss)
            
            
            
            
    def build_optimizer(self):
        # 使用clipping gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), self.grad_clip)
        train_op = tf.train.AdamOptimizer(self.learning_rate)
        self.optimizer = train_op.apply_gradients(zip(grads, tvars))


            

    def train(self, x, y, iters=10, save_path='./models', save_every_n=200, log_every_n=200):
        self.session = tf.Session()
        with self.session as sess:
            sess.run(tf.global_variables_initializer())
            # Train network
            
            new_state = sess.run(self.initial_state)
            for ite in range(iters):
                step = 0
                print('iters',ite)
                for i in range(len(x)):
                    step += 1
                    start = time.time()
                    feed = {self.inputs: x[i], 
                            self.targets: y[i],
                            self.keep_prob: self.train_keep_prob,
                            self.initial_state: new_state}
                    batch_loss, new_state, _, pred, target = sess.run([self.loss,
                                                                       self.final_state,
                                                                       self.optimizer,
                                                                       self.proba_prediction,
                                                                       self.y_reshaped],
                                                                       feed_dict=feed)
                    # print result
                    #self.print_result(x[i], y[i])
                    end = time.time()

                    # control the print lines
                    if step % log_every_n == 0:
                        print("=======================================================\n")
                        print('step: {} in iter: {}/{}... '.format(step, ite+1, iters),
                              'loss: {:.4f}... '.format(batch_loss),
                              '{:.4f} sec/batch'.format((end - start)))
                        
                    if (step % save_every_n == 0):
                        self.saver.save(sess, os.path.join(save_path, 'model'), global_step=step)
                        self.jodge(pred, target)
                        #print("Target: \n", target)
                        #print("PRED: \n",pred)
 

    def jodge(self, pred, target):
        pred = np.array(pred)
        pred = np.array(pred >= 0.5).astype(int)
        
        result = np.abs(pred - target)
        np.savetxt("result.csv", result, delimiter=',')
        return pred

In [8]:
model_path = './models'
if os.path.exists(model_path) is False:
    os.makedirs(model_path)
input_data, target_data = batch_generator(x_input.values, y_label.values)
model = N2N_LSTM()
model.train(input_data, 
            target_data,
            iters=2000,
            save_path=model_path,
            save_every_n=1000,
            log_every_n =200
            )

1059
iters 0

step: 200 in iter: 1/5...  loss: 0.0827...  0.0040 sec/batch

step: 400 in iter: 1/5...  loss: 0.0453...  0.0030 sec/batch

step: 600 in iter: 1/5...  loss: 0.0332...  0.0040 sec/batch

step: 800 in iter: 1/5...  loss: 0.0367...  0.0030 sec/batch
FIRST OUTPUT: 
 [[[0.98700416 0.01051522 0.9810365  ... 0.98107594 0.9860108  0.9818101 ]
  [0.9869829  0.01099111 0.9810524  ... 0.98108023 0.9860027  0.98182   ]
  [0.98702383 0.01108973 0.9810825  ... 0.98113453 0.986026   0.981858  ]
  ...
  [0.9870617  0.01105573 0.9811142  ... 0.981161   0.98607296 0.98189074]
  [0.98706394 0.01099569 0.98110086 ... 0.98114634 0.98606896 0.9818772 ]
  [0.98706424 0.01096025 0.98109365 ... 0.98113656 0.98606616 0.98186994]]

 [[0.9675998  0.01770059 0.96318066 ... 0.97324985 0.9723766  0.9546489 ]
  [0.9675478  0.01879596 0.9630815  ... 0.973305   0.97246253 0.95471895]
  [0.96747136 0.01920935 0.9629474  ... 0.9732643  0.97248    0.95470417]
  ...
  [0.9676652  0.01910154 0.9629559  ... 0.9


step: 1000 in iter: 2/5...  loss: 0.0195...  0.0030 sec/batch
iters 2

step: 200 in iter: 3/5...  loss: 0.0216...  0.0040 sec/batch

step: 400 in iter: 3/5...  loss: 0.0120...  0.0060 sec/batch

step: 600 in iter: 3/5...  loss: 0.0103...  0.0030 sec/batch

step: 800 in iter: 3/5...  loss: 0.0110...  0.0030 sec/batch
FIRST OUTPUT: 
 [[[ 0.9959428  -0.01735546  0.99606234 ...  0.99164563  0.99524224
    0.9951197 ]
  [ 0.9971624  -0.01072815  0.994728   ...  0.9914172   0.9934258
    0.9946227 ]
  [ 0.99666756 -0.00469346  0.992444   ...  0.9899859   0.9918177
    0.9923976 ]
  ...
  [ 0.9966535  -0.00468474  0.9927226  ...  0.9899402   0.9920386
    0.99219996]
  [ 0.9966383  -0.00466647  0.99276286 ...  0.98996186  0.9921176
    0.99219793]
  [ 0.9966439  -0.00461177  0.9927787  ...  0.9899715   0.9921428
    0.9922057 ]]

 [[ 0.9977736   0.02172195  0.994823   ...  0.9949844   0.9948934
    0.9960802 ]
  [ 0.99757606  0.03455879  0.9953428  ...  0.9952296   0.9948787
    0.9965495 ]



step: 1000 in iter: 4/5...  loss: 0.0093...  0.0030 sec/batch
iters 4

step: 200 in iter: 5/5...  loss: 0.0137...  0.0030 sec/batch

step: 400 in iter: 5/5...  loss: 0.0075...  0.0030 sec/batch

step: 600 in iter: 5/5...  loss: 0.0077...  0.0040 sec/batch

step: 800 in iter: 5/5...  loss: 0.0087...  0.0030 sec/batch
FIRST OUTPUT: 
 [[[ 9.9691486e-01 -1.4468502e-03  9.9820745e-01 ...  9.9645954e-01
    9.9844992e-01  9.9760443e-01]
  [ 9.9881113e-01 -1.9602948e-03  9.9851936e-01 ...  9.9656677e-01
    9.9810261e-01  9.9842346e-01]
  [ 9.9896467e-01 -1.2136683e-03  9.9803573e-01 ...  9.9702436e-01
    9.9793482e-01  9.9836212e-01]
  ...
  [ 9.9774051e-01 -1.8705669e-03  9.9711943e-01 ...  9.9528080e-01
    9.9674976e-01  9.9727374e-01]
  [ 9.9901748e-01 -2.4772869e-04  9.9808794e-01 ...  9.9761188e-01
    9.9808085e-01  9.9840337e-01]
  [ 9.9257916e-01 -1.4439883e-03  9.9267173e-01 ...  9.9215651e-01
    9.9447906e-01  9.9469453e-01]]

 [[ 9.9911100e-01  6.3529373e-03  9.9817681e-01 ...

In [80]:
input_data[0].shape

(5, 10, 71)

In [81]:
len(input_data)

1059