# RNN


In [13]:
!type python 
from tqdm import tqdm
from platform import python_version
from warnings import filterwarnings
filterwarnings('ignore')
import tensorflow as tf
import numpy as np
import os
import sys
print(python_version())
print(tf.__version__)
# Session config shared by every tf.Session created below.
# allow_growth asks TensorFlow to allocate GPU memory on demand rather than
# reserving it all when the session starts.
conf = tf.ConfigProto()
conf.gpu_options.allow_growth = True
# Uncomment to check whether TensorFlow can see a GPU:
# print(tf.test.is_gpu_available())

python is /home/kddlab/anaconda3/envs/swyoo_dl/bin/python
3.6.9
1.12.0


In [14]:
# Load MNIST through Keras (with tensorflow_gpu=1.12.0 the dataset can be downloaded).
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()

# Scale pixel values by 1/255. load_data() already returns the images as
# (N, rows, cols), so no reshape is needed; the previous flatten/unflatten round
# trip was a no-op that depended on T and D, which are only defined in a later
# cell and therefore broke "Restart Kernel -> Run All".
train_x = train_x.astype(np.float32) / 255.0
test_x = test_x.astype(np.float32) / 255.0
train_y = train_y.astype(np.int32)
test_y = test_y.astype(np.int32)

# Hold out the first 5000 training examples as the validation split.
valid_x, train_x = train_x[:5000], train_x[5000:]
valid_y, train_y = train_y[:5000], train_y[5000:]

print('train_x.shape :', train_x.shape)
print('valid_x.shape :', valid_x.shape)
print('test_x.shape :', test_x.shape)

train_x.shape : (55000, 28, 28)
valid_x.shape : (5000, 28, 28)
test_x.shape : (10000, 28, 28)


In [15]:
""" helper functions """
# yield mini-batch function
def get_minibatch(X, Y, b):
    """
    Yield shuffled minibatches that together cover the dataset exactly once.

    inputs:
    - X: data features, indexable by an integer index array along axis 0 (length N)
    - Y: labels aligned with X along axis 0
    - b: target batch size, positive int

    outputs:
    - x_mini, y_mini: one minibatch per iteration

    The shuffled indices are split into max(1, N // b) nearly equal parts, so
    individual batches can be slightly larger than b and no sample is dropped.
    A dataset smaller than b is returned as a single batch; an empty dataset
    yields nothing.

    raises:
    - ValueError: if b is not positive (raised when iteration starts)
    """
    if b <= 0:
        raise ValueError("batch size b must be positive, got {}".format(b))

    n_samples = len(X)
    if n_samples == 0:
        return

    # np.array_split rejects 0 sections, which is what N // b gives when N < b.
    n_batches = max(1, n_samples // b)
    for indices in np.array_split(np.random.permutation(n_samples), n_batches):
        yield X[indices], Y[indices]

In [27]:
""" layers module"""
class Model:
    """
    Stacked-LSTM image classifier written for TensorFlow 1.x graph mode.

    Each image is read as a sequence of T rows with D pixels per row. The final
    hidden state of the top LSTM layer feeds a dense layer with dropout and a
    K-way output layer.

    __init__ builds the graph on the current default graph, fit() trains it,
    test() restores saved weights and reports accuracy, and summary() lists the
    graph variables.
    """

    # computation graph
    def __init__(self, T=28, D=28, H=150, DH=128, K=10, L=3, beta=0.01):
        """
        hyper parameters
        - T: number of time steps (pixel rows of an image)
        - D: input size per time step (pixels per row)
        - H: number of units of each LSTM layer (size of the latent vector)
        - DH: unit size of the hidden dense layer
        - K: number of classes (size of the logits)
        - L: number of stacked LSTM layers
        - beta: scale of the L2 regularizer applied in the RNN and dense scopes
        """
        # feed
        self.X = tf.placeholder(dtype=tf.float32, shape=[None, T, D])
        self.Y = tf.placeholder(dtype=tf.int64, shape=[None])
        # Despite its name this is the keep probability passed to tf.nn.dropout
        # (fit() feeds keep_prob while training and 1 for evaluation).
        self.dropout = tf.placeholder(tf.float32)

        # With a regularizer set on the variable scope, the L2 loss terms are
        # gathered in a list that is read back through
        # tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES).
        reg = tf.contrib.layers.l2_regularizer(scale=beta)
        init = tf.contrib.layers.xavier_initializer()

        with tf.variable_scope("RNN_layer", regularizer=reg, initializer=init):
            # outputs: hidden features for each time step
            # states: one (cell state, hidden state) tuple per layer
            # The state holds the encoded information of a whole image.
            cells = [tf.nn.rnn_cell.LSTMCell(num_units=H) for _ in range(L)]
            block = tf.nn.rnn_cell.MultiRNNCell(cells)
            outputs, states = tf.nn.dynamic_rnn(block, self.X, dtype=tf.float32)

        with tf.variable_scope("Dense_layer", regularizer=reg, initializer=init):
            # dense layer
            # The input is the hidden state of the last layer at the last time
            # step, i.e. the most comprehensive encoding of the image.
            last_hidden = states[-1].h
            d1 = tf.layers.dense(last_hidden, units=DH)
            d1 = tf.nn.dropout(d1, keep_prob=self.dropout)
            logits = tf.layers.dense(d1, units=K)

        with tf.variable_scope("loss_layer"):
            reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.Y, logits=logits))
            self.loss += tf.reduce_sum(reg_losses)  # scalar

        # evaluation metric
        self.pred = tf.argmax(logits, axis=1)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.Y), dtype=tf.float32))

    # train and evaluation
    def fit(self, config, train_x, train_y, valid_x, valid_y, epoch, lr, b, dr, keep_prob=0.7,
            save=False, SAVE_FILE='./models_mnist/model', log=False, LOG_DIR='./models_mnist'):
        """
        Train with Adam and an exponentially decayed learning rate, reporting
        the mean training loss and the validation accuracy after every epoch.

        inputs
        - config: tf.ConfigProto used to create the session
        - train_x, train_y: training features and labels
        - valid_x, valid_y: validation features and labels
        - epoch: number of passes over the training set (e.g. 10)
        - lr: initial learning rate (e.g. 0.001)
        - b: minibatch size (e.g. 150)
        - dr: learning rate decay rate, applied every 100 steps (e.g. 0.97)
        - keep_prob: dropout keep probability used while training (e.g. 0.7)
        - save / SAVE_FILE: when save is True, write the trainable variables
          to the checkpoint prefix SAVE_FILE after training
        - log / LOG_DIR: when log is True, write the graph and per-epoch
          scalars to LOG_DIR for TensorBoard

        NOTE: variables are created here under fixed names ("global_step",
        "train_loss", "valid_acc"), so fit() is meant to be called once per
        graph; reset the default graph and rebuild the model before retraining.
        """
        step = tf.get_variable(name="global_step", shape=(), initializer=tf.zeros_initializer(), trainable=False)
        # each decay_step, learning rate will be decreased by decay_rate
        # decayed_learning_rate = learning_rate * decay_rate^(global_step / decay_steps)
        decayed_lr = tf.train.exponential_decay(learning_rate=lr, global_step=step, decay_steps=100,
                                                decay_rate=dr, staircase=True)
        train_op = tf.train.AdamOptimizer(learning_rate=decayed_lr).minimize(self.loss, global_step=step)

        # save options: only the trainable variables are checkpointed
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

        # tensorboard visualization
        if log:
            train_loss = tf.get_variable(name='train_loss', shape=(), dtype=tf.float32,
                                         initializer=tf.zeros_initializer(), trainable=False)
            valid_acc = tf.get_variable(name='valid_acc', shape=(), dtype=tf.float32,
                                        initializer=tf.zeros_initializer(), trainable=False)
            train_loss_summ = tf.summary.scalar('train_loss', train_loss)
            valid_acc_summ = tf.summary.scalar('valid_acc', valid_acc)
            summ_op = tf.summary.merge([train_loss_summ, valid_acc_summ])
            # Build the assign ops once and feed the values each epoch, so the
            # graph does not gain new ops on every epoch.
            loss_in = tf.placeholder(tf.float32, shape=())
            acc_in = tf.placeholder(tf.float32, shape=())
            update_metrics = tf.group(train_loss.assign(loss_in), valid_acc.assign(acc_in))

        with tf.Session(config=config) as sess:
            # initialize all variables
            sess.run(tf.global_variables_initializer())

            # add graph to tensorboard
            writer = None
            if log:
                print("@terminal: $ tensorboard --logdir={}".format(LOG_DIR))
                writer = tf.summary.FileWriter(LOG_DIR, session=sess)
                writer.add_graph(sess.graph)

            try:
                # minibatch training
                for e in range(epoch):
                    loss_sum, n_batches = 0.0, 0
                    for x, y in tqdm(get_minibatch(train_x, train_y, b), desc='Train'):
                        feed = {self.X: x, self.Y: y, self.dropout: keep_prob}
                        loss_mini, _ = sess.run([self.loss, train_op], feed_dict=feed)
                        loss_sum += loss_mini
                        n_batches += 1
                    # Guard the division in case no minibatch was produced.
                    loss_train = loss_sum / max(n_batches, 1)

                    # evaluation (keep probability 1 disables dropout)
                    acc_valid = sess.run(self.accuracy,
                                         feed_dict={self.X: valid_x, self.Y: valid_y, self.dropout: 1})
                    print('epoch {} \t| loss: {:.4f} \t| acc_valid: {:.4f} \t| lr: {:0.5} '.format(
                        e+1, loss_train, acc_valid, sess.run(decayed_lr)))
                    if log:
                        sess.run(update_metrics, feed_dict={loss_in: loss_train, acc_in: acc_valid})
                        writer.add_summary(sess.run(summ_op), global_step=e)
            finally:
                # Close the writer while the session is still open so that
                # pending events are flushed, even if training is interrupted.
                if writer is not None:
                    writer.close()

            print("Training End")
            if save:
                # Make sure the checkpoint directory exists (it is not created
                # here when log is False).
                save_dir = os.path.dirname(SAVE_FILE)
                if save_dir:
                    os.makedirs(save_dir, exist_ok=True)
                saver.save(sess, SAVE_FILE)
                print("save model @{}".format(SAVE_FILE))

    # restore model and evaluation
    def test(self, config, test_x, test_y, SAVE_FILE='./models_mnist/model'):
        """
        Restore the trained weights from SAVE_FILE and print the accuracy on
        (test_x, test_y) with dropout disabled.

        inputs
        - config: tf.ConfigProto used to create the session
        - test_x, test_y: test features and labels
        - SAVE_FILE: checkpoint prefix written by fit(save=True)
        """
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

        with tf.Session(config=config) as sess:
            saver.restore(sess, SAVE_FILE)
            print("restore is completed")
            acc_test = sess.run(self.accuracy, feed_dict={self.X: test_x, self.Y: test_y, self.dropout: 1})
            print("acc_test: {}".format(acc_test))

    def summary(self, show_all=False, show_reg=False):
        """
        Print the variables of the current default graph.

        inputs
        - show_all: also list every global variable (default False)
        - show_reg: also list the regularization loss terms (default False)

        With the defaults only the trainable parameters are printed.
        """
        rule = "============================================="

        if show_all:
            print(rule)
            print("list of all parameters")
            print(rule)
            for x in tf.global_variables():
                print(x)

        print(rule)
        print("list of all trainable parameters")
        print(rule)
        for x in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            print(x)

        if show_reg:
            print(rule)
            print("list of parameters reflected regularization ")
            print(rule)
            for x in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
                print(x)

In [28]:
# Start from an empty default graph so re-running this cell rebuilds the model
# from scratch.
tf.reset_default_graph()

# Sequence length (rows) and per-step input size (columns) come from the data.
_, T, D = train_x.shape
# LSTM units, number of classes, number of stacked LSTM layers.
H, K, L = 150, 10, 3

model = Model(T=T, D=D, H=H, K=K, L=L, beta=0.01)

# Train, checkpoint the weights and log scalars for TensorBoard.
model.fit(
    conf, train_x, train_y, valid_x, valid_y,
    epoch=10, lr=0.001, b=150, dr=0.97, keep_prob=0.7,
    save=True, log=True,
)

model.summary()

# Restore the saved weights and report accuracy on the test split.
model.test(conf, test_x, test_y)

Train: 0it [00:00, ?it/s]

@terminal: $ tensorboard --logdir=./models_mnist


Train: 366it [00:08, 45.34it/s]
Train: 5it [00:00, 42.75it/s]

epoch 1 	| loss: 1.9373 	| acc_valid: 0.8676 	| lr: 0.00091267 


Train: 366it [00:07, 47.22it/s]
Train: 5it [00:00, 46.76it/s]

epoch 2 	| loss: 0.9158 	| acc_valid: 0.9114 	| lr: 0.00080798 


Train: 366it [00:07, 47.15it/s]
Train: 5it [00:00, 47.65it/s]

epoch 3 	| loss: 0.7787 	| acc_valid: 0.9086 	| lr: 0.00073742 


Train: 366it [00:07, 47.21it/s]
Train: 5it [00:00, 47.63it/s]

epoch 4 	| loss: 0.7054 	| acc_valid: 0.9364 	| lr: 0.00065284 


Train: 366it [00:07, 47.32it/s]
Train: 5it [00:00, 47.42it/s]

epoch 5 	| loss: 0.6598 	| acc_valid: 0.9420 	| lr: 0.00057795 


Train: 366it [00:07, 47.29it/s]
Train: 5it [00:00, 47.16it/s]

epoch 6 	| loss: 0.6225 	| acc_valid: 0.9474 	| lr: 0.00052748 


Train: 366it [00:07, 47.07it/s]
Train: 5it [00:00, 46.39it/s]

epoch 7 	| loss: 0.5948 	| acc_valid: 0.9488 	| lr: 0.00046698 


Train: 366it [00:07, 47.22it/s]
Train: 5it [00:00, 47.43it/s]

epoch 8 	| loss: 0.5789 	| acc_valid: 0.9496 	| lr: 0.00041341 


Train: 366it [00:07, 47.41it/s]
Train: 5it [00:00, 44.31it/s]

epoch 9 	| loss: 0.5616 	| acc_valid: 0.9492 	| lr: 0.00037731 


Train: 366it [00:07, 47.22it/s]


epoch 10 	| loss: 0.5487 	| acc_valid: 0.9586 	| lr: 0.00033403 
Training End
save model @./models_mnist/model
list of all trainable parameters
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0' shape=(178, 600) dtype=float32_ref>
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0' shape=(600,) dtype=float32_ref>
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0' shape=(300, 600) dtype=float32_ref>
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0' shape=(600,) dtype=float32_ref>
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_2/lstm_cell/kernel:0' shape=(300, 600) dtype=float32_ref>
<tf.Variable 'RNN_layer/rnn/multi_rnn_cell/cell_2/lstm_cell/bias:0' shape=(600,) dtype=float32_ref>
<tf.Variable 'Dense_layer/dense/kernel:0' shape=(150, 128) dtype=float32_ref>
<tf.Variable 'Dense_layer/dense/bias:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'Dense_layer/dense_1/kernel:0' shape=(128, 10) dtype=float32_ref>
<tf.Varia