# Binary String Classfication

다음의 문서를 참고하였습니다. 

* https://medium.com/@erikhallstrm/using-the-dynamicrnn-api-in-tensorflow-7237aba7f7ea#.sjdfcqe4m
* https://github.com/cozyhous/dynamic_rnn_example/blob/master/trainer.py
* https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-12-5-rnn_stock_prediction.py
* https://arxiv.org/pdf/1506.00019.pdf

In [2]:
import tensorflow as tf
import numpy as np

from tensorflow.contrib.rnn import RNNCell, BasicLSTMCell, MultiRNNCell, DropoutWrapper

## Creating Interactive Session

In [3]:
init_op = tf.global_variables_initializer()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.001, allow_growth=True)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(init_op)

## Data

Data는 3 dimentions 으로서 [Batch, Time Step, Data] 로 이루어집니다.

In [4]:
LABEL_SIZE = 100
TRAINING_SIZE = 50000
TEST_SIZE = 10000

def create_data(size=50000, maximum=1000, end_token=2):
    max_length = len(bin(maximum-1)[2:]) + 1
    rands = np.random.randint(0, maximum, size=500000)
    xs = np.zeros((size, 1, max_length), dtype='float32')
    ys = np.zeros((size, maximum), dtype='int32')
    for i in range(size):
        x = np.zeros(20, dtype='float32')
        rand_bin = bin(rands[i])[2:]+ str(end_token)
        rand_bin = list(map(float, list(rand_bin)))
        xs[i, 0, :len(rand_bin)] = rand_bin    
        ys[i, rands[i]] = 1
    
    return xs, ys

train_x, train_y = create_data(size=TRAINING_SIZE, maximum=LABEL_SIZE)
test_x, test_y = create_data(size=TEST_SIZE, maximum=LABEL_SIZE)

SENTENCE_LENGTH = train_x.shape[2]

In [5]:
def next_batch(x, y, size=50):
    idx = np.random.randint(x.shape[0] - size)
    return x[idx:idx+size], y[idx:idx+size]

sample_x, sample_y = next_batch(train_x, train_y)
print(sample_x.shape, sample_y.shape)

(50, 1, 8) (50, 100)


## LSTM Model

### DynamicRNN

TensorFlow의 dynamic_rnn은 batch input shape [Batch Size, Time Step Size, Input Size] 형식을 받습니다.<br>


In [7]:
def sigmoid_cross_entropy(prediction, targets):
    """
    위의 prediction에서 음수도 나올수 있는데.. 이때 음수를 log에 넣으면 nan이 떠버립니다.
    따라서 sigmoid를 통해서 0~1사이의 값으로 보정을 해줍니다. 
    """
    prediction = tf.sigmoid(prediction)
    loss = - tf.reduce_sum(targets * tf.log(prediction))
    return loss

def softmax_cross_entropy(prediction, targets):
    """
    마찬가지로 tf.nn.softmax_? 또는 tf.nn.sigmoid 등등을 TensorFlow에서 제공하는데.. 
    cross entropy function을 사용시 log에서 문제가 될 수 있는 음수 부분을 없애기 위해서 API가 이렇게 제공되고 있음
    """
    delta = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=targets)
    loss = tf.reduce_mean(delta)
    return loss

def mean_squared_error(prediction, targets):
    delta = tf.square(targets - prediction)
    loss = tf.reduce_mean(delta)
    return loss

def root_mean_squared_error(preditction, targets):
    p = mean_squared_error(prediction, targets)
    loss = tf.sqrt(p)
    return loss

loss_f = root_mean_squared_error


with tf.variable_scope('test' + str(np.random.randint(0, 100000))):
    inputs = tf.placeholder('float32', shape=[None, None, SENTENCE_LENGTH], name='inputs') # [batch, time, in]
    targets = tf.placeholder('float32', shape=[None, LABEL_SIZE], name='targets') # [batch, time, out]
    
    cell = BasicLSTMCell(LABEL_SIZE, forget_bias=1.0, state_is_tuple=True)
    init_state = cell.zero_state(1, 'float32')

    outputs, state = tf.nn.dynamic_rnn(cell, inputs, initial_state=init_state, time_major=True)
    prediction = outputs[:, -1] # (?, 100)
    
    loss = loss_f(prediction, targets)
    train_fn = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
    
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [28]:
def train(x, y, n_epoch=3, batch_size=50):
    global state
    
    for epoch in range(1, n_epoch+1):
        costs = []
        for step in range(int(x.shape[0]/batch_size)):
            sample_x, sample_y = next_batch(x, y, size=batch_size)
            cost, _ = sess.run([loss, train_fn], feed_dict={inputs: sample_x, targets: sample_y})
            costs.append(np.sum(cost))

        cost = sum(costs)/float(step + 1)
        print(f'[{epoch}] cost: {cost}')

train(train_x, train_y)

[1] cost: 0.07489023901894688
[2] cost: 0.048166033580899235
[3] cost: 0.03409451477788389


In [30]:
def evaluate(x, y):
    test_size = x.shape[0]
    DATA_LIMIT = 200
    
    n_correct = 0
    global_step = 0
    for i in range(0, test_size, DATA_LIMIT):
        y_preds = sess.run(prediction, feed_dict={inputs: x[i:i+DATA_LIMIT]})

        for j in range(DATA_LIMIT):
            y_pred = np.argmax(y_preds[j, 0])
            y_true = np.argmax(y[j, 0])
            if y_pred == y_true:
                n_correct += 1
                
            global_step += 1
    
    print(f'테스트 갯수: {test_size}, 맞은 갯수: {n_correct}')
    print('accuracy:', n_correct/float(test_size))

evaluate(test_x, test_y)

테스트 갯수: 10000, 맞은 갯수: 10000
accuracy: 1.0
