In [4]:
%matplotlib inline

import math
import random
from PIL import Image
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

### Load MNIST data

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Layer Fuctions
  - Linear layer

In [None]:
def linearLayer(inp, inp_dim, out_dim, with_relu=False, \
                with_dropout=False, use_contribute=False, prefix='hidden'):
    """ Linear layer
    Args:
        inp: input data, [batch_size, inp_dim]
        inp_dim: input dimension
        out_dim: output dimension
    Returns:
        out: Output tensor with the computed logits, [batch_size, out_dim]
             return (inp * weight + bias)
    """
    print '%s: %d-dim ->%d-dim (relu: %s, dropout: %s, contribute:%s)' \
            % (prefix, inp_dim, out_dim, with_relu, with_dropout, use_contribute)
    
    with tf.name_scope(prefix):
        if not use_contribute :
            weight = tf.Variable(TODO_FillHere, name='weights')
            bias = tf.Variable(tf.zeros([out_dim]), name='biases')
            out = tf.matmul(inp, weight) + bias
            
            if with_relu : TODO = True
            if with_dropout : TODO = True
                
        else :
            if with_dropout : TODO = True
            else : TODO = True
    return out

### Model Construction Functions
 - Network (3-layer NN)
     - $h_1 = W_1^Tx+b_1$  (hidden dimension=128)
     - $h_2= W_2^Th_1+b_2$ (hidden dimension=32)
     - $h_3= W_3^Th_2+b_3$ (hidden dimension=number of label=10)
     - $\hat{y} = \text{softmax}(h_3)$
 - Loss function
     - Cross-entropy loss
     - $-\sum_k{y_{i,k}\log{\hat{y}_{i,k}}}$
 - Optimization algorithm
     - gradient descent

In [3]:
def buildInputHolders():
    """ define placeholders for model inputs
    Returns:
        img_holder: images placeholder
        label_hodler: labels placeholder
    """
    img_holder = tf.placeholder(tf.float32, [None, 28*28])
    label_holder = tf.placeholder(tf.float32, [None, 10])
    
    return img_holder, label_holder
    
def buildInference(images, img_dim, label_num, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.
    Args:
        images: Images placeholder, from inputs().
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.
    Returns:
        pred: Output tensor with the computed logits.
    """
    # Hidden 1
    hidden1 = linearLayer(images, img_dim, hidden1_units, prefix='hidden1')
    # Hidden 2
    hidden2 = linearLayer(hidden1, hidden1_units,hidden2_units, prefix='hidden2')
    # Linear
    pred = linearLayer(hidden2, hidden2_units, \
                       label_num, with_relu=False, prefix='linear')
    return tf.nn.softmax(pred)

def buildLoss(preds, labels):
    """Calculates the loss from the predictions and the labels.
    Args:
        preds  : prediction tensor, [batch_size, NUM_CLASSES]
        labels : labels tensor, [batch_size]
    Returns:
        loss : loss tensor of type float
    """
    cross_entropy_loss = -tf.reduce_sum(labels * tf.log(preds), reduction_indices=[1])
    return tf.reduce_mean(cross_entropy_loss)

def buildAccuracy(pred, labels):
    """Calculates the accuracy from the predictions and the labels.
    Args:
        preds  : prediction tensor, [batch_size, NUM_CLASSES]
        labels : labels tensor, [batch_size]
    Returns:
        accuracy : accuracy tensor of type float
    """
    correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(labels,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    return accuracy

def buildModel(params):
    """ Build Model
    Args:
        params : dictionary data for parameters {
            'batch_size' : batch size
            'lr' : learning rate
            }
    Returns:
        imgs : input images of model
        labels : input labels of model
        train_step : one step operation for training
        loss : Loss tensor
        acc : accuracy
    """
    imgs, labels = buildInputHolders()
    preds = buildInference(imgs, 28*28, 10, 128, 32)
    loss = buildLoss(preds, labels)
    acc = buildAccuracy(preds, labels)
    
    global_step = tf.Variable(.0, trainable=False)
    optimizer = tf.train.GradientDescentOptimizer(params['lr'])
    train_step = optimizer.minimize(loss, global_step=global_step)
    
    return imgs, labels, train_step, loss, acc

### Main Function

In [10]:
# define parameters
params = {}
params['batch_size'] = 100
params['lr'] = 0.5
params['total_batch'] = int(mnist.train.num_examples/params['batch_size'])

# build model
imgs, labels, train_step, loss, acc = buildModel(params)

In [11]:
batch_size = params['batch_size']
total_batch = params['total_batch']

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(10):
        total_cost = 0

        for i in range(total_batch):
            # batch 데이터를 가져옴
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            
            # 모델 업데이트 수행
            _, cost_val = sess.run([train_step, loss], \
                                   feed_dict={imgs: batch_xs, labels: batch_ys})
            total_cost += cost_val

        print "Epoch:", "%02d" % (epoch + 1), \
            "Avg. cost =", '{:.3f}'.format(total_cost / total_batch)

    # 테스트 데이터에 대한 정확도
    print "Test Accuracy: ", sess.run(acc, \
                    feed_dict={imgs: mnist.test.images, labels: mnist.test.labels})

Epoch: 0001 Avg. cost = 2.303
Epoch: 0002 Avg. cost = 2.302
Epoch: 0003 Avg. cost = 2.302
Epoch: 0004 Avg. cost = 2.302
Epoch: 0005 Avg. cost = 2.302
Epoch: 0006 Avg. cost = 2.302
Epoch: 0007 Avg. cost = 2.302
Epoch: 0008 Avg. cost = 2.302
Epoch: 0009 Avg. cost = 2.302
Epoch: 0010 Avg. cost = 2.302
Test Accuracy:  0.1135


#### Note! 딥 러닝 모델의 성능이 오르는 데 크게 기여한 방법들
  - Activation function
    - ReLu
      - $f(x) = max(0,x)$
      - Non linearlity 함수
      - tf.nn.relu(features)를 이용하여 간단하게 구현 가능
          - features: relu를 적용 할 tensor
      - https://www.tensorflow.org/api_docs/python/tf/nn/relu
      - <img src="../resource/relu.png" width="250" height="250">
  - Regularization
    - Dropout
      - 데이터에서 임의의 값들을 0으로 대체 (모델의 overfitting을 완화)
      - tf.nn.dropout(x, keep_prob)를 이용하여 간단하게 구현 가능
          - x: dropout를 적용 할 tensor
          - keep_prob: 데이터를 유지 할 확률 (1: 모든 데이터 유지, 0: 모든 데이터를 0으로)
      - https://www.tensorflow.org/api_docs/python/tf/nn/dropout
      - <img src="../resource/dropout.png" width="400" height="300">
    - Others
      - L1 regularization
      - L2 regularization

#### ReLu 예제

In [None]:
x = tf.placeholder(tf.float32, shape=(1,10))
relu_x = tf.nn.relu(x)

with tf.Session() as sess:
    inp = np.random.randn(1,10)
    print "input x:\n", inp
    print "relu_x:\n", sess.run(relu_x, {x:inp})

#### Dropout 예제

In [None]:
x = tf.placeholder(tf.float32, shape=(1,10))
dropout_x = tf.nn.dropout(x, 0.5)

with tf.Session() as sess:
    inp = np.random.randn(1,10)
    print "input x:\n", inp
    print "dropout_x:\n", sess.run(dropout_x, {x:inp})