In [3]:
%matplotlib inline

import math
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

## example code에서 성능이 낮았던 multi-layer NN의 성능을 올려봅시다.
### Problem 1. 파라미터 초기화의 중요성 확인
  - all zero 초기화는 좋은 weight 초기화 방법이 아닙니다.
  - linear layer의 weight를 normal distribution으로 초기화 하도록 구현
      - hint: "normal"을 tensorflow 홈페이지에서 검색
      - mean은 0으로, stv는 0.1로 설정
  - 약 92% accuracy를 가지도록 learning rate 찾기
  
### Problem 2. Activation function의 중요성 확인
  - linearLayer 함수에서 linear layer의 결과 값에 ReLu 적용
  - linearLayer 함수에서 with_relu=True로 설정
  - 약 97% accuracy를 가짐

### Problem 3. Dropout 적용
  - linearLayer 함수에서 linear layer의 결과 값 (혹은 Relu를 적용한 값)에 dropout 적용
  - keep_prob은 0.8로 적용
  - linearLayer 함수에서 with_dropout=True로 설정
  - 약 97% accuracy를 가짐 (relu만 적용했을때보다 약간 성능이 떨어짐)

### Problem 4. 
  - 자주 쓰이는 layer는 이미 구현되어 제공됩니다. 제공되는 함수를 이용하여 모델을 정의해봅시다.
  - linearLayer 함수의 인풋 파라미터 use_contribute를 True로 바꾼 후, linearLayer 함수의 역할을 하는 레이어를 아래 링크에서 찾아서 적용해봅시다.
      - https://www.tensorflow.org/api_guides/python/contrib.layers
  - linearLayer 함수에서 use_contribute=True로 설정

### Problem 5. 더 deep한 NN을 만들어 봅시다.
  - 4-layer NN을 구현하고, 성능이 나오는 파라미터 (hidden layer의 dimension들, learning rate)를 찾기
  - note: buildModel 함수의 buildInference 함수 호출하는 부분도 수정 필요


### Load MNIST data

In [14]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Layer Fuctions
  - Linear layer

In [None]:
def linearLayer(inp, inp_dim, out_dim, with_relu=False, \
                with_dropout=False, prefix='hidden', use_contribute=False):
    """ Linear layer
    Args:
        inp: input data, [batch_size, inp_dim]
        inp_dim: input dimension
        out_dim: output dimension
    Returns:
        out: Output tensor with the computed logits, [batch_size, out_dim]
             return (inp * weight + bias)
    """
    print '%s: %d-dim ->%d-dim (relu: %s, dropout: %s, contribute:%s)' \
            % (prefix, inp_dim, out_dim, with_relu, with_dropout, use_contribute)
    
    with tf.name_scope(prefix):
        if not use_contribute :
            # Problem 1
            # TODO: initializing weight using normal distribution, mean (1.0) and std (0.1) 
            
            weight = tf.Variable(TODO_FillHere, name='weights') # TODO
            bias = tf.Variable(tf.zeros([out_dim]), name='biases')
            out = tf.matmul(inp, weight) + bias
            
            if with_relu : 
                # Problem 2
                # TODO: Apply relu operation
                
            if with_dropout :
                # Problem 3
                # TODO: Apply dropout operation
                
                
        else :
            # Problem 4
            # TODO: Using tf.contrib.layers utility
            
    return out

### Model Construction Functions

In [43]:
def buildInputHolders():
    """ define placeholders for model inputs
    Returns:
        img_holder: images placeholder
        label_hodler: labels placeholder
    """
    img_holder = tf.placeholder(tf.float32, [None, 28*28])
    label_holder = tf.placeholder(tf.float32, [None, 10])
    
    return img_holder, label_holder


def buildInference(images, img_dim, label_num, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.
    Args:
        images: Images placeholder, from inputs().
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.
    Returns:
        pred: Output tensor with the computed logits.
    """
    # Hidden 1
    hidden1 = linearLayer(images, img_dim, hidden1_units, prefix='hidden1')
    # Hidden 2
    hidden2 = linearLayer(hidden1, hidden1_units,hidden2_units, prefix='hidden2')
    
    # Problem 5
    # TODO: adding additional linear layer
    
    # Linear
    # 마지막 레이블에 대한 확률값을 예측하는 layer에서는 relu와 dropout을 적용하지 않습니다.
    pred = linearLayer(hidden2, hidden2_units, \
                       label_num, with_relu=False, \
                       with_dropout=False, prefix='linear')
    
    return tf.nn.softmax(pred)

def buildLoss(preds, labels):
    """Calculates the loss from the predictions and the labels.
    Args:
        preds  : prediction tensor, [batch_size, NUM_CLASSES]
        labels : labels tensor, [batch_size]
    Returns:
        loss : loss tensor of type float
    """
    cross_entropy_loss = -tf.reduce_sum(labels * tf.log(preds), reduction_indices=[1])
    return tf.reduce_mean(cross_entropy_loss)

def buildAccuracy(pred, labels):
    """Calculates the accuracy from the predictions and the labels.
    Args:
        preds  : prediction tensor, [batch_size, NUM_CLASSES]
        labels : labels tensor, [batch_size]
    Returns:
        accuracy : accuracy tensor of type float
    """
    correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(labels,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    return accuracy

def buildModel(params):
    """ Build Model
    Args:
        params : dictionary data for parameters {
            'batch_size' : batch size
            'lr' : learning rate
            }
    Returns:
        imgs : input images of model
        labels : input labels of model
        train_step : one step operation for training
        loss : Loss tensor
        acc : accuracy
    """
    imgs, labels = buildInputHolders()
    preds = buildInference(imgs, 28*28, 10, 128, 32) # for Problem 5, should be changed
    loss = buildLoss(preds, labels)
    acc = buildAccuracy(preds, labels)
    
    global_step = tf.Variable(.0, trainable=False)
    optimizer = tf.train.GradientDescentOptimizer(params['lr'])
    train_step = optimizer.minimize(loss, global_step=global_step)
    
    return imgs, labels, train_step, loss, acc

### Main Function
  - 각 문제를 푼 후 아래 두 셀을 실행하세요.

In [44]:
# define parameters
params = {}
params['batch_size'] = 100
params['lr'] = 0.5
params['total_batch'] = int(mnist.train.num_examples/params['batch_size'])

# build model
imgs, labels, train_step, loss, acc = buildModel(params)

In [45]:
batch_size = params['batch_size']
total_batch = params['total_batch']

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(10):
        total_cost = 0

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, cost_val = sess.run([train_step, loss], \
                                   feed_dict={imgs: batch_xs, labels: batch_ys})
            total_cost += cost_val

        print "Epoch:", "%04d" % (epoch + 1), \
            "Avg. cost =", '{:.3f}'.format(total_cost / total_batch)

    # 테스트 데이터에 대한 정확도
    print "Test Accuracy: ", sess.run(acc, \
                    feed_dict={imgs: mnist.test.images, labels: mnist.test.labels})

Epoch: 0001 Avg. cost = 0.449
Epoch: 0002 Avg. cost = 0.196
Epoch: 0003 Avg. cost = 0.142
Epoch: 0004 Avg. cost = 0.112
Epoch: 0005 Avg. cost = 0.093
Epoch: 0006 Avg. cost = 0.077
Epoch: 0007 Avg. cost = 0.067
Epoch: 0008 Avg. cost = 0.057
Epoch: 0009 Avg. cost = 0.050
Epoch: 0010 Avg. cost = 0.045
Test Accuracy:  0.9744
