# MNIST

### MNIST 데이터 받기

In [6]:
import os
import sys
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST splits from the data/ directory with one-hot encoded labels.
mnist = input_data.read_data_sets("data/", one_hot=True)

# Keep the image and label arrays of each split under short names.
train_data, train_label = mnist.train.images, mnist.train.labels
test_data, test_label = mnist.test.images, mnist.test.labels

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


### MNIST 데이터 확인하기

In [7]:
# Size of MNIST: print the shape of each array (train images/labels, then test images/labels).
for _array in (train_data, train_label, test_data, test_label):
    print(_array.shape)

(55000, 784)
(55000, 10)
(10000, 784)
(10000, 10)


### Data Pre-processing (데이터 전처리)

# Implementation (1)


## Loss function (손실 함수) : Cross Entropy

# <center> \\( L(y_i, f(x_i; W)) = -\frac{1}{n}\sum_{i=1}^{n}\sum_{k=1}^{K} y_{i,k} \log(f(x_i; W)_k)\\)</center>


#### get_cross_entropy_loss 함수의 내용을 완성하세요.
#### (Hint : (1) tf.reduce_mean(), tf.reduce_sum(), tf.log() (2) Tensor dimension에 유의 (3) log 함수 사용 시 epsilon 사용하세요.)

In [8]:
def get_cross_entropy_loss(y_true, y_hat, epsilon=1e-8):
    """
    Compute the mean cross entropy between true labels and predictions.

    Args:
        y_true: true label (multiplied element-wise with log(y_hat), so it
            must have the same shape as y_hat)
        y_hat: predicted label (probabilities; summed over axis 1)
        epsilon: small value added inside the log to prevent NaN/-inf when
            a predicted probability is 0

    Returns:
        cross entropy loss: the per-example sum over axis 1, averaged over
        all examples
    """
    with tf.name_scope('cross_entropy'):
        ###################################################################
        #                    Implementation 1                             #
        ###################################################################
        # Shift the probabilities by epsilon so log(0) can never occur.
        log_prob = tf.log(y_hat + epsilon)
        # Sum over axis 1 for each example, then average over the examples.
        per_example = -tf.reduce_sum(y_true * log_prob, axis=1)
        loss = tf.reduce_mean(per_example)

    return loss

def get_accuracy(y_true, y_hat):
    """
    Compute classification accuracy.

    Args:
        y_true: true label (the arg-max along axis 1 is taken as the class)
        y_hat: predicted label (the arg-max along axis 1 is taken as the class)

    Returns:
        accuracy: mean of the per-example matches, cast to float32
    """
    with tf.name_scope('accuracy'):
        # Index of the highest entry along axis 1 for predictions and labels
        predicted_class = tf.argmax(y_hat, 1)
        true_class = tf.argmax(y_true, 1)
        # An example counts as correct when both indices agree
        is_correct = tf.equal(predicted_class, true_class, name='correct_prediction')
        # Average the matches (cast to float) to get the accuracy
        hits = tf.cast(is_correct, tf.float32)
        accuracy = tf.reduce_mean(hits, name='accuracy')
    return accuracy

## Hyper-parameter (하이퍼 파라미터)

In [9]:
# Set hyperparameters
batch_size = 100        # number of examples drawn per training step
max_iter = 2000         # number of training steps
learning_rate = 1e-3    # learning rate passed to the optimizer

# Implementation (2)
## Linear Classifier (선형 분류기)

## <center> \\( f(x) = W^Tx+b \\)</center>

### linear function을 완성하세요.
### (Hint : (1) weight, bias 선언 (2) tf.get_variable()의 initializer  (3) tf.matmul())

In [10]:
def fc(name, out_dim, inputs):
    """
    Fully connected layer.

    Args :
        name : variable scope in which the variables 'w' and 'b' are created
        out_dim : output dimension
        inputs : Input tensor (its last dimension sets the number of weight rows)

    Returns:
        inputs * weight + bias
    """
    with tf.variable_scope(name):
        ###################################################################
        #                    Implementation 2                             #
        ###################################################################
        # Size of the last input dimension = number of rows of the weight matrix.
        in_dim = inputs.get_shape().as_list()[-1]

        # Weight starts from truncated-normal noise with stddev 0.01.
        weight_init = tf.truncated_normal([in_dim, out_dim], stddev=.01)
        weight = tf.get_variable('w', initializer=weight_init)

        # Every bias entry starts at 1.0.
        bias_init = tf.constant(1.0, shape=[out_dim])
        bias = tf.get_variable('b', initializer=bias_init)

        projected = tf.matmul(inputs, weight)
        result = projected + bias

    return result

# Implementation (3)

## Model Setting

### 1. Training data 및 Test data의 각각의 image를 한 vector로 만들어서 train_data, test_data에 각각 저장하세요.
#### Hint) 데이터 차원.
### 2. Dataset로부터 받은 데이터(Image, label)를 담을 변수를 각각 x 및 y_true에 선언하세요.
#### Hint) tf.placeholder
### 3. Implementation (2)에서 구현한 linear classifier 함수값과 softmax 함수를 통한 prediction 값을 y_hat에 저장하세요.
#### Hint) tf.nn.softmax
### 4. 3으로부터 얻은 결과를 통해 Implementation (1)에서 구현한 loss function을 통해 얻은 loss를 cross_entropy에 저장하세요.

In [11]:
tf.reset_default_graph()
# Flatten data
###################################################################
#                    Implementation 3-1                           #
###################################################################
# train_data / test_data are fed directly into the [None, 784] placeholder
# below, so they are used as loaded without any reshaping.

###################################################################
#                    Implementation 3-2                           #
###################################################################
# The leading dimension is None because data is fed in batches of varying size.
x = tf.placeholder(tf.float32, [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])

###################################################################
#                    Implementation 3-3                          #
###################################################################
# Two ReLU hidden layers (512 and 64 units) followed by a 10-way output layer.
h1 = tf.nn.relu(fc('layer1', 512, x))
h2 = tf.nn.relu(fc('layer2', 64, h1))
y_logits = fc('layer3', 10, h2)

# Turn the logits into class probabilities.
y_hat = tf.nn.softmax(y_logits)

###################################################################
#                    Implementation 3-4                           #
###################################################################
cross_entropy = get_cross_entropy_loss(y_true, y_hat)

# Calculate accuracy
accuracy = get_accuracy(y_true, y_hat)
# Make the optimization op (Adam) that minimizes the cross entropy
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Make op to initialize declared variable
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Initialize variables
    sess.run(init)

    # The test feed never changes, so build it once outside the loop.
    test_feed = {x: test_data, y_true: test_label}

    # Training
    for step in range(max_iter):
        # Get batch data and label
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        train_feed = {x: batch_x, y_true: batch_y}
        # Train the network: update the weights and biases on this batch
        sess.run(train_step, feed_dict=train_feed)

        # Report loss (cross entropy) and test accuracy at every 10th step.
        # Both are evaluated only when they are printed; running them on
        # every step was wasted work because the results were discarded.
        if (step + 1) % 10 == 0:
            # Loss on the current batch, measured after the update
            loss = sess.run(cross_entropy, feed_dict=train_feed)
            # Accuracy over the whole test set
            acc = sess.run(accuracy, feed_dict=test_feed)
            print("{}th iteration, loss: {:.4f}, test accuracy: {:.4f}".format(step + 1, loss, acc))

10th iteration, loss: 2.2496, test accuracy: 0.3584
20th iteration, loss: 1.9549, test accuracy: 0.4820
30th iteration, loss: 1.3699, test accuracy: 0.4656
40th iteration, loss: 1.0314, test accuracy: 0.6863
50th iteration, loss: 0.6739, test accuracy: 0.7034
60th iteration, loss: 0.9111, test accuracy: 0.7647
70th iteration, loss: 0.5880, test accuracy: 0.7875
80th iteration, loss: 0.4280, test accuracy: 0.8200
90th iteration, loss: 0.5563, test accuracy: 0.8532
100th iteration, loss: 0.5077, test accuracy: 0.8555
110th iteration, loss: 0.3837, test accuracy: 0.8736
120th iteration, loss: 0.4810, test accuracy: 0.8757
130th iteration, loss: 0.3460, test accuracy: 0.8846
140th iteration, loss: 0.3631, test accuracy: 0.8801
150th iteration, loss: 0.3722, test accuracy: 0.8880
160th iteration, loss: 0.4169, test accuracy: 0.8772
170th iteration, loss: 0.5444, test accuracy: 0.8866
180th iteration, loss: 0.3670, test accuracy: 0.8934
190th iteration, loss: 0.2883, test accuracy: 0.8978
20

1550th iteration, loss: 0.0662, test accuracy: 0.9678
1560th iteration, loss: 0.1168, test accuracy: 0.9689
1570th iteration, loss: 0.0983, test accuracy: 0.9643
1580th iteration, loss: 0.0875, test accuracy: 0.9653
1590th iteration, loss: 0.1466, test accuracy: 0.9670
1600th iteration, loss: 0.1029, test accuracy: 0.9668
1610th iteration, loss: 0.0879, test accuracy: 0.9684
1620th iteration, loss: 0.0445, test accuracy: 0.9680
1630th iteration, loss: 0.1186, test accuracy: 0.9683
1640th iteration, loss: 0.2052, test accuracy: 0.9680
1650th iteration, loss: 0.1085, test accuracy: 0.9677
1660th iteration, loss: 0.0519, test accuracy: 0.9679
1670th iteration, loss: 0.1033, test accuracy: 0.9670
1680th iteration, loss: 0.0802, test accuracy: 0.9690
1690th iteration, loss: 0.0542, test accuracy: 0.9700
1700th iteration, loss: 0.0983, test accuracy: 0.9674
1710th iteration, loss: 0.0231, test accuracy: 0.9688
1720th iteration, loss: 0.0361, test accuracy: 0.9678
1730th iteration, loss: 0.05