In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist

In [2]:

def load_mnist():
    (train_data, train_labels), (test_data, test_labels) = mnist.load_data()
    train_data = np.expand_dims(train_data, axis = -1) # -1은 끝에서 첫번째를 의미함 
    test_data = np.expand_dims(test_data, axis = -1) 
# 채널 확장하기. 기본적으로 인풋 넣을 때 [batch_size, height, width, channel]
# 인데 여기서는 gray scale이라 마지막 1이 생략되어 있음. 즉 
#[N, 28, 28] -> [N, 28, 28, 1]

    train_data, test_data = normalize(train_data, test_data)
# 이 이미지 값들은 [0~255] 값을 갖는데 이걸 [0~1]사이 값으로 normalize
 
    train_labels = to_categorical(train_labels, 10) #[N,] -> [N, 10]
    train_labels = to_categorical(test_labels, 10) #one hot encoding
    return train_data, train_labels, test_data, test_labels

def normalize (train_data, test_data):
    train_data = train_data.astype(np.float32) / 255.0
    test_data = test_data.astype(np.float32) / 255.0
    return train_data, test_data

In [3]:
# define function

def flatten() : #shape을 펼쳐준다
    return tf.keras.layers.Flatten()

def dense(label_dim, weight_init) :  #fully connected layer 사용을 위해 Dense를 하나 만들어준다 
    return tf.keras.layers.Dense(units=label_dim, use_bias=True, kernel_initializer=weight_init)

def relu() :
    return tf.keras.layers.Activation(tf.keras.activations.relu)


In [4]:
 # create network by class 이용

class create_model(tf.keras.Model):
    def __init__(self, label_dim):  #label dim- 최종적으로 몇개 아웃풋?
        super(create_model, self).__init__()
        weight_init = tf.keras.initializers.RandomNormal()
        self.model = tf.keras.Sequential()  #Seq - >list 자료구조 타입
        self.model.add(flatten()) #[N, 28, 28, 1] -> [N, 784]
        
        for i in range(2):
            # [ N, 784] -> [N, 256] -> [N, 256]
            self.model.add(dense(256, weight_init)) #fully connected layer 이용을 위해 flatten
            self.model.add(relu())
        self.model.add(dense(label_dim, weight_init)) #[N, 256] -> [N,10]
    def call (self, x, training = None, mask = None):
        x = self.model(x)
        return x
    
    

In [5]:
# create network by function 

def create_model(label_dim):
    weight_init=tf.keras.initializers.RandomNormal()
    model=tf.keras.Sequential()
    model=add(flatten())
    for i in range(2):
        model.add(dense(256, weight_init))
        model.add(relu())
    model.add(dense(label_dim, weight_init))
    return model 

In [6]:
# define loss

def loss_fn(model, images, labels):
    logits = model(images, training = True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
    return loss

def accuracy_fn(model, images, labels):
    logits = model(images, training=False)
    prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(labels, -1))
    accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
    return accuracy 

def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    return tape.gradient(loss, model.variables)

In [7]:

""" dataset """
train_x, train_y, test_x, test_y = load_mnist()

""" parameters """
learning_rate = 0.001
batch_size = 128

training_epochs = 1
training_iterations = len(train_x) // batch_size

label_dim = 10

train_flag = True

""" Graph Input using Dataset API """
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).\
    shuffle(buffer_size=100000).\
    prefetch(buffer_size=batch_size).\
    batch(batch_size, drop_remainder=True)

test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y)).\
    shuffle(buffer_size=100000).\
    prefetch(buffer_size=len(test_x)).\
    batch(len(test_x))

ValueError: Dimensions 60000 and 10000 are not compatible

In [8]:
# Check point 는 중간에 끊겼을 때 그 순간의 weight 값을 불러올 수 있음
# Global step -> 몇번째 iteration인지 

#dataset Iterator
train_iterator = train_dataset.make_one_shot_interator()
test_iterator = test_dataset.make_one_shot_interator()

#model
network = create_model(label_dim)

#training
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)


NameError: name 'train_dataset' is not defined