# 뉴럴 네트워크 학습 알고리즘 구현

## Import modules

- 보통은 Keras로 하지만, 지금은 학습목적으로 tensorflow방식 그대로 사용

In [3]:
import tensorflow as tf

## 하이퍼파라미터 정의

In [4]:
EPOCHS = 10

## 네트워크 구조 정의

In [5]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()  ## 상속자인 tf.keras.Model 클래스의 __init__ 해주기
        self.flatten = tf.keras.layers.Flatten(input_shape=(28,28))  ## 28 X 28행렬데이터를 벡터형태로 flatten
        self.dense1 = tf.keras.layers.Dense(32, activation='relu') ## 첫번째 layer에는 32개의 뉴런 사용
        self.dense2 = tf.keras.layers.Dense(64, activation='relu')
        self.dense3 = tf.keras.layers.Dense(128, activation='relu')
        self.dense4 = tf.keras.layers.Dense(256, activation='relu')
        self.dense5 = tf.keras.layers.Dense(10, activation='softmax')
        
    def call(self, x, trainig=None, mask=None):
        x = self.flatten(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        return self.dense5(x)

## 학습(train) 함수 구현

- image : 입력값
- labels : 출력값

In [10]:
@tf.function
def train_step(model, image, labels, loss_object, optimizer, train_loss, train_accuracy):
    
    # forwarding
    with tf.GradientTape() as tape: ## 이 안에서 하는 연산은 모두 미분을 계산해놓음  -> 나중에 편리하게 뽑아쓸 수 있음
        predictions = model(image) # batch_size=32. label종류=10가지 일때 --> (32 X 10) 의 형태로 나올 것.
        loss = loss_object(labels, predictions) 
        
    # backwarding
    gradients = tape.gradient(loss, model.trainable_variables) # loss를 모든 trainable_variables로 미분
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # zip()  ->  gradients와 trainable_variables를 같이 iteration 할 수 있는 iteration 객체
        
    train_loss(loss)
    train_accuracy(labels, predictions)

## 테스트 함수 구현

In [11]:
@tf.function
def test_step(model, image, labels, loss_object, test_loss, test_accuracy):
    predictions = model(image) # batch_size=32. label종류=10가지 일때 --> (32 X 10) 의 형태로 나올 것.
    loss = loss_object(labels, predictions) 
    
    test_loss(loss)
    test_accuracy(labels, predictions)

## 데이터 불러오기

In [12]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1024).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


## 모델 생성

In [18]:
model = MyModel()

## 손실함수 및 최적화 알고리즘 정의

In [19]:
print(y_train, y_train.shape) ## y의 범주들이 one-hot encoding되어있지 않고 각각이 범주이름을 나타낸다 --> SparseCategoricalCrossentropy 사용
## y의 범주들이 one-hot encoding되어있으면 --> CategoricalCrossentropy 사용

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

[5 0 4 ... 5 6 8] (60000,)


## 성능 지표 정의

In [20]:
train_loss = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy")

test_loss = tf.keras.metrics.Mean(name="test_loss")
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="test_accuracy")

## 학습 루프 구현

In [23]:
for epoch in range(EPOCHS):
    for image, labels in train_ds:
        train_step(model, image, labels, loss_object, optimizer, train_loss, train_accuracy)
        
    for image, labels in test_ds:
        test_step(model, image, labels, loss_object, test_loss, test_accuracy)
    
    print('Epoch {}: loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'.format(epoch+1, 
                                                                                       train_loss.result(), 
                                                                                       train_accuracy.result() * 100, 
                                                                                       test_loss.result(), 
                                                                                       test_accuracy.result() * 100))
    
    ## 누적되는것을 방지하기 위해 reset해줌
    train_loss.reset_states()
    train_accuracy.reset_states()

    test_loss.reset_states()
    test_accuracy.reset_states()

W1014 20:18:21.086135 24680 base_layer.py:1814] Layer my_model_3 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Epoch 1: loss: 0.24200297892093658, Accuracy: 92.78500366210938, Test Loss: 0.13502158224582672, Test Accuracy: 95.84000396728516
Epoch 2: loss: 0.10781527310609818, Accuracy: 96.59666442871094, Test Loss: 0.11546571552753448, Test Accuracy: 96.51000213623047
Epoch 3: loss: 0.08006967604160309, Accuracy: 97.46166229248047, Test Loss: 0.09669629484415054, Test Accuracy: 97.23999786376953
Epoch 4: loss: 0.06349801272153854, Accuracy: 97.95166778564453, Test Loss: 0.10453332960605621, Test Accuracy: 97.04000091552734
Epoch 5: loss: 0.053288742899894714, Accuracy: 98.30500030517578, Test Loss: 0.0958881601691246, Test Accuracy: 97.38999938964844
Epoch 6: loss: 0.04579765349626541, Accuracy: 98.48500061035156, Test Loss: 0.09782060235738754, Test Accuracy: 97.29000091552734
Epoch 7: loss: 0.036919910460710526, Accuracy: 98.78666687011719, Test Loss: 0.1066141352057457, Test Accuracy: 97.48999786376953
Epoch 8: loss: 0.03473953157663345, Accuracy: 98.90333557128906, Test Loss: 0.106197603046