### 역전파(back propagation) 알고리즘 
목푯값과 모델의 예측값 사이의 오차를 구한 뒤, 그 오차를 출력층에서 입력층 방향으로 거꾸로 전파해 가며 각 층의 가중치(weight)를 업데이트하는 과정

### 기울기 소실 문제(Vanishing Gradient) 
층이 깊은 모델에서는 역전파 시 전달되는 손실 함수(loss function)의 gradient 값에 활성화 함수인 sigmoid 함수의 기울기(최대 0.25이며, 입력의 절댓값이 커질수록 0에 가까워짐)가 층을 지날 때마다 반복해서 곱해집니다.<br> 그 결과 입력층에 가까운 층일수록 gradient가 0에 가까워져 가중치 업데이트가 잘 이루어지지 않는 문제가 발생

### ReLU & sigmoid
모델의 층이 깊은 경우(여기서는 32개 유닛의 히든층 10개), 히든층의 활성화 함수가 ‘relu’일 때와 ‘sigmoid’일 때의 모델 정확도를 비교하여 확인

* 손실 함수(loss) : 'sparse_categorical_crossentropy'
* 최적화 알고리즘(optimizer) : 'adam'
* 평가 방법(metrics): ['accuracy']

In [3]:
import logging
import os

# TF_CPP_MIN_LOG_LEVEL must be in the environment before TensorFlow is
# imported; setting it afterwards does not filter the native (C++) log
# messages that are emitted while the library is being loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf  # noqa: E402  (intentionally after the env setup)

# Suppress Python-side log records of severity WARNING and below.
logging.disable(logging.WARNING)

### make_model_relu

In [4]:
def make_model_relu(num_hidden_layers=10, units=32):
    """Build a deep fully connected classifier with ReLU hidden layers.

    The model flattens a 28x28 input, applies ``num_hidden_layers`` Dense
    layers of ``units`` neurons each with ReLU activation, and ends in a
    10-unit softmax layer.  Called without arguments it produces the same
    10 x 32 architecture that used to be written out layer by layer.

    Args:
        num_hidden_layers: Number of hidden Dense layers (default 10).
        units: Number of neurons in every hidden Dense layer (default 32).

    Returns:
        A ``tf.keras.models.Sequential`` model; this function does not
        compile it.
    """
    # Create the layers in the same order as before (Flatten first) so the
    # resulting model is laid out exactly like the hand-written version.
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    layers.extend(
        tf.keras.layers.Dense(units, activation='relu')
        for _ in range(num_hidden_layers)
    )
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model_relu = tf.keras.models.Sequential(layers)
    return model_relu

### make_model_sig

In [5]:
def make_model_sig(num_hidden_layers=10, units=32):
    """Build a deep fully connected classifier with sigmoid hidden layers.

    The model flattens a 28x28 input, applies ``num_hidden_layers`` Dense
    layers of ``units`` neurons each with sigmoid activation, and ends in a
    10-unit softmax layer.  Called without arguments it produces the same
    10 x 32 architecture that used to be written out layer by layer.

    Args:
        num_hidden_layers: Number of hidden Dense layers (default 10).
        units: Number of neurons in every hidden Dense layer (default 32).

    Returns:
        A ``tf.keras.models.Sequential`` model; this function does not
        compile it.
    """
    # Create the layers in the same order as before (Flatten first) so the
    # resulting model is laid out exactly like the hand-written version.
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    layers.extend(
        tf.keras.layers.Dense(units, activation='sigmoid')
        for _ in range(num_hidden_layers)
    )
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model_sig = tf.keras.models.Sequential(layers)
    return model_sig

### model scores

In [6]:
def main():
    """Train the ReLU and sigmoid networks on MNIST and print their test accuracy.

    Both models are compiled with the same loss, optimizer and metric,
    fitted for 5 epochs on the training split, and evaluated on the test
    split.  The last entry of each ``evaluate`` result is printed as the
    accuracy.

    Returns:
        A tuple ``(relu_history, sigmoid_history)`` with the objects
        returned by ``fit`` for the ReLU and the sigmoid model.
    """
    # Load MNIST and divide the image arrays by 255.
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
    train_x = train_x / 255.0
    test_x = test_x / 255.0

    relu_net = make_model_relu()    # hidden layers use relu
    sigmoid_net = make_model_sig()  # hidden layers use sigmoid
    both_nets = (relu_net, sigmoid_net)

    # Same training configuration for both, so only the activation differs.
    for net in both_nets:
        net.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )

    for net in both_nets:
        net.summary()

    relu_history = relu_net.fit(train_x, train_y, epochs=5, verbose=0)
    print('\n')
    sigmoid_history = sigmoid_net.fit(train_x, train_y, epochs=5, verbose=0)

    relu_scores = relu_net.evaluate(test_x, test_y)
    sigmoid_scores = sigmoid_net.evaluate(test_x, test_y)

    print('\naccuracy_relu: ', relu_scores[-1])
    print('accuracy_sig: ', sigmoid_scores[-1])

    return relu_history, sigmoid_history


# Run the comparison only when executed as a script, not on import.
if __name__ == "__main__":
    main()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 32)                25120     
_________________________________________________________________
dense_23 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_24 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_25 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_26 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_27 (Dense)             (None, 32)               

### 전체 코드

In [1]:
import logging
import os

# TF_CPP_MIN_LOG_LEVEL must be in the environment before TensorFlow is
# imported; setting it afterwards does not filter the native (C++) log
# messages that are emitted while the library is being loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf  # noqa: E402  (intentionally after the env setup)

# Suppress Python-side log records of severity WARNING and below.
logging.disable(logging.WARNING)

'''
1. relu layers
'''

def make_model_relu(num_hidden_layers=10, units=32):
    """Build a deep fully connected classifier with ReLU hidden layers.

    The model flattens a 28x28 input, applies ``num_hidden_layers`` Dense
    layers of ``units`` neurons each with ReLU activation, and ends in a
    10-unit softmax layer.  Called without arguments it produces the same
    10 x 32 architecture that used to be written out layer by layer.

    Args:
        num_hidden_layers: Number of hidden Dense layers (default 10).
        units: Number of neurons in every hidden Dense layer (default 32).

    Returns:
        A ``tf.keras.models.Sequential`` model; this function does not
        compile it.
    """
    # Create the layers in the same order as before (Flatten first) so the
    # resulting model is laid out exactly like the hand-written version.
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    layers.extend(
        tf.keras.layers.Dense(units, activation='relu')
        for _ in range(num_hidden_layers)
    )
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model_relu = tf.keras.models.Sequential(layers)
    return model_relu
    
'''
2. sigmoid layers
'''
    
def make_model_sig(num_hidden_layers=10, units=32):
    """Build a deep fully connected classifier with sigmoid hidden layers.

    The model flattens a 28x28 input, applies ``num_hidden_layers`` Dense
    layers of ``units`` neurons each with sigmoid activation, and ends in a
    10-unit softmax layer.  Called without arguments it produces the same
    10 x 32 architecture that used to be written out layer by layer.

    Args:
        num_hidden_layers: Number of hidden Dense layers (default 10).
        units: Number of neurons in every hidden Dense layer (default 32).

    Returns:
        A ``tf.keras.models.Sequential`` model; this function does not
        compile it.
    """
    # Create the layers in the same order as before (Flatten first) so the
    # resulting model is laid out exactly like the hand-written version.
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    layers.extend(
        tf.keras.layers.Dense(units, activation='sigmoid')
        for _ in range(num_hidden_layers)
    )
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model_sig = tf.keras.models.Sequential(layers)
    return model_sig

'''
3. 테스트 데이터로 모델 성능 평가 및 비교
'''

def main():
    """Train the ReLU and sigmoid networks on MNIST and print their test accuracy.

    Both models are compiled with the same loss, optimizer and metric,
    fitted for 5 epochs on the training split, and evaluated on the test
    split.  The last entry of each ``evaluate`` result is printed as the
    accuracy.

    Returns:
        A tuple ``(relu_history, sigmoid_history)`` with the objects
        returned by ``fit`` for the ReLU and the sigmoid model.
    """
    # Load MNIST and divide the image arrays by 255.
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
    train_x = train_x / 255.0
    test_x = test_x / 255.0

    relu_net = make_model_relu()    # hidden layers use relu
    sigmoid_net = make_model_sig()  # hidden layers use sigmoid
    both_nets = (relu_net, sigmoid_net)

    # Same training configuration for both, so only the activation differs.
    for net in both_nets:
        net.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )

    for net in both_nets:
        net.summary()

    relu_history = relu_net.fit(train_x, train_y, epochs=5, verbose=0)
    print('\n')
    sigmoid_history = sigmoid_net.fit(train_x, train_y, epochs=5, verbose=0)

    relu_scores = relu_net.evaluate(test_x, test_y)
    sigmoid_scores = sigmoid_net.evaluate(test_x, test_y)

    print('\naccuracy_relu: ', relu_scores[-1])
    print('accuracy_sig: ', sigmoid_scores[-1])

    return relu_history, sigmoid_history


# Run the comparison only when executed as a script, not on import.
if __name__ == "__main__":
    main()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_4 (Dense)              (None, 32)                1056      
______________________________