<a href="https://colab.research.google.com/github/JoongseokPark/machine_learning_2023/blob/main/%EA%B8%B0%EA%B3%84%ED%95%99%EC%8A%B5_7%EC%A3%BC%EC%B0%A8_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 가중치의 초기화

초기 가중치의 설정은 학습 효율에 많은 영향을 끼친다 

1. 초기 가중치가 모두 0인 경우
> 다음 레이어로 넘기는 값이 0이 되어 가중치 변화가 없다
2. 초기 가중치가 랜덤한 경우
> (sigmoid 기준) 표준편차가 작은 무작위 값으로 초기화하면 각 층의 출력값이 0.5 부근에 몰려, 여러 층의 레이어를 사용하는 의미가 없어진다
3. 사용하는 초기화 방법은 활성화 함수에 따라 다르다
> tanh(), sigmoid() = Xavier initialization
> ReLU() = He initialization


## Regularization(규제)

- 과대적합 방지 방법
- 특정 가중치가 한없이 커지는 것을 방지

1. L1 Regularization 
>가중치 업데이트 시 작은 가중치는 0으로 수렴하고, 중요한 가중치만 남김
2. L2 Regularization
>L1 규제가 가중치 0에서 미분 불가능하다는 점을 보완한 방식으로, 가중치의 제곱을 비용에 더해 가중치가 클수록 더 강하게 규제한다

## Drop Out
- 과대적합 방지 방법
- 은닉층의 노드에 탈락확률 P를 적용한다
- 매 학습 단계마다 각 노드가 확률 P로 탈락하므로, 모든 노드가 매번 학습에 참여하지 않게 되어 과대적합이 줄어든다

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the MNIST dataset (train and test splits of images and labels).
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel intensities from the raw 0-255 range to [0, 1] so that every
# model below is trained on normalized inputs instead of raw byte values.
x_train, x_test = x_train / 255.0, x_test / 255.0

# 가중치 초기화 

In [None]:
# Create a 256-unit Dense layer with ReLU activation whose kernel uses
# the 'uniform' initializer.
dense = tf.keras.layers.Dense(
    units=256,
    activation='relu',
    kernel_initializer='uniform',
)

In [None]:
# Same 256-unit ReLU Dense layer, this time with the 'normal' kernel
# initializer.
dense = tf.keras.layers.Dense(
    units=256,
    activation='relu',
    kernel_initializer='normal',
)

In [None]:
# Same 256-unit ReLU Dense layer, this time with the 'he_normal' kernel
# initializer.
dense = tf.keras.layers.Dense(
    units=256,
    activation='relu',
    kernel_initializer='he_normal',
)

In [None]:
# Baseline classifier: flatten the 28x28 input, pass it through two ReLU
# hidden layers (64 and 32 units), and finish with a 10-unit softmax output.
# No initializer or regularizer is specified for any layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=32, activation='relu'))
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model.summary()

In [None]:
# Compile the baseline model (Adam, sparse categorical cross-entropy,
# accuracy metric) and train it for 10 epochs, using the test split as
# validation data. The returned history is reused by the comparison plots.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Same architecture as the baseline, but with the weight initializer set
# explicitly. Both ReLU hidden layers use 'he_normal' so that the "He init"
# comparison applies to every ReLU layer rather than only the first one;
# the softmax output layer keeps its default initializer.
model2 = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(64, kernel_initializer='he_normal', activation='relu'),
    tf.keras.layers.Dense(32, kernel_initializer='he_normal', activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model2.summary()

In [None]:
# Compile and train the explicitly-initialized model with the same settings
# as the baseline (Adam, sparse categorical cross-entropy, 10 epochs, test
# split as validation data).
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history2 = model2.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Compare the two models: validation loss per epoch for the baseline
# ("No Setting") and for the model with the 'he_normal' initializer.
plt.figure(figsize=(5, 3))
for run, line_color, line_label in (
    (history, 'red', 'No Setting'),
    (history2, 'blue', 'He init'),
):
    val_loss = run.history['val_loss']
    # Derive the epoch axis from the recorded values instead of hard-coding
    # 1..10, so the plot keeps working if `epochs` in fit() is changed.
    plt.plot(np.arange(1, len(val_loss) + 1), val_loss,
             color=line_color, label=line_label)
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()

# Regularization
1. L1
> 가중치의 절댓값에 상수(0.001)를 곱한 값이 비용에 추가된다
2. L2 
> 가중치의 제곱에 상수(0.001)를 곱한 값이 비용에 추가된다

In [None]:
# Apply L2 regularization: both hidden Dense layers share one l2(0.001)
# kernel regularizer and use the 'normal' kernel initializer. The softmax
# output layer is left unregularized.
regula = tf.keras.regularizers.l2(0.001)
model_L2 = tf.keras.Sequential()
model_L2.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model_L2.add(
    tf.keras.layers.Dense(
        units=64,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_L2.add(
    tf.keras.layers.Dense(
        units=32,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_L2.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model_L2.summary()

In [None]:
# Compile and train the L2-regularized model with the same settings as the
# baseline (Adam, sparse categorical cross-entropy, 10 epochs, test split as
# validation data).
model_L2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
historyL2 = model_L2.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Apply L1 regularization: both hidden Dense layers share one l1(0.001)
# kernel regularizer and use the 'normal' kernel initializer. The softmax
# output layer is left unregularized.
regula = tf.keras.regularizers.l1(0.001)
model_L1 = tf.keras.Sequential()
model_L1.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model_L1.add(
    tf.keras.layers.Dense(
        units=64,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_L1.add(
    tf.keras.layers.Dense(
        units=32,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_L1.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model_L1.summary()

In [None]:
# Compile and train the L1-regularized model with the same settings as the
# baseline (Adam, sparse categorical cross-entropy, 10 epochs, test split as
# validation data).
model_L1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
historyL1 = model_L1.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Validation loss per epoch: baseline vs. L2- and L1-regularized models.
# NOTE(review): the regularized models' val_loss may include the L1/L2
# penalty term, which would make it not directly comparable with the
# unregularized baseline -- confirm before drawing conclusions.
plt.figure(figsize=(5, 3))
for run, line_color, line_label in (
    (history, 'red', 'No Setting'),
    (historyL2, 'blue', 'L2'),
    (historyL1, 'green', 'L1'),
):
    val_loss = run.history['val_loss']
    # Derive the epoch axis from the recorded values instead of hard-coding
    # 1..10, so the plot keeps working if `epochs` in fit() is changed.
    plt.plot(np.arange(1, len(val_loss) + 1), val_loss,
             color=line_color, label=line_label)
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()

## Drop Out
1. 노드가 확률적으로 학습에 참여하지 않는다 
2. Regularization의 방법 중 하나

In [None]:
# Dropout model: each hidden Dense layer (L1-regularized, 'normal'
# initializer) is followed by a Dropout layer with rate 0.2.
# NOTE(review): the regularizer here is l1(0.01), ten times the 0.001 factor
# used by model_L1, so the later Dropout-vs-L1 plot differs in more than
# dropout alone -- confirm this is intended.
regula = tf.keras.regularizers.l1(0.01)
model_Drop = tf.keras.Sequential()
model_Drop.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model_Drop.add(
    tf.keras.layers.Dense(
        units=64,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_Drop.add(tf.keras.layers.Dropout(rate=0.2))
model_Drop.add(
    tf.keras.layers.Dense(
        units=32,
        activation='relu',
        kernel_initializer='normal',
        kernel_regularizer=regula,
    )
)
model_Drop.add(tf.keras.layers.Dropout(rate=0.2))
model_Drop.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model_Drop.summary()

In [None]:
# Compile and train the dropout model with the same settings as the baseline
# (Adam, sparse categorical cross-entropy, 10 epochs, test split as
# validation data).
model_Drop.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
historyDrop = model_Drop.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Validation loss per epoch: baseline vs. the dropout model vs. the
# L1-regularized model.
plt.figure(figsize=(5, 3))
for run, line_color, line_label in (
    (history, 'red', 'No Setting'),
    (historyDrop, 'blue', 'Dropout'),
    (historyL1, 'green', 'L1'),
):
    val_loss = run.history['val_loss']
    # Derive the epoch axis from the recorded values instead of hard-coding
    # 1..10, so the plot keeps working if `epochs` in fit() is changed.
    plt.plot(np.arange(1, len(val_loss) + 1), val_loss,
             color=line_color, label=line_label)
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()

## Batch Normalization(배치 정규화)

1. 각 층에서 다음 층으로 넘겨주는 값을 미니배치 단위로 정규화(평균 0, 분산 1에 가깝게)하여 조정

In [None]:
# Batch-normalization model: the baseline architecture with a
# BatchNormalization layer inserted after each ReLU hidden Dense layer.
model_Batch = tf.keras.Sequential()
model_Batch.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model_Batch.add(tf.keras.layers.Dense(units=64, activation='relu'))
model_Batch.add(tf.keras.layers.BatchNormalization())
model_Batch.add(tf.keras.layers.Dense(units=32, activation='relu'))
model_Batch.add(tf.keras.layers.BatchNormalization())
model_Batch.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model_Batch.summary()

In [None]:
# Compile and train the batch-normalization model with the same settings as
# the baseline (Adam, sparse categorical cross-entropy, 10 epochs, test split
# as validation data).
model_Batch.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
historyBatch = model_Batch.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Validation loss per epoch: baseline vs. the batch-normalization model.
plt.figure(figsize=(5, 3))
for run, line_color, line_label in (
    (history, 'red', 'No Setting'),
    (historyBatch, 'blue', 'Batch'),
):
    val_loss = run.history['val_loss']
    # Derive the epoch axis from the recorded values instead of hard-coding
    # 1..10, so the plot keeps working if `epochs` in fit() is changed.
    plt.plot(np.arange(1, len(val_loss) + 1), val_loss,
             color=line_color, label=line_label)
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()

## Activation Function

1. ReLU
2. LeakyReLU

In [None]:
# LeakyReLU model: each hidden Dense layer has no built-in activation and is
# followed by BatchNormalization and then a LeakyReLU(alpha=0.2) activation
# layer; the output layer is a 10-unit softmax.
model_Leaky = tf.keras.Sequential()
model_Leaky.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model_Leaky.add(tf.keras.layers.Dense(units=64))
model_Leaky.add(tf.keras.layers.BatchNormalization())
model_Leaky.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model_Leaky.add(tf.keras.layers.Dense(units=32))
model_Leaky.add(tf.keras.layers.BatchNormalization())
model_Leaky.add(tf.keras.layers.LeakyReLU(alpha=0.2))
model_Leaky.add(tf.keras.layers.Dense(units=10, activation='softmax'))
model_Leaky.summary()

In [None]:
# Compile and train the LeakyReLU model with the same settings as the
# baseline (Adam, sparse categorical cross-entropy, 10 epochs, test split as
# validation data).
model_Leaky.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
historyLeaky = model_Leaky.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Validation loss per epoch: baseline vs. the LeakyReLU model.
plt.figure(figsize=(5, 3))
for run, line_color, line_label in (
    (history, 'red', 'No Setting'),
    (historyLeaky, 'blue', 'Leaky'),
):
    val_loss = run.history['val_loss']
    # Derive the epoch axis from the recorded values instead of hard-coding
    # 1..10, so the plot keeps working if `epochs` in fit() is changed.
    plt.plot(np.arange(1, len(val_loss) + 1), val_loss,
             color=line_color, label=line_label)
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()