In [None]:
from tensorflow.keras.datasets import imdb
import numpy as np

# Load the IMDB train/test splits. num_words=10000 matches the default
# `dimension` that vectorize_sequences uses for its encoded rows.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000,
)

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer index sequences into a 2-D array.

    Args:
        sequences: Sized iterable holding one collection of integer
            indices per sample.
        dimension: Width of each encoded row.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` that is 1.0
        at every index listed in the matching sequence and 0.0 elsewhere.
    """
    shape = (len(sequences), dimension)
    encoded = np.zeros(shape)
    for row, indices in enumerate(sequences):
        # Fancy indexing sets every listed column of this row in one step.
        encoded[row, indices] = 1.0
    return encoded

# Multi-hot encode the training reviews.
x_train = vectorize_sequences(sequences=train_data)
# Multi-hot encode the test reviews.
x_test = vectorize_sequences(sequences=test_data)
# Turn the labels into float32 arrays.
y_train = np.asarray(train_labels).astype(np.float32)
y_test = np.asarray(test_labels).astype(np.float32)

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers

# Baseline ("original") classifier: two 16-unit ReLU hidden layers followed by
# one sigmoid output unit, fed with 10,000-dimensional input vectors.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss optimised with RMSprop; accuracy is logged as 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers

# Lower-capacity variant of the baseline: the hidden layers have 6 units
# each instead of 16; everything else is configured the same way.
smodel = models.Sequential([
    layers.Dense(6, activation='relu', input_shape=(10000,)),
    layers.Dense(6, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Same training configuration as the baseline model.
smodel.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

In [None]:
# Train the baseline model for 20 epochs with mini-batches of 512 samples.
# NOTE: the test split is passed as validation data here.
history = model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
# Train the smaller model with the same schedule as the baseline
# (20 epochs, batch size 512, test split used as validation data).
shistory = smodel.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
# Per-epoch validation losses recorded for the smaller and the baseline model.
s_val_loss = shistory.history['val_loss']
original_val_loss = history.history['val_loss']

# x-axis values for the plots: epochs numbered 1 through 20.
epochs = range(1, 21)

In [None]:
import matplotlib.pyplot as plt

# Validation loss per epoch: baseline (blue '+' markers) against the
# smaller network ('o' markers).
plt.plot(epochs, original_val_loss, 'b+', label='Original')
plt.plot(epochs, s_val_loss, 'o', label='Smaller')

# Axis titles and legend, then render the figure.
plt.ylabel('Validation loss')
plt.xlabel('Epochs')
plt.legend()
plt.show()

In [None]:
# Higher-capacity variant of the baseline: the hidden layers have 1024 units
# each instead of 16.
big_model = models.Sequential([
    layers.Dense(1024, activation='relu', input_shape=(10000,)),
    layers.Dense(1024, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Same loss, optimizer and metric as the other models.
big_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

# Same schedule as the other models: 20 epochs, batch size 512, with the
# test split used as validation data.
big_history = big_model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch validation losses of the baseline, smaller and bigger networks.
original_val_loss = history.history['val_loss']
s_val_loss = shistory.history['val_loss']
big_val_loss = big_history.history['val_loss']

# x-axis values: epochs numbered 1 through 20.
epochs = range(1, 21)

# One marker series per network; the big network is drawn as red 'x' markers.
plt.plot(epochs, original_val_loss, 'b+', label='Original')
plt.plot(epochs, s_val_loss, 'o', label='Smaller')
plt.plot(epochs, big_val_loss, 'x', color='r', label='Big')

plt.ylabel('Validation loss')
plt.xlabel('Epochs')
plt.legend()
plt.show()

In [None]:
# Per-epoch *training* losses of the baseline, smaller and bigger networks.
original_loss = history.history['loss']
s_loss = shistory.history['loss']
big_loss = big_history.history['loss']

import matplotlib.pyplot as plt

# `epochs` is reused from the preceding plotting cell.
plt.plot(epochs, original_loss, 'b+', label='Original')
plt.plot(epochs, s_loss, 'o', label='Smaller')
plt.plot(epochs, big_loss, 'x', color='r', label='Big')
plt.xlabel('Epochs')
# The plotted series come from history['loss'], i.e. the training loss, so the
# axis is labelled accordingly (it previously read 'Validation loss').
plt.ylabel('Training loss')
plt.legend()
plt.show()

용량이 큰 네트워크는 훈련 손실이 빠르게 0에 가까워진다. 용량이 큰 네트워크일수록 훈련 데이터를 더 빠르게 모델링할 수 있어서 훈련 손실이
낮아진다. 하지만 그만큼 과대적합에 더 민감해진다(훈련 손실과 검증 손실 사이에 큰 차이가 발생한다).

In [None]:
from tensorflow.keras import regularizers

# Baseline architecture with an L2 weight penalty (factor 0.001) attached to
# the kernels of both hidden layers; the output layer is left unregularized.
l2_model = models.Sequential([
    layers.Dense(
        16,
        kernel_regularizer=regularizers.l2(0.001),
        activation='relu',
        input_shape=(10000,),
    ),
    layers.Dense(
        16,
        kernel_regularizer=regularizers.l2(0.001),
        activation='relu',
    ),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Same loss, optimizer and metric as the baseline model.
l2_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

In [None]:
# Train the L2-regularized model with the baseline schedule
# (20 epochs, batch size 512, test split used as validation data).
l2_model_hist = l2_model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
# x-axis values: epochs numbered 1 through 20.
epochs = range(1, 21)

# Per-epoch validation loss of the L2-regularized model.
l2_model_val_loss = l2_model_hist.history['val_loss']

# Baseline (blue '+' markers) against the L2-regularized model ('o' markers).
plt.plot(epochs, original_val_loss, 'b+', label='Original')
plt.plot(epochs, l2_model_val_loss, 'o', label='L2-regularized model')

plt.ylabel('Validation loss')
plt.xlabel('Epochs')
plt.legend()
plt.show()

두 모델이 동일한 파라미터 수를 가지고 있더라도 L2 규제를 사용한 모델이 기본 모델보다 훨씬 더 과대적합에 잘 견디고 있음

# 과제
L1 (alpha=0.001) 모델과 비교

In [None]:
# Baseline architecture with a Dropout layer (rate 0.5) inserted after each
# of the two hidden layers.
dpt_model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dropout(0.5),
    layers.Dense(16, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Same loss, optimizer and metric as the baseline model.
dpt_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

In [None]:
# Train the dropout model with the baseline schedule
# (20 epochs, batch size 512, test split used as validation data).
dpt_history = dpt_model.fit(
    x_train,
    y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_test, y_test),
)

In [None]:
# Per-epoch validation loss of the dropout model.
dpt_model_val_loss = dpt_history.history['val_loss']

# x-axis values: epochs numbered 1 through 20.
epochs = range(1, 21)

# Baseline (blue '+' markers) against the dropout model ('o' markers).
plt.plot(epochs, original_val_loss, 'b+', label='Original')
# This series belongs to dpt_model, so the legend names dropout; the label
# previously read 'L2-regularized model', copied from the L2 plot.
plt.plot(epochs, dpt_model_val_loss, 'o', label='Dropout-regularized model')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.legend()
plt.show()

신경망에서 과대적합을 방지하기 위한 방법

- 훈련 데이터를 더 모은다.
- 네트워크의 용량을 감소시킨다.
- 가중치 규제를 추가한다.
- 드롭아웃을 추가한다.