# 14장. 오버피팅의 진단과 해결책

In [None]:
import numpy as np
import matplotlib.pylab as plt
import tensorflow as tf
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Training data: noisy cubic sampled on 50 evenly spaced points in [-1, 1].
np.random.seed(327)  # fixed seed so the noise is reproducible
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

# Test data: same grid, but a fresh noise draw from the same random stream.
x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

In [None]:
# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 fully connected regressor built from raw tf.Variables.

    Both hidden layers use a sigmoid activation; the output layer is linear.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_in, n_h1, n_h2, n_out = 1, 10, 5, 1
        seed = 624

        # NOTE(review): the training loop reads model.trainable_variables, which
        # relies on Keras tracking these raw tf.Variable attributes -- confirm
        # with the installed Keras version.
        # Creation order is left unchanged in case the seeded draws depend on it.
        self.W1 = tf.Variable(tf.random.normal([n_in, n_h1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([n_h1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([n_h1, n_h2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([n_h2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([n_h2, n_out], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([n_out], seed=seed))

    def call(self, x):
        """Return the network output for the input batch x."""
        hidden = x
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.sigmoid(tf.matmul(hidden, weight) + bias)
        return tf.matmul(hidden, self.W3) + self.b3

In [None]:
# Instantiate the model.
model = MyModel()


# Loss function: mean squared error between targets and predictions.
def loss(y, yhat):
    return tf.reduce_mean(tf.square(yhat - y))


# Optimizer (Adam) and its learning rate.
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)

# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# Per-epoch loss records.
loss_train_history, loss_test_history = [], []

In [None]:
# Mini-batch stochastic training.
MaxEpochs = 10000
batch_size = 10

from helper import generate_batches

for epoch in range(MaxEpochs):
    # Print the full-batch training loss every 100 epochs.
    if epoch % 100 == 0:
        curr_loss = loss(labels_train, model(features_train))
        print(epoch, curr_loss.numpy())
    # presumably generate_batches yields (x, y) mini-batches of batch_size rows;
    # verify against helper.py.
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass needs to be recorded on the tape; the gradient
        # and the optimizer step are taken after leaving the tape context.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch))
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Record train/test loss after this epoch's updates.
    loss_train = loss(labels_train, model(features_train))
    loss_test = loss(labels_test, model(features_test))
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)

In [None]:
def visualize_l2(pred, x_train, y_train, x_test, y_test):
    """Plot train/test points, the model curve, and one error square per train point.

    Args:
        pred: model predictions, indexed in step with x_train.
        x_train, y_train: training inputs and targets.
        x_test, y_test: test inputs and targets.

    Each square is anchored at (x, y) of a training point with both side
    lengths equal to the signed residual pred - y.
    Draws on the current matplotlib axes; does not call plt.show().
    """
    from matplotlib.patches import Rectangle

    plt.plot(x_train, y_train, '.k', markersize=7, markerfacecolor='none', label='Train Data')
    plt.plot(x_train, pred,  '--k', label='Model')
    plt.plot(x_test, y_test, 'xk', markersize=4, markerfacecolor='none', label='Test Data')

    axes = plt.gca()
    for idx, point in enumerate(zip(x_train, y_train)):
        residual = pred[idx] - point[1]
        square = Rectangle(point, residual, residual,
                           alpha=0.1, facecolor='gray', edgecolor='k')
        axes.add_patch(square)

    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid()
    plt.axis('equal')
    plt.legend(loc='upper left')

In [None]:
# Evaluate the trained model on both splits and record the final losses.
prediction_values = model(features_train)
prediction_values_test = model(features_test)
final_loss = loss(labels_train, prediction_values)
final_loss_test = loss(labels_test, prediction_values_test)
loss_train_history.append(final_loss)
loss_test_history.append(final_loss_test)

# Show the fitted curve with the final losses in the title.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Train/test loss per epoch, with stars on a few reference epochs.
plt.plot(loss_train_history, '-k', label='Train', alpha=0.7)
plt.plot(loss_test_history, '--', color='gray', label='Test', linewidth=2, alpha=0.7)
plt.legend()
ind = [50,200,1000,2000,3000,4000,6000,8000,10000]
# Only mark epochs that were actually recorded, so the cell does not raise
# IndexError when the histories are shorter than the largest marker
# (e.g. after a run with fewer epochs).
n_recorded = min(len(loss_train_history), len(loss_test_history))
for j in ind:
    if j >= n_recorded:
        continue
    plt.plot(j,loss_train_history[j].numpy(), 'k*', markersize=7)
    plt.plot(j,loss_test_history[j].numpy(), '*', color='gray', markersize=7)
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.ylim([-.01, 0.07])
plt.show()

## 14.1 학습 반복 횟수 재설정

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 fully connected regressor built from raw tf.Variables.

    Both hidden layers use a sigmoid activation; the output layer is linear.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_in, n_h1, n_h2, n_out = 1, 10, 5, 1
        seed = 624

        # NOTE(review): the training loop reads model.trainable_variables, which
        # relies on Keras tracking these raw tf.Variable attributes -- confirm
        # with the installed Keras version.
        # Creation order is left unchanged in case the seeded draws depend on it.
        self.W1 = tf.Variable(tf.random.normal([n_in, n_h1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([n_h1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([n_h1, n_h2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([n_h2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([n_h2, n_out], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([n_out], seed=seed))

    def call(self, x):
        """Return the network output for the input batch x."""
        hidden = x
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.sigmoid(tf.matmul(hidden, weight) + bias)
        return tf.matmul(hidden, self.W3) + self.b3

In [None]:
# Instantiate a fresh model.
model = MyModel()

# Optimizer (Adam) and its learning rate.
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)

# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(0, len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train = features_train[shuffled_id]
shuffled_y_train = labels_train[shuffled_id]

# Per-epoch loss records.
loss_train_history = []
loss_test_history = []

# Mini-batch stochastic training, stopped early at 1000 epochs.
MaxEpochs = 1000
batch_size = 10

for epoch in range(MaxEpochs):
    # Print the full-batch training loss every 100 epochs.
    if epoch % 100 == 0:
        curr_loss = loss(labels_train, model(features_train))
        print(epoch, curr_loss.numpy())
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass needs to be recorded on the tape; the gradient
        # and the optimizer step are taken after leaving the tape context.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch))
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Record train/test loss after this epoch's updates.
    loss_train = loss(labels_train, model(features_train))
    loss_test = loss(labels_test, model(features_test))
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)

In [None]:
# Evaluate the trained model on both splits and record the final losses.
prediction_values = model(features_train)
prediction_values_test = model(features_test)
final_loss = loss(labels_train, prediction_values)
final_loss_test = loss(labels_test, prediction_values_test)
loss_train_history.append(final_loss)
loss_test_history.append(final_loss_test)

# Show the fitted curve with the final losses in the title.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

## 14.2 Regularization 함수 추가

### 14.2.1 $L^2$ Regularization

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 fully connected regressor built from raw tf.Variables.

    Both hidden layers use a sigmoid activation; the output layer is linear.
    The weight matrices W1, W2, W3 are read directly by the regularized loss.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_in, n_h1, n_h2, n_out = 1, 10, 5, 1
        seed = 624

        # NOTE(review): the training loop reads model.trainable_variables, which
        # relies on Keras tracking these raw tf.Variable attributes -- confirm
        # with the installed Keras version.
        # Creation order is left unchanged in case the seeded draws depend on it.
        self.W1 = tf.Variable(tf.random.normal([n_in, n_h1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([n_h1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([n_h1, n_h2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([n_h2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([n_h2, n_out], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([n_out], seed=seed))

    def call(self, x):
        """Return the network output for the input batch x."""
        hidden = x
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.sigmoid(tf.matmul(hidden, weight) + bias)
        return tf.matmul(hidden, self.W3) + self.b3

In [None]:
model = MyModel()


def l2_norm(W1, W2, W3):
    """Return the summed tf.nn.l2_loss of the three weight matrices.

    tf.nn.l2_loss(t) is sum(t ** 2) / 2, so despite the name this is half the
    squared L2 norm of all weights, not the norm itself.
    """
    penalties = [tf.nn.l2_loss(w) for w in (W1, W2, W3)]
    return penalties[0] + penalties[1] + penalties[2]

In [None]:
# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)
beta = 1E-5  # weight of the L2 penalty; read at call time by loss()


def loss(y, yhat, W1, W2, W3):
    """Mean squared error plus beta times the summed tf.nn.l2_loss of the weights."""
    data_term = tf.reduce_mean(tf.square(yhat - y))
    penalty = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)
    return data_term + beta * penalty


def mse(y, yhat):
    """Plain mean squared error, without the penalty term."""
    return tf.reduce_mean(tf.square(yhat - y))

In [None]:
# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# Per-epoch records: regularized loss, plain MSE, and the weight penalty.
loss_train_history, loss_test_history = [], []
mse_train_history, mse_test_history = [], []
l2_norm_history = []

In [None]:
# Train with the L2-regularized loss; metrics are recorded BEFORE each epoch's
# updates, so index k of every history is the state after k epochs.
for epoch in range(MaxEpochs):
    # One forward pass per split, shared by the regularized loss and the MSE
    # (the weights do not change between these evaluations).
    pred_train = model(features_train)
    pred_test = model(features_test)
    loss_train = loss(labels_train, pred_train, model.W1, model.W2, model.W3)
    loss_test = loss(labels_test, pred_test, model.W1, model.W2, model.W3)
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)
    mse_train_val = mse(labels_train, pred_train)
    mse_train_history.append(mse_train_val)
    mse_test_val = mse(labels_test, pred_test)
    mse_test_history.append(mse_test_val)
    l2_norm_val = l2_norm(model.W1, model.W2, model.W3)
    l2_norm_history.append(l2_norm_val)

    # Print the (already computed) training loss every 100 epochs.
    if epoch % 100 == 0:
        print(epoch, loss_train.numpy())
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass needs to be recorded on the tape; the gradient
        # and the optimizer step are taken after leaving the tape context.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch), model.W1, model.W2, model.W3)
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

In [None]:
# Metrics of the fully trained model, appended as the last history entry.
prediction_values = model(features_train)
final_loss = loss(labels_train, prediction_values, model.W1, model.W2, model.W3)
prediction_values_test = model(features_test)
final_loss_test = loss(labels_test, prediction_values_test, model.W1, model.W2, model.W3)
loss_train_history.append(final_loss)
loss_test_history.append(final_loss_test)
l2_norm_val = l2_norm(model.W1, model.W2, model.W3)
# Record the final penalty too, so l2_norm_history has the same number of
# entries as the loss/MSE histories.
l2_norm_history.append(l2_norm_val)
# Reuse the predictions computed above instead of running the model again.
mse_train_val = mse(labels_train, prediction_values)
mse_train_history.append(mse_train_val)
mse_test_val = mse(labels_test, prediction_values_test)
mse_test_history.append(mse_test_val)

In [None]:
# Final fit, titled with the final weight penalty.
fit_title = "L2 norm = {:1.2f} at Epoch {}".format(l2_norm_val, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Train/test MSE per epoch (penalty excluded).
ax = plt.gca()
ax.plot(mse_train_history, '-k', label='Train', alpha=0.7)
ax.plot(mse_test_history, '--', color='gray', label='Test', linewidth=2, alpha=0.7)
ax.legend()
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Squared Error')
ax.set_ylim([-.01, 0.07])
plt.show()

In [None]:
# Weight penalty per epoch.
ax = plt.gca()
ax.plot(l2_norm_history, '-k')
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('$L^2$-norm')
plt.show()

#### Dense( ) 이용

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
# Fixed initial values for the Dense layers, drawn with the same shapes, seed
# and order as the hand-written model (W1, b1, W2, b2, W3, b3).
# The sampled tensor is converted to NumPy directly; wrapping it in a
# throwaway tf.Variable first is unnecessary.
num_hidden1 = 10 
num_hidden2 = 5
W1 = tf.constant_initializer(tf.random.normal([1, num_hidden1], seed=624).numpy())
b1 = tf.constant_initializer(tf.random.normal([num_hidden1], seed=624).numpy())

W2 = tf.constant_initializer(tf.random.normal([num_hidden1, num_hidden2], seed=624).numpy())
b2 = tf.constant_initializer(tf.random.normal([num_hidden2], seed=624).numpy())

W3 = tf.constant_initializer(tf.random.normal([num_hidden2, 1], seed=624).numpy())
b3 = tf.constant_initializer(tf.random.normal([1], seed=624).numpy())

class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 regressor built from Dense layers with L2 kernel penalties.

    Initial weights come from the module-level constant initializers W1..b3.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keras L2(l2) adds l2 * sum(w ** 2), while tf.nn.l2_loss is
        # sum(w ** 2) / 2; halving 1e-5 therefore matches a penalty of
        # 1e-5 * tf.nn.l2_loss(w).
        l2_coeff = 1E-5/2

        def penalized_dense(units, activation, kernel_init, bias_init, name):
            # Each layer gets its own regularizer instance.
            return tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_initializer=kernel_init,
                bias_initializer=bias_init,
                name=name,
                kernel_regularizer=tf.keras.regularizers.L2(l2_coeff),
            )

        self.hidden1 = penalized_dense(10, tf.nn.sigmoid, W1, b1, 'hidden_1')
        self.hidden2 = penalized_dense(5, tf.nn.sigmoid, W2, b2, 'hidden_2')
        self.out = penalized_dense(1, None, W3, b3, 'output')

    def call(self, x):
        """Return the network output for the input batch x."""
        return self.out(self.hidden2(self.hidden1(x)))

In [None]:
model = MyModel()

# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)
loss = tf.keras.losses.MeanSquaredError()

# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# shuffle=False keeps the batch order fixed to the shuffle done above.
model.compile(optimizer=optimizer, loss=loss)
history = model.fit(
    shuffled_x_train,
    shuffled_y_train,
    batch_size=batch_size,
    epochs=MaxEpochs,
    shuffle=False,
    validation_data=(features_test, labels_test),
)

In [None]:
def l2_norm(W1, W2, W3):
    """Return the summed tf.nn.l2_loss (sum(w ** 2) / 2) of the three kernels."""
    return tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)


# get_weights()[0] is read as the kernel of each of the three layers.
kernels = [model.layers[i].get_weights()[0] for i in range(3)]
l2_norm_val = l2_norm(kernels[0], kernels[1], kernels[2])

prediction_values = model(features_train)
final_loss = model.evaluate(features_train,labels_train)
prediction_values_test = model(features_test)
final_loss_test = model.evaluate(features_test,labels_test)


def mse(y, yhat):
    """Plain mean squared error, without the penalty term."""
    return tf.reduce_mean(tf.square(yhat - y))


mse_train_val = mse(labels_train, prediction_values)
mse_test_val = mse(labels_test, prediction_values_test)
beta = 1E-5
# Rebuild the regularized loss by hand: MSE + beta * penalty.
penalty_term = beta*l2_norm_val.numpy()
print('mse:',mse_train_val.numpy(), mse_test_val.numpy())
print('l2_norm:', l2_norm_val.numpy())
print('loss:', mse_train_val.numpy() + penalty_term, mse_test_val.numpy() + penalty_term)

In [None]:
# Final fit, titled with the losses returned by model.evaluate.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Same fit, titled with the final weight penalty.
fit_title = "L2 norm = {:1.2f} at Epoch {}".format(l2_norm_val, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Loss curves recorded by model.fit.
ax = plt.gca()
ax.plot(history.history['loss'], '-k', label='Train', alpha=0.7)
ax.plot(history.history['val_loss'], '--', color='gray', label='Test', linewidth=2, alpha=0.7)
ax.legend()
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_ylim([-.01, 0.07])
plt.show()

### 14.2.2 $L^1$ Regularization

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 fully connected regressor built from raw tf.Variables.

    Both hidden layers use a sigmoid activation; the output layer is linear.
    The weight matrices W1, W2, W3 are read directly by the regularized loss.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_in, n_h1, n_h2, n_out = 1, 10, 5, 1
        seed = 624

        # NOTE(review): the training loop reads model.trainable_variables, which
        # relies on Keras tracking these raw tf.Variable attributes -- confirm
        # with the installed Keras version.
        # Creation order is left unchanged in case the seeded draws depend on it.
        self.W1 = tf.Variable(tf.random.normal([n_in, n_h1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([n_h1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([n_h1, n_h2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([n_h2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([n_h2, n_out], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([n_out], seed=seed))

    def call(self, x):
        """Return the network output for the input batch x."""
        hidden = x
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.sigmoid(tf.matmul(hidden, weight) + bias)
        return tf.matmul(hidden, self.W3) + self.b3

In [None]:
model = MyModel()


def l1_norm(W1, W2, W3):
    """Return the sum of absolute values of all entries of the three weight matrices."""
    return tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2)) + tf.reduce_sum(tf.abs(W3))


# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)
beta = 1E-4  # weight of the L1 penalty; read at call time by loss()


def loss(y, yhat, W1, W2, W3):
    """Mean squared error plus beta times the L1 norm of the weights."""
    data_term = tf.reduce_mean(tf.square(yhat - y))
    penalty = tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2)) + tf.reduce_sum(tf.abs(W3))
    return data_term + beta * penalty


def mse(y, yhat):
    """Plain mean squared error, without the penalty term."""
    return tf.reduce_mean(tf.square(yhat - y))


# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# Per-epoch records: regularized loss, plain MSE, and the L1 norm.
loss_train_history, loss_test_history = [], []
mse_train_history, mse_test_history = [], []
l1_norm_history = []

In [None]:
# Train with the L1-regularized loss; metrics are recorded BEFORE each epoch's
# updates, so index k of every history is the state after k epochs.
for epoch in range(MaxEpochs):
    # One forward pass per split, shared by the regularized loss and the MSE
    # (the weights do not change between these evaluations).
    pred_train = model(features_train)
    pred_test = model(features_test)
    loss_train = loss(labels_train, pred_train, model.W1, model.W2, model.W3)
    loss_test = loss(labels_test, pred_test, model.W1, model.W2, model.W3)
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)
    mse_train_val = mse(labels_train, pred_train)
    mse_train_history.append(mse_train_val)
    mse_test_val = mse(labels_test, pred_test)
    mse_test_history.append(mse_test_val)
    l1_norm_val = l1_norm(model.W1, model.W2, model.W3)
    l1_norm_history.append(l1_norm_val)

    # Print the (already computed) training loss every 100 epochs.
    if epoch % 100 == 0:
        print(epoch, loss_train.numpy())
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass needs to be recorded on the tape; the gradient
        # and the optimizer step are taken after leaving the tape context.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch), model.W1, model.W2, model.W3)
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Snapshot plots. `epoch` is 0-based, so epoch + 1 epochs have been trained
    # at this point; the check and the title both use that count.
    if (epoch + 1) in [50, 1000, 4000, 8000]:
        prediction_values = model(features_train)
        l1_norm_val = l1_norm(model.W1, model.W2, model.W3)
        plt.title("L1 norm = {:1.2f} at Epoch {}".format(l1_norm_val, epoch + 1))
        visualize_l2(prediction_values.numpy().reshape(-1), x_train, y_train, x_test, y_test)
        plt.show()

In [None]:
# Metrics of the fully trained model, appended as the last history entry.
prediction_values = model(features_train)
final_loss = loss(labels_train, prediction_values, model.W1, model.W2, model.W3)
prediction_values_test = model(features_test)
final_loss_test = loss(labels_test, prediction_values_test, model.W1, model.W2, model.W3)
loss_train_history.append(final_loss)
loss_test_history.append(final_loss_test)
l1_norm_val = l1_norm(model.W1, model.W2, model.W3)
# Record the final norm too, so l1_norm_history has the same number of
# entries as the loss/MSE histories.
l1_norm_history.append(l1_norm_val)
# Reuse the predictions computed above instead of running the model again.
mse_train_val = mse(labels_train, prediction_values)
mse_train_history.append(mse_train_val)
mse_test_val = mse(labels_test, prediction_values_test)
mse_test_history.append(mse_test_val)

In [None]:
# Final fit, titled with the final L1 norm of the weights.
fit_title = "L1 norm = {:1.2f} at Epoch {}".format(l1_norm_val, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Train/test MSE per epoch (penalty excluded).
ax = plt.gca()
ax.plot(mse_train_history, '-k', label='Train', alpha=0.7)
ax.plot(mse_test_history, '--', color='gray', label='Test', linewidth=2, alpha=0.7)
ax.legend()
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Squared Error')
ax.set_ylim([-.01, 0.07])
plt.show()

In [None]:
# L1 norm of the weights per epoch.
ax = plt.gca()
ax.plot(l1_norm_history, '-k')
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('$L^1$-norm')
plt.show()

#### Dense( ) 이용

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
# Fixed initial values for the Dense layers, drawn with the same shapes, seed
# and order as the hand-written model (W1, b1, W2, b2, W3, b3).
# The sampled tensor is converted to NumPy directly; wrapping it in a
# throwaway tf.Variable first is unnecessary.
num_hidden1 = 10 
num_hidden2 = 5
W1 = tf.constant_initializer(tf.random.normal([1, num_hidden1], seed=624).numpy())
b1 = tf.constant_initializer(tf.random.normal([num_hidden1], seed=624).numpy())

W2 = tf.constant_initializer(tf.random.normal([num_hidden1, num_hidden2], seed=624).numpy())
b2 = tf.constant_initializer(tf.random.normal([num_hidden2], seed=624).numpy())

W3 = tf.constant_initializer(tf.random.normal([num_hidden2, 1], seed=624).numpy())
b3 = tf.constant_initializer(tf.random.normal([1], seed=624).numpy())

class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 regressor built from Dense layers with L1 kernel penalties.

    Initial weights come from the module-level constant initializers W1..b3.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        l1_coeff = 1E-4  # same penalty weight as beta in the hand-written L1 loss

        def penalized_dense(units, activation, kernel_init, bias_init, name):
            # Each layer gets its own regularizer instance.
            return tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_initializer=kernel_init,
                bias_initializer=bias_init,
                name=name,
                kernel_regularizer=tf.keras.regularizers.L1(l1_coeff),
            )

        self.hidden1 = penalized_dense(10, tf.nn.sigmoid, W1, b1, 'hidden_1')
        self.hidden2 = penalized_dense(5, tf.nn.sigmoid, W2, b2, 'hidden_2')
        self.out = penalized_dense(1, None, W3, b3, 'output')

    def call(self, x):
        """Return the network output for the input batch x."""
        return self.out(self.hidden2(self.hidden1(x)))

In [None]:
model = MyModel()

# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)
loss = tf.keras.losses.MeanSquaredError()

# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# shuffle=False keeps the batch order fixed to the shuffle done above.
model.compile(optimizer=optimizer, loss=loss)
history = model.fit(
    shuffled_x_train,
    shuffled_y_train,
    batch_size=batch_size,
    epochs=MaxEpochs,
    shuffle=False,
    validation_data=(features_test, labels_test),
)

In [None]:
def l1_norm(W1, W2, W3):
    """Return the sum of absolute values of all entries of the three kernels."""
    return tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2)) + tf.reduce_sum(tf.abs(W3))


# get_weights()[0] is read as the kernel of each of the three layers.
kernels = [model.layers[i].get_weights()[0] for i in range(3)]
l1_norm_val = l1_norm(kernels[0], kernels[1], kernels[2])

prediction_values = model(features_train)
final_loss = model.evaluate(features_train,labels_train)
prediction_values_test = model(features_test)
final_loss_test = model.evaluate(features_test,labels_test)


def mse(y, yhat):
    """Plain mean squared error, without the penalty term."""
    return tf.reduce_mean(tf.square(yhat - y))


mse_train_val = mse(labels_train, prediction_values)
mse_test_val = mse(labels_test, prediction_values_test)
beta = 1E-4
# Rebuild the regularized loss by hand: MSE + beta * penalty.
penalty_term = beta*l1_norm_val.numpy()
print('mse:',mse_train_val.numpy(), mse_test_val.numpy())
print('l1_norm:', l1_norm_val.numpy())
print('loss:', mse_train_val.numpy() + penalty_term, mse_test_val.numpy() + penalty_term)

In [None]:
# Final fit, titled with the losses returned by model.evaluate.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Same fit, titled with the final L1 norm of the kernels.
fit_title = "L1 norm = {:1.2f} at Epoch {}".format(l1_norm_val, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Loss curves recorded by model.fit.
ax = plt.gca()
ax.plot(history.history['loss'], '-k', label='Train', alpha=0.7)
ax.plot(history.history['val_loss'], '--', color='gray', label='Test', linewidth=2, alpha=0.7)
ax.legend()
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_ylim([-.01, 0.07])
plt.show()

## 14.3 드롭아웃(Dropout)

In [None]:
# Dropout demo on the values 1..10 with rate 0.5.
tf.random.set_seed(0)
data = np.arange(1,11).astype(np.float32)
# The layer is called directly on the data, so no `input_shape` argument is
# needed (that argument is meant for the first layer of a Sequential model).
layer = tf.keras.layers.Dropout(rate = .5)

# training=True activates dropout: dropped entries become 0 and the kept
# ones are scaled by 1 / (1 - rate).
outputs = layer(data, training = True)
print(outputs)

In [None]:
# Same demo with higher drop rates. The layers are called directly on the
# data, so no `input_shape` argument is needed.
layer = tf.keras.layers.Dropout(rate = .9)
outputs = layer(data, training = True)
print(outputs)

layer = tf.keras.layers.Dropout(rate = .8)
outputs = layer(data, training = True)
print(outputs)

In [None]:
# Call the most recently created Dropout layer without training=True.
# NOTE(review): expected to pass the data through unchanged, since dropout is
# documented as active only in training mode -- confirm from the printed output.
outputs = layer(data)
print(outputs)

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Data: regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 regressor from raw tf.Variables with dropout after each hidden layer.

    Both hidden layers use a sigmoid activation followed by the same Dropout
    layer (rate 0.15); the output layer is linear.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        n_in, n_h1, n_h2, n_out = 1, 10, 5, 1
        seed = 119

        # NOTE(review): the training loop reads model.trainable_variables, which
        # relies on Keras tracking these raw tf.Variable attributes -- confirm
        # with the installed Keras version.
        # Creation order is left unchanged in case the seeded draws depend on it.
        self.W1 = tf.Variable(tf.random.normal([n_in, n_h1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([n_h1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([n_h1, n_h2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([n_h2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([n_h2, n_out], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([n_out], seed=seed))

        self.Dropout_layer = tf.keras.layers.Dropout(rate = .15, seed = 1200)

    def call(self, x, training = False):
        """Return the network output; dropout is applied only when training is True."""
        hidden = x
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.sigmoid(tf.matmul(hidden, weight) + bias)
            hidden = self.Dropout_layer(hidden, training = training)
        return tf.matmul(hidden, self.W3) + self.b3

In [None]:
model = MyModel()

# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)


def loss(y, yhat):
    """Mean squared error between targets y and predictions yhat."""
    return tf.reduce_mean(tf.square(yhat - y))

In [None]:
# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(0, len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train = features_train[shuffled_id]
shuffled_y_train = labels_train[shuffled_id]

# Per-epoch loss records.
loss_train_history = []
loss_test_history = []

# Mini-batch stochastic training.
from helper import generate_batches
for epoch in range(MaxEpochs):
    # Print the full-batch training loss (dropout off) every 100 epochs.
    if epoch % 100 == 0:
        curr_loss = loss(labels_train, model(features_train))
        print(epoch, curr_loss.numpy())
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass (with dropout on) needs to be recorded on the
        # tape; the gradient and the optimizer step are taken after leaving
        # the tape context.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch, training = True))
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Record train/test loss after this epoch's updates, with dropout off.
    loss_train = loss(labels_train, model(features_train))
    loss_test = loss(labels_test, model(features_test))
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)

    # Snapshot plots after 1000, 4000 and 8000 completed epochs.
    if (epoch+1) in [1000,4000,8000]:
        prediction_values = model(features_train)
        final_loss = loss(labels_train, prediction_values)
        prediction_values_test = model(features_test)
        final_loss_test = loss(labels_test, prediction_values_test)
        plt.title("Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, epoch+1))
        visualize_l2(prediction_values.numpy().reshape(-1), x_train, y_train, x_test, y_test)
        plt.show()

In [None]:
# Evaluate the trained model (dropout off) and record the final losses.
prediction_values = model(features_train)
prediction_values_test = model(features_test)
final_loss = loss(labels_train, prediction_values)
final_loss_test = loss(labels_test, prediction_values_test)
loss_train_history.append(final_loss)
loss_test_history.append(final_loss_test)

In [None]:
# Final fit, titled with the final train/test losses.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()

In [None]:
# Train/test loss per epoch.
ax = plt.gca()
ax.plot(loss_train_history, '-k', label='Train', alpha=0.7)
ax.plot(loss_test_history, '--', color='gray', label='Test', linewidth=2, alpha=0.7)
ax.legend()
ax.grid()
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_ylim([-.01, 0.07])
plt.show()

#### Dense( ) 이용

In [None]:
def _noisy_cubic(x):
    """Return x^3 + 0.1 x^2 - 0.15 x + 1 plus 0.5 * uniform[0, 1) noise per sample."""
    return x**3 + 0.1 * x**2 - 0.15 * x + 1.0 + 0.5 * np.random.rand(len(x))


# Regenerate the train/test sets (same seed, same grid, separate noise draws).
np.random.seed(327)
x_train = np.linspace(-1, 1, 50)
y_train = _noisy_cubic(x_train)

x_test = np.linspace(-1, 1, 50)
y_test = _noisy_cubic(x_test)

# Features and labels as (n_samples, 1) column arrays.
features_train = x_train.reshape(-1, 1)
labels_train = y_train.reshape(-1, 1)

features_test = x_test.reshape(-1, 1)
labels_test = y_test.reshape(-1, 1)

# Standard-normal initializer shared by every kernel and bias of the model.
# NOTE(review): one seeded initializer instance is reused for all weights; some
# Keras versions return identical values on each call of an integer-seeded
# initializer -- confirm this is the intended initialization.
initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1., seed=119)

In [None]:
class MyModel(tf.keras.Model):
    """1 -> 10 -> 5 -> 1 regressor from Dense layers with dropout after each hidden layer.

    Both hidden layers use a sigmoid activation followed by the same Dropout
    layer (rate 0.1); the output layer is linear. All kernels and biases use
    the module-level `initializer`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        shared_init = dict(kernel_initializer=initializer, bias_initializer=initializer)

        self.hidden1 = tf.keras.layers.Dense(
            10, activation=tf.nn.sigmoid, name='hidden_1', **shared_init)
        self.hidden2 = tf.keras.layers.Dense(
            5, activation=tf.nn.sigmoid, name='hidden_2', **shared_init)
        self.out = tf.keras.layers.Dense(
            1, activation=None, name='output', **shared_init)

        self.Dropout_layer = tf.keras.layers.Dropout(rate = .1, seed = 1200)

    def call(self, x, training = False):
        """Return the network output; dropout is applied only when training is True."""
        hidden = x
        for dense in (self.hidden1, self.hidden2):
            hidden = self.Dropout_layer(dense(hidden), training = training)
        return self.out(hidden)

In [None]:
model = MyModel()

# Training hyper-parameters.
MaxEpochs = 10000
batch_size = 10
lr = 0.05
optimizer = tf.keras.optimizers.Adam(lr)
loss = tf.keras.losses.MeanSquaredError()

# Shuffle the training samples once, with a fixed seed.
np.random.seed(320)
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features_train[shuffled_id], labels_train[shuffled_id]

# shuffle=False keeps the batch order fixed to the shuffle done above.
model.compile(optimizer=optimizer, loss=loss)
history = model.fit(
    shuffled_x_train,
    shuffled_y_train,
    batch_size=batch_size,
    epochs=MaxEpochs,
    shuffle=False,
    validation_data=(features_test, labels_test),
)

In [None]:
# Predictions (dropout off) and compiled-loss values on both splits.
prediction_values, prediction_values_test = model(features_train), model(features_test)
final_loss = model.evaluate(features_train,labels_train)
final_loss_test = model.evaluate(features_test,labels_test)

In [None]:
def mse(y, yhat):
    """Mean squared error between targets y and predictions yhat."""
    return tf.reduce_mean(tf.square(yhat - y))


# Recompute the MSE by hand on both splits; only the train value is printed.
mse_train_val = mse(labels_train, model(features_train))
mse_test_val = mse(labels_test, model(features_test))
print('loss:', mse_train_val.numpy())

In [None]:
# Final fit, titled with the losses returned by model.evaluate.
fit_title = "Train Loss = {:1.3f}, Test Loss = {:1.3f}\n at Epoch {}".format(final_loss, final_loss_test, MaxEpochs)
plt.title(fit_title)
visualize_l2(prediction_values.numpy().ravel(), x_train, y_train, x_test, y_test)
plt.show()