# 14장. 오버피팅의 진단과 해결책

In [None]:
!wget -N https://github.com/DNRY/tfopt/raw/main/notebooks/helper.py

## 14.4 분류 문제

In [None]:
import numpy as np
import matplotlib.pylab as plt
import tensorflow as tf
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from sklearn.datasets import make_circles

# Generate the training data; the labels are reshaped into a single column.
xy_train, labels_train = make_circles(random_state=717, noise=0.1, n_samples=50)
labels_train = np.reshape(labels_train, (-1, 1))

# Generate the test data with a different random seed; same column layout.
xy_test, labels_test = make_circles(random_state=712, noise=0.1, n_samples=50)
labels_test = np.reshape(labels_test, (-1, 1))

In [None]:
def f_fn(x, y):
    """Return the feature list for one point; here simply ``[x, y]``."""
    return [x, y]


# Apply f_fn to every (x, y) row to build the feature matrices.
features = np.array([f_fn(px, py) for px, py in xy_train])
features_test = np.array([f_fn(px, py) for px, py in xy_test])
# Number of feature columns produced by f_fn.
dim_features = features.shape[1]

In [None]:
class MyModel(tf.keras.Model):
    """Fully connected sigmoid network: two hidden layers and one output unit.

    Weights and biases are plain ``tf.Variable`` attributes initialised from
    ``tf.random.normal``; every initializer receives the same op-level seed.

    Args:
        num_inputs: Number of feature columns per sample (default 2).
        num_hidden1: Width of the first hidden layer (default 64).
        num_hidden2: Width of the second hidden layer (default 32).
        **kwargs: Forwarded unchanged to ``tf.keras.Model``.
    """

    def __init__(self, num_inputs=2, num_hidden1=64, num_hidden2=32, **kwargs):
        super().__init__(**kwargs)
        seed = 624

        # The creation order W1, b1, W2, b2, W3, b3 is kept on purpose: it
        # fixes both the order of the random draws and the variable order.
        self.W1 = tf.Variable(tf.random.normal([num_inputs, num_hidden1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([num_hidden1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([num_hidden1, num_hidden2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([num_hidden2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([num_hidden2, 1], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([1], seed=seed))

    def call(self, x):
        """Run the forward pass.

        Args:
            x: Batch of samples with ``num_inputs`` columns.

        Returns:
            Tensor with one sigmoid output per sample (a single column).
        """
        # Each layer is an affine map followed by an element-wise sigmoid.
        h1 = tf.nn.sigmoid(tf.matmul(x, self.W1) + self.b1)
        h2 = tf.nn.sigmoid(tf.matmul(h1, self.W2) + self.b2)
        yhat = tf.nn.sigmoid(tf.matmul(h2, self.W3) + self.b3)
        return yhat

In [None]:
model = MyModel()


def loss(y, yhat):
    """Return the mean binary cross-entropy of predictions ``yhat`` against ``y``.

    Args:
        y: Ground-truth labels (0 or 1), broadcast-compatible with ``yhat``.
        yhat: Predicted probabilities, e.g. the sigmoid output of the model.

    Returns:
        Scalar tensor holding the mean cross-entropy.
    """
    yhat = tf.convert_to_tensor(yhat)
    # Make the label dtype explicit instead of relying on implicit promotion.
    y = tf.cast(y, yhat.dtype)
    # A saturated sigmoid returns exactly 0 or 1; log(0) = -inf and the
    # product 0 * -inf is NaN. Clipping keeps both logarithms finite.
    eps = 1e-7
    yhat = tf.clip_by_value(yhat, eps, 1.0 - eps)
    return tf.reduce_mean(-y * tf.math.log(yhat) - (1 - y) * tf.math.log(1 - yhat))

In [None]:
def accuracy(x,y):
    """Return the fraction of samples in ``x`` whose rounded prediction equals ``y``.

    Predictions come from the module-level ``model``; its outputs are rounded
    to the nearest integer before being compared with the labels.
    """
    predicted_labels = tf.round(model(x))
    hits = tf.cast(tf.equal(y, predicted_labels), tf.float32)
    return tf.reduce_mean(hits)

In [None]:
# Optimisation settings.
lr = 0.5
MaxEpochs = 2000
batch_size = 10
optimizer = tf.keras.optimizers.SGD(lr)

# Per-epoch metric logs.
loss_train_history, loss_test_history = [], []
acc_train_history, acc_test_history = [], []

# Shuffle the training samples once, with a fixed NumPy seed so the order is
# reproducible; features and labels are permuted with the same index array.
np.random.seed(320)
shuffled_id = np.arange(len(features))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features[shuffled_id], labels_train[shuffled_id]

In [None]:
from helper import generate_batches

for epoch in range(MaxEpochs):
    # Every 100 epochs, print the full training-set loss as it stands before
    # this epoch's parameter updates.
    if epoch % 100 == 0:
        curr_loss = loss(labels_train, model(features))
        print(epoch, curr_loss.numpy())

    # generate_batches comes from helper.py (not shown here); presumably it
    # yields (x, y) mini-batches of size batch_size - confirm against helper.
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass has to be recorded on the tape; the gradient
        # computation and the optimizer step run after the context closes.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch))
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # End-of-epoch metrics on the complete training and test sets.
    loss_train = loss(labels_train, model(features))
    loss_test = loss(labels_test, model(features_test))
    acc_train = accuracy(features, labels_train)
    acc_test = accuracy(features_test, labels_test)
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)
    acc_train_history.append(acc_train)
    acc_test_history.append(acc_test)

In [None]:
def plot_model(model, feature_labmda, xy, labels, xy2, labels2, title=''):
    """Plot the model output over a 2-D grid together with two labelled point sets.

    Args:
        model: Callable taking an array of feature rows and returning an
            object with a ``.numpy()`` method holding one value per row.
        feature_labmda: Callable ``(x, y) -> feature list`` applied to every
            grid point. (The misspelled name is kept so keyword callers
            continue to work.)
        xy: Array whose first two columns are the coordinates of the first
            point set (drawn as blue squares / red triangles).
        labels: 0/1 labels for ``xy``, one per point.
        xy2: Coordinates of the second point set (drawn as white markers).
        labels2: 0/1 labels for ``xy2``, one per point.
        title: Figure title.
    """
    # Evaluate the model on a 400 x 400 grid covering [-1.3, 1.3]^2.
    xx, yy = np.meshgrid(np.linspace(-1.3, 1.3, 400), np.linspace(-1.3, 1.3, 400))
    input_xy = np.array([feature_labmda(xxval, yyval)
                         for xxval, yyval in zip(xx.flatten(), yy.flatten())])
    prediction = model(input_xy).numpy()
    Z = prediction.reshape(xx.shape)

    _, ax = plt.subplots(figsize=(7, 7))
    # Filled contours of the model output, then dashed contour lines at the
    # requested levels.
    cs = ax.contourf(xx, yy, Z, 20, cmap='coolwarm', alpha=.8)
    ax.clabel(cs, colors='k')
    cs = ax.contour(xx, yy, Z, cmap='coolwarm', levels=[0, 0.5], linestyles='--', linewidths=2)
    ax.clabel(cs, colors='k')

    # Each entry: coordinates, labels, label -> marker format, extra plot kwargs.
    point_sets = (
        (xy, labels, {0: 'bs', 1: 'r^'}, {}),
        (xy2, labels2, {0: 'ws', 1: 'w^'}, {'alpha': 0.7}),
    )
    for points, point_labels, markers, extra in point_sets:
        # Iterate the arrays directly; flatten() lets both (n, 1) and (n,)
        # label arrays be used.
        for x0, y0, label in zip(points[:, 0], points[:, 1], point_labels.flatten()):
            ax.plot(x0, y0, markers[label], mec='k', **extra)

    ax.set_xlim([-1.3, 1.3])
    ax.set_ylim([-1.3, 1.3])
    ax.grid(linestyle='--', alpha=0.5)
    ax.set_title(title)
    plt.show()

In [None]:
# Evaluate the model on both splits and keep the predictions.
prediction_values = model(features)
prediction_values_test = model(features_test)

# Final loss and accuracy for the training and the test data.
final_loss_train, final_loss_test = (
    loss(labels_train, prediction_values),
    loss(labels_test, prediction_values_test),
)
final_acc_train, final_acc_test = (
    accuracy(features, labels_train),
    accuracy(features_test, labels_test),
)

In [None]:
# Build the two-line title from the final metrics, then draw the model output
# together with the training and test points.
summary_template = 'Train: loss={:1.2f}, acc={:1.2f}\n Test: loss={:1.2f}, acc={:1.2f}'
final_metrics = (final_loss_train, final_acc_train, final_loss_test, final_acc_test)
plot_model(model, f_fn, xy_train, labels_train, xy_test, labels_test,
           summary_template.format(*final_metrics))

In [None]:
# Loss per epoch: '-k' is the training curve, ':k' the test curve.
loss_curves = (
    (loss_train_history, '-k', 'Train (lr = {})'),
    (loss_test_history, ':k', 'Test (lr = {})'),
)
for history, line_format, label_template in loss_curves:
    plt.plot(history, line_format, label=label_template.format(lr))
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('MaxEpochs = {}'.format(MaxEpochs))
plt.legend()
plt.show()

In [None]:
# Accuracy per epoch: '-k' is the training curve, ':k' the test curve.
acc_curves = (
    (acc_train_history, '-k', 'Train (lr = {})'),
    (acc_test_history, ':k', 'Test (lr = {})'),
)
for history, line_format, label_template in acc_curves:
    plt.plot(history, line_format, label=label_template.format(lr))
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('MaxEpochs = {}'.format(MaxEpochs))
plt.legend()
plt.show()

### $L^2$ Regularization

In [None]:
from sklearn.datasets import make_circles

# Generate the training and test sets; labels are reshaped into a single column.
xy_train, labels_train = make_circles(random_state=717, noise=0.1, n_samples=50)
labels_train = np.reshape(labels_train, (-1, 1))

xy_test, labels_test = make_circles(random_state=712, noise=0.1, n_samples=50)
labels_test = np.reshape(labels_test, (-1, 1))


def f_fn(x, y):
    """Return the feature list for one point; here simply ``[x, y]``."""
    return [x, y]


# Apply f_fn to every (x, y) row to build the feature matrices.
features = np.array([f_fn(px, py) for px, py in xy_train])
features_test = np.array([f_fn(px, py) for px, py in xy_test])
# Number of feature columns produced by f_fn.
dim_features = features.shape[1]

In [None]:
class MyModel(tf.keras.Model):
    """Fully connected sigmoid network: two hidden layers and one output unit.

    Weights and biases are plain ``tf.Variable`` attributes initialised from
    ``tf.random.normal``; every initializer receives the same op-level seed.
    The weight matrices ``W1``, ``W2`` and ``W3`` are public attributes.

    Args:
        num_inputs: Number of feature columns per sample (default 2).
        num_hidden1: Width of the first hidden layer (default 64).
        num_hidden2: Width of the second hidden layer (default 32).
        **kwargs: Forwarded unchanged to ``tf.keras.Model``.
    """

    def __init__(self, num_inputs=2, num_hidden1=64, num_hidden2=32, **kwargs):
        super().__init__(**kwargs)
        seed = 624

        # The creation order W1, b1, W2, b2, W3, b3 is kept on purpose: it
        # fixes both the order of the random draws and the variable order.
        self.W1 = tf.Variable(tf.random.normal([num_inputs, num_hidden1], seed=seed))
        self.b1 = tf.Variable(tf.random.normal([num_hidden1], seed=seed))

        self.W2 = tf.Variable(tf.random.normal([num_hidden1, num_hidden2], seed=seed))
        self.b2 = tf.Variable(tf.random.normal([num_hidden2], seed=seed))

        self.W3 = tf.Variable(tf.random.normal([num_hidden2, 1], seed=seed))
        self.b3 = tf.Variable(tf.random.normal([1], seed=seed))

    def call(self, x):
        """Run the forward pass.

        Args:
            x: Batch of samples with ``num_inputs`` columns.

        Returns:
            Tensor with one sigmoid output per sample (a single column).
        """
        # Each layer is an affine map followed by an element-wise sigmoid.
        h1 = tf.nn.sigmoid(tf.matmul(x, self.W1) + self.b1)
        h2 = tf.nn.sigmoid(tf.matmul(h1, self.W2) + self.b2)
        yhat = tf.nn.sigmoid(tf.matmul(h2, self.W3) + self.b3)
        return yhat

In [None]:
model = MyModel()


def l2_norm(W1, W2, W3):
    """Return the sum of ``tf.nn.l2_loss`` over the three weight matrices."""
    return tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)


# Weight of the L2 penalty in the total loss.
beta = 3E-4


def loss(y, yhat, W1, W2, W3):
    """Return the mean binary cross-entropy plus ``beta`` times the L2 penalty.

    Args:
        y: Ground-truth labels (0 or 1), broadcast-compatible with ``yhat``.
        yhat: Predicted probabilities, e.g. the sigmoid output of the model.
        W1, W2, W3: Weight matrices that enter the L2 penalty.

    Returns:
        Scalar tensor: cross-entropy + ``beta * l2_norm(W1, W2, W3)``.
        ``beta`` is read from module scope at call time.
    """
    yhat = tf.convert_to_tensor(yhat)
    # Make the label dtype explicit instead of relying on implicit promotion.
    y = tf.cast(y, yhat.dtype)
    # A saturated sigmoid returns exactly 0 or 1; log(0) = -inf and the
    # product 0 * -inf is NaN. Clipping keeps both logarithms finite.
    eps = 1e-7
    yhat = tf.clip_by_value(yhat, eps, 1.0 - eps)
    cross_entropy = tf.reduce_mean(-y * tf.math.log(yhat) - (1 - y) * tf.math.log(1 - yhat))
    return cross_entropy + beta * l2_norm(W1, W2, W3)

In [None]:
def accuracy(x,y):
    """Return the fraction of samples in ``x`` whose rounded prediction equals ``y``.

    Predictions come from the module-level ``model``; its outputs are rounded
    to the nearest integer before being compared with the labels.
    """
    predicted_labels = tf.round(model(x))
    hits = tf.cast(tf.equal(y, predicted_labels), tf.float32)
    return tf.reduce_mean(hits)

In [None]:
# Optimisation settings.
lr = 0.5
MaxEpochs = 2000
batch_size = 10
optimizer = tf.keras.optimizers.SGD(lr)

# Per-epoch metric logs.
loss_train_history, loss_test_history = [], []
acc_train_history, acc_test_history = [], []

# Shuffle the training samples once, with a fixed NumPy seed so the order is
# reproducible; features and labels are permuted with the same index array.
np.random.seed(320)
shuffled_id = np.arange(len(features))
np.random.shuffle(shuffled_id)
shuffled_x_train, shuffled_y_train = features[shuffled_id], labels_train[shuffled_id]

In [None]:
from helper import generate_batches

for epoch in range(MaxEpochs):
    # Every 100 epochs, print the regularized full training-set loss as it
    # stands before this epoch's parameter updates.
    if epoch % 100 == 0:
        curr_loss = loss(labels_train, model(features), model.W1, model.W2, model.W3)
        print(epoch, curr_loss.numpy())

    # generate_batches comes from helper.py (not shown here); presumably it
    # yields (x, y) mini-batches of size batch_size - confirm against helper.
    for x_batch, y_batch in generate_batches(batch_size, shuffled_x_train, shuffled_y_train):
        # Only the forward pass has to be recorded on the tape; the gradient
        # computation and the optimizer step run after the context closes.
        with tf.GradientTape() as tape:
            curr_loss = loss(y_batch, model(x_batch), model.W1, model.W2, model.W3)
        gradients = tape.gradient(curr_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # End-of-epoch metrics on the complete training and test sets. Both loss
    # values include the L2 penalty term.
    loss_train = loss(labels_train, model(features), model.W1, model.W2, model.W3)
    loss_test = loss(labels_test, model(features_test), model.W1, model.W2, model.W3)
    acc_train = accuracy(features, labels_train)
    acc_test = accuracy(features_test, labels_test)
    loss_train_history.append(loss_train)
    loss_test_history.append(loss_test)
    acc_train_history.append(acc_train)
    acc_test_history.append(acc_test)

In [None]:
# Evaluate the model on both splits and keep the predictions.
prediction_values = model(features)
prediction_values_test = model(features_test)

# Final regularized loss and accuracy for the training and the test data.
final_loss_train, final_loss_test = (
    loss(labels_train, prediction_values, model.W1, model.W2, model.W3),
    loss(labels_test, prediction_values_test, model.W1, model.W2, model.W3),
)
final_acc_train, final_acc_test = (
    accuracy(features, labels_train),
    accuracy(features_test, labels_test),
)

In [None]:
# Build the two-line title from the final metrics, then draw the model output
# together with the training and test points.
summary_template = 'Train: loss={:1.2f}, acc={:1.2f}\n Test: loss={:1.2f}, acc={:1.2f}'
final_metrics = (final_loss_train, final_acc_train, final_loss_test, final_acc_test)
plot_model(model, f_fn, xy_train, labels_train, xy_test, labels_test,
           summary_template.format(*final_metrics))

In [None]:
# Loss per epoch: '-k' is the training curve, ':k' the test curve.
loss_curves = (
    (loss_train_history, '-k', 'Train (lr = {})'),
    (loss_test_history, ':k', 'Test (lr = {})'),
)
for history, line_format, label_template in loss_curves:
    plt.plot(history, line_format, label=label_template.format(lr))
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('MaxEpochs = {}'.format(MaxEpochs))
plt.legend()
plt.show()

In [None]:
# Accuracy per epoch: '-k' is the training curve, ':k' the test curve.
acc_curves = (
    (acc_train_history, '-k', 'Train (lr = {})'),
    (acc_test_history, ':k', 'Test (lr = {})'),
)
for history, line_format, label_template in acc_curves:
    plt.plot(history, line_format, label=label_template.format(lr))
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('MaxEpochs = {}'.format(MaxEpochs))
plt.legend()
plt.show()