# 처음부터 훈련 루프 작성
---

기본적인 코드 작성은 이전 챕터만으로 충분히 가능합니다.

다만, model.fit()을 활용하지 않고 훈련 루프를 처음부터 직접 작성하려면 좀 더 노력이 필요합니다.

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# GradientTape 사용

model.trainable_weights를 사용하면 모델의 학습 가능한 파라미터에 접근할 수 있습니다.

GradientTape로 이 파라미터들에 대한 그래디언트를 계산한 뒤, 옵티마이저를 통해 파라미터를 업데이트할 수 있습니다.

In [2]:
# Build a small fully connected classifier with the Keras functional API:
# a 784-feature input followed by two 64-unit ReLU hidden layers.
inputs = keras.Input(shape=(784,), name="digits")
x1 = layers.Dense(64, activation="relu")(inputs)
x2 = layers.Dense(64, activation="relu")(x1)
# The output layer has no activation, so it emits one raw score (logit)
# per class; the loss used later is configured with from_logits=True.
outputs = layers.Dense(10, name="predictions")(x2)
model = keras.Model(inputs=inputs, outputs=outputs)

In [3]:
# Instantiate an optimizer.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function. from_logits=True because the model's output
# layer applies no activation.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Load MNIST and flatten every image into a 784-element vector.
# NOTE(review): the pixel values are not rescaled here — presumably they are
# still raw integers; confirm whether normalization was intended.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))

# Reserve the last 10,000 training samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

# Prepare the training dataset: shuffle with a 1024-element buffer, then
# group the samples into batches of `batch_size`.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Prepare the validation dataset (batched only, no shuffling).
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)

In [4]:
# Hand-written training loop: for every epoch, iterate over the batches,
# compute the loss under a GradientTape, and apply one optimizer update
# per batch.
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the model.
            # The operations that the model applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the loss with respect to the trainable variables.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # Derive the sample count from batch_size rather than a
            # hard-coded 64 so the log stays correct if the batch size changes.
            print("Seen so far: %d samples" % ((step + 1) * batch_size))


Start of epoch 0
Training loss (for one batch) at step 0: 101.5326
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.1228
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.1271
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.5966
Seen so far: 38464 samples

Start of epoch 1
Training loss (for one batch) at step 0: 1.1202
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.4148
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.6079
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.4739
Seen so far: 38464 samples


# Low Level Metric 처리
---

위의 기본적인 루프에 메트릭 모니터링을 추가합니다.

- 루프 시작 시 Metric 인스턴스화
- 각 배치 후 metric.update_state() 호출
- 메트릭의 현재 값을 표시해야 하는 경우 metric.result() 호출
- 메트릭의 상태를 지워야 할 때 metric.reset_states() 호출

In [5]:
# Rebuild the classifier from scratch so this section starts from freshly
# initialized weights: 784 inputs -> Dense(64, relu) -> Dense(64, relu)
# -> Dense(10) producing logits.
inputs = keras.Input(shape=(784,), name="digits")
x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(10, name="predictions")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Instantiate an optimizer to train the model.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function (the model outputs logits, hence from_logits=True).
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Prepare the metrics: separate accuracy trackers for training and
# validation so their accumulated state never mixes.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

In [6]:
import time

# Training loop with metric tracking: accuracy is accumulated batch by batch,
# reported at the end of every epoch, and then reset before the next epoch.
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Update training metric.
        train_acc_metric.update_state(y_batch_train, logits)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # Derive the sample count from batch_size rather than a
            # hard-coded 64 so the log stays correct if the batch size changes.
            print("Seen so far: %d samples" % ((step + 1) * batch_size))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch.
    # NOTE(review): newer Keras releases name this method reset_state();
    # confirm against the installed version.
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    # The reported time covers both the training and the validation pass.
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 0
Training loss (for one batch) at step 0: 107.7538
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.2039
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.2359
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.1142
Seen so far: 38464 samples
Training acc over epoch: 0.6108
Validation acc: 0.7745
Time taken: 2.79s

Start of epoch 1
Training loss (for one batch) at step 0: 0.5413
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.0308
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.8225
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.8744
Seen so far: 38464 samples
Training acc over epoch: 0.7934
Validation acc: 0.8394
Time taken: 2.71s


# tf.function으로 훈련 스텝 가속화
---

기본적으로 TensorFlow 2는 Eager Execution(즉시 실행) 방식으로 동작합니다.

그래프를 생성하지 않고, 함수를 바로 실행하는 방식입니다.

이는 디버깅 등에서 여러 이점이 있지만, 속도 면에서는 그래프를 미리 생성해 두었을 때보다 느립니다.

함수에 @tf.function 데코레이터를 붙이면 해당 함수를 정적(static) 그래프로 컴파일할 수 있습니다.

In [7]:
@tf.function
def train_step(x, y):
    """Run one optimization step on a single batch and return its loss.

    The forward pass and the loss are recorded on a GradientTape, the
    resulting gradients are applied to the model's trainable weights through
    the optimizer, and the training accuracy metric is updated with the
    batch predictions.
    """
    with tf.GradientTape() as grad_tape:
        predictions = model(x, training=True)
        batch_loss = loss_fn(y, predictions)

    # Bind the weight list once so the gradient call and the update pair
    # up against the same sequence of variables.
    weights = model.trainable_weights
    gradients = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    train_acc_metric.update_state(y, predictions)
    return batch_loss

In [8]:
@tf.function
def test_step(x, y):
    """Evaluate one validation batch and fold it into the validation accuracy.

    The model is called with training=False; nothing is returned, the result
    is accumulated in val_acc_metric.
    """
    predictions = model(x, training=False)
    val_acc_metric.update_state(y, predictions)

In [9]:
import time

# Same loop as before, but the per-batch work is delegated to the
# tf.function-compiled train_step / test_step helpers.
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # Derive the sample count from batch_size rather than a
            # hard-coded 64 so the log stays correct if the batch size changes.
            print("Seen so far: %d samples" % ((step + 1) * batch_size))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch.
    # NOTE(review): newer Keras releases name this method reset_state();
    # confirm against the installed version.
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        test_step(x_batch_val, y_batch_val)

    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    # The reported time covers both the training and the validation pass.
    print("Time taken: %.2fs" % (time.time() - start_time))


Start of epoch 0
Training loss (for one batch) at step 0: 0.6773
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.4892
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.5546
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.6783
Seen so far: 38464 samples
Training acc over epoch: 0.8412
Validation acc: 0.8642
Time taken: 0.98s

Start of epoch 1
Training loss (for one batch) at step 0: 0.6138
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.4593
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.3656
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.6005
Seen so far: 38464 samples
Training acc over epoch: 0.8635
Validation acc: 0.8693
Time taken: 0.70s
