In [16]:
import tensorflow as tf
import pandas as pd
import os
import sys
# Extend the module search path with the project's `bin` and `utils` folders.
# NOTE(review): both entries are absolute paths under one user's home
# directory, so they are unlikely to exist on any other machine; consider
# deriving them from a configurable project root instead.
sys.path.append('/Users/hoyoung/Desktop/pycharm_work/korean_grammar_corrector/bin')
sys.path.append('/Users/hoyoung/Desktop/pycharm_work/korean_grammar_corrector/utils')

# Project-local modules (presumably resolved through the sys.path entries
# added above -- confirm).
import tensorflow_preprocess as tp
# NOTE(review): wildcard import hides where names come from; the
# `transformer` factory called later in this notebook is presumably
# provided by this module -- confirm and import it explicitly.
from model.transformer_model import *

In [4]:
# Read the corpus CSV into a DataFrame; later cells use its 'src' and 'tgt'
# columns.
corpus_csv = '../../../data/train/corpus_repair_test.csv'
df = pd.read_csv(corpus_csv)

In [6]:
MAX_LENGTH = 24


def loss_function(y_true, y_pred):
    """Sparse categorical cross-entropy averaged over non-padding tokens.

    Args:
        y_true: Integer label ids. Reshaped here to (-1, MAX_LENGTH - 1);
            positions whose id is 0 are masked out of the loss.
        y_pred: Unnormalized logits (the loss is built with
            ``from_logits=True``). Assumed to be shaped
            (batch, MAX_LENGTH - 1, vocab_size) -- TODO confirm against the
            model output.

    Returns:
        Scalar tensor: the sum of the per-token losses over unmasked
        positions divided by the number of unmasked positions. Returns 0
        when every position is masked.
    """
    y_true = tf.reshape(y_true, shape=(-1, MAX_LENGTH - 1))

    per_token_loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')(y_true, y_pred)

    # Build the mask in the loss dtype so the multiply also works when the
    # loss is not float32.
    mask = tf.cast(tf.not_equal(y_true, 0), per_token_loss.dtype)
    per_token_loss = tf.multiply(per_token_loss, mask)

    # Divide by the number of unmasked tokens rather than by every position:
    # a plain reduce_mean also counts the zeroed-out entries, so the loss
    # would shrink as the share of masked positions in a batch grows.
    # divide_no_nan guards the all-masked case.
    return tf.math.divide_no_nan(
        tf.reduce_sum(per_token_loss), tf.reduce_sum(mask))


class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Warmup-then-decay learning-rate schedule.

    The learning rate at a given step is::

        d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

    i.e. it grows linearly in ``step`` while the second term is the smaller
    one and decays with the inverse square root of ``step`` afterwards.
    """

    def __init__(self, d_model, warmup_steps=4000):
        """Store the schedule parameters.

        Args:
            d_model: Value whose inverse square root scales the schedule.
            warmup_steps: Controls where the schedule switches from the
                linear term to the inverse-square-root term.
        """
        super(CustomSchedule, self).__init__()
        # Keep the caller's original value for get_config(); `self.d_model`
        # itself is stored as a float32 tensor for the rsqrt in __call__.
        self._d_model_config = d_model
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        """Return the learning rate for ``step``.

        Args:
            step: Current optimizer step; may be an integer or float
                scalar/tensor.

        Returns:
            float32 tensor holding the learning rate.
        """
        # Optimizers may pass an integer step counter; rsqrt only accepts
        # floating-point input, so cast first.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        """Return the constructor arguments so the schedule can be serialized."""
        return {
            "d_model": self._d_model_config,
            "warmup_steps": self.warmup_steps,
        }

def accuracy(y_true, y_pred):
    """Per-position sparse categorical accuracy.

    The labels are reshaped to (batch_size, MAX_LENGTH - 1) before being
    compared with the predictions. No mask is applied here, so every
    position contributes to the metric.
    """
    labels = tf.reshape(y_true, (-1, MAX_LENGTH - 1))
    return tf.keras.metrics.sparse_categorical_accuracy(labels, y_pred)

In [7]:
# Run the project's full-stop filter over every source and target sentence.
# (Exact behavior lives in tensorflow_preprocess.full_stop_filter -- not
# visible from this notebook.)
src = df['src'].apply(tp.full_stop_filter)
tgt = df['tgt'].apply(tp.full_stop_filter)

# Use the shared MAX_LENGTH constant instead of repeating the literal 24:
# loss_function and accuracy reshape the labels to MAX_LENGTH - 1, so the
# tokenizer length and that constant must not drift apart.
inputs, outputs, tokenizer = tp.tokenize_and_filter(src, tgt, max_length=MAX_LENGTH)
dataset = tp.create_train_dataset(inputs, outputs, batch_size=64, buffer_size=20000)

2022-11-24 15:37:04.217776: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
# Reset Keras' global state before building a new model in this kernel.
tf.keras.backend.clear_session()

# Architecture hyperparameters; later cells read these names again.
num_layers, num_heads = 5, 8
d_model, dff = 256, 512
dropout = 0.1

# NOTE(review): the "+ 2" on the vocabulary size presumably reserves ids for
# start/end tokens -- confirm against tensorflow_preprocess.
model = transformer(
    name="transformer",
    vocab_size=tokenizer.vocab_size + 2,
    d_model=d_model,
    num_layers=num_layers,
    num_heads=num_heads,
    dff=dff,
    dropout=dropout,
)

In [14]:
# Print the layer-by-layer summary (output shapes and parameter counts) of
# the model built above.
model.summary()

Model: "transformer"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, None)]       0           []                               
                                                                                                  
 dec_inputs (InputLayer)        [(None, None)]       0           []                               
                                                                                                  
 enc_padding_mask (Lambda)      (None, 1, 1, None)   0           ['inputs[0][0]']                 
                                                                                                  
 encoder (Functional)           (None, None, 256)    4749824     ['inputs[0][0]',                 
                                                                  'enc_padding_mask[0][0

In [13]:
# Scale the schedule by the model's actual width. The model is built with
# the `d_model` hyperparameter, so passing a separate hard-coded value here
# would silently use a learning-rate scale meant for a different model size.
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# Train with the masked loss and the per-position accuracy defined earlier.
model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])

In [17]:
checkpoint_path = "../../../checkpoint/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights (weights only, not the
# full model) to checkpoint_path and logs each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    verbose=1,
    save_weights_only=True,
)

In [19]:
# Train for a fixed number of passes over `dataset`; the checkpoint callback
# is the only callback attached.
epochs = 3
training_callbacks = [cp_callback]
model.fit(dataset, callbacks=training_callbacks, epochs=epochs)

Epoch 1/3
Epoch 1: saving model to ../../../checkpoint/cp.ckpt
Epoch 2/3
Epoch 2: saving model to ../../../checkpoint/cp.ckpt
Epoch 3/3
Epoch 3: saving model to ../../../checkpoint/cp.ckpt


<keras.callbacks.History at 0x7f9e55d9c040>

In [20]:
# Show the files in the checkpoint directory to confirm the callback wrote
# them.
os.listdir(checkpoint_dir)

['cp.ckpt.data-00000-of-00001', 'checkpoint', 'cp.ckpt.index']

In [22]:
# Build a second model with the same hyperparameters as `model` (only the
# name differs) so the saved weights can be loaded into it.
model2 = transformer(
    name="transformer_2",
    vocab_size=tokenizer.vocab_size + 2,
    d_model=d_model,
    num_layers=num_layers,
    num_heads=num_heads,
    dff=dff,
    dropout=dropout,
)

In [23]:
# Load the weights from checkpoint_path (the same path the checkpoint
# callback writes to) into the freshly built model2.
model2.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f9e16b38070>