# Model save and reuse in TF2
1. 체크 포인트 저장하기 with fit()
  
  1.1 체크포인트 콜백 사용하기

  1.2 수동으로 가중치 저장하기
  
  1.3 전체 모델 저장하기(savedmodel, hdf5) 
2. 체크 포인트 저장하기 with tf.GradientTape() as tape


Reference
- https://www.tensorflow.org/tutorials/keras/save_and_load?hl=ko
- https://www.tensorflow.org/guide/checkpoint?hl=ko

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Dense, Activation, Conv2D, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import sys
import datetime
import numpy as np
import os
import random

In [2]:
# random seed settings
setting_number = 777
tf.random.set_seed(setting_number)
np.random.seed(setting_number)
random.seed(setting_number)

In [3]:
print('Tensorflow version : ', tf.__version__)
print('System version : ', sys.version_info)

Tensorflow version :  2.6.0
System version :  sys.version_info(major=3, minor=7, micro=12, releaselevel='final', serial=0)


In [4]:
# === Data Load === #
(x_train, y_train), (x_test, y_test) = mnist.load_data()

y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

w, h = 28, 28

x_train = x_train.reshape(-1, w, h, 1)
x_test = x_test.reshape(-1, w, h, 1)

x_train = x_train / 255.
x_test = x_test / 255.

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
def build(data_shape):
  # build simple CNN
  inputs = Input(data_shape)

  x = Conv2D(32, (3, 3), padding='same')(inputs)
  x = Activation('relu')(x)

  x = Conv2D(64, (3, 3), padding='same')(x)
  x = Activation('relu')(x)

  x = Flatten()(x)
  x = Dense(10)(x)
  x = Activation('sigmoid')(x)

  model = Model(inputs = inputs, outputs = x)


  return model

In [6]:
model = build(x_train.shape[1:])

In [7]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
activation (Activation)      (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 50176)             0         
_________________________________________________________________
dense (Dense)                (None, 10)                501770

In [8]:
# loss function settings
model.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])

In [9]:
# hyperparameter settings
EPOCHS = 3
BATCH_SIZE = 256

# 1. 체크 포인트 저장하기 with fit()

## 1.1. 체크포인트 콜백 사용하기

In [10]:
# 사용 모델 정의
model_ckpt = build(x_train.shape[1:])
model_ckpt.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])# loss function settings

In [11]:
checkpoint_path_dir = "training_ckpt"

# 파일 이름에 번호를 포함시킵니다(`str.format` 포맷)
cp_name = "{epoch:02d}-{val_loss:.4f}-{acc:.4f}.ckpt"
if not(os.path.exists(checkpoint_path_dir)):
  os.mkdir(checkpoint_path_dir)
  print("Create Directory!")

checkpoint_path = os.path.join(checkpoint_path_dir, cp_name)

# 모델의 가중치를 저장하는 콜백 만들기
# validation loss가 가장 낮은 모델 저장
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 monitor = 'val_loss',
                                                 verbose=1)
# # `checkpoint_path` 포맷을 사용하는 가중치를 저장합니다
# model.save_weights(checkpoint_path.format(epoch=0))

# 새로운 콜백으로 모델 학습
model_ckpt.fit(x_train, 
          y_train,  
          epochs=EPOCHS,
          validation_data=(x_test,y_test),
          callbacks=[cp_callback])  # 콜백을 훈련에 전달합니다

Create Directory!
Epoch 1/3

Epoch 00001: saving model to training_ckpt/01-0.0136-0.9636.ckpt
Epoch 2/3

Epoch 00002: saving model to training_ckpt/02-0.0098-0.9880.ckpt
Epoch 3/3

Epoch 00003: saving model to training_ckpt/03-0.0096-0.9922.ckpt


<keras.callbacks.History at 0x7fae10622550>

In [12]:
# latest model 
latest = tf.train.latest_checkpoint(checkpoint_path_dir)
latest

'training_ckpt/03-0.0096-0.9922.ckpt'

In [13]:
# 테스트 모델 만들기
model_ckpt_test = build(x_train.shape[1:])
model_ckpt_test.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])

# 가중치 로드
model_ckpt_test.load_weights(latest)

# 모델 재평가
loss,acc = model_ckpt_test.evaluate(x_test,  y_test, verbose=2)
print("복원된 모델의 정확도: {:5.2f}%".format(100*acc))

313/313 - 1s - loss: 0.0096 - acc: 0.9900
복원된 모델의 정확도: 99.00%


## 1.2. 수동으로 가중치 저장하기

In [14]:
# 사용 모델 정의
model_manual = build(x_train.shape[1:])
model_manual.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])# loss function settings

In [15]:
checkpoint_path_dir = "training_manual"

# 파일 이름에 번호를 포함시킵니다(`str.format` 포맷)
cp_name = "my_checkpoint.ckpt"
if not(os.path.exists(checkpoint_path_dir)):
  os.mkdir(checkpoint_path_dir)
  print("Create Directory!")

checkpoint_path = os.path.join(checkpoint_path_dir, cp_name)

# 학습
model_manual.fit(x_train, 
          y_train,  
          epochs=EPOCHS,
          validation_data=(x_test,y_test))

# 가중치를 저장합니다
# save_weights는 수동으로 가중치를 저장
model_manual.save_weights(checkpoint_path)

# 새로운 모델 객체를 만듭니다
model_manual_test = build(x_train.shape[1:])
model_manual_test.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])# loss function settings

# 가중치를 복원합니다
model_manual_test.load_weights(checkpoint_path)

# 모델을 평가합니다
loss,acc = model_manual_test.evaluate(x_test,  y_test, verbose=2)
print("복원된 모델의 정확도: {:5.2f}%".format(100*acc))

Create Directory!
Epoch 1/3
Epoch 2/3
Epoch 3/3
313/313 - 1s - loss: 0.0095 - acc: 0.9893
복원된 모델의 정확도: 98.93%


## 1.3. 전체 모델 저장하기
- model.save 메서드를 호출하여 `모델의 구조, 가중치, 훈련 설정`을 하나의 파일/폴더에 저장합니다

- refer to https://www.tensorflow.org/tutorials/keras/save_and_load?hl=ko#%EC%B2%B4%ED%81%AC%ED%8F%AC%EC%9D%B8%ED%8A%B8_%EC%BD%9C%EB%B0%B1_%EB%A7%A4%EA%B0%9C%EB%B3%80%EC%88%98

In [16]:
# 사용 모델 정의
model_all = build(x_train.shape[1:])
model_all.compile(optimizer="adam", loss="binary_crossentropy", metrics = ['acc'])# loss function settings

In [17]:
# 학습
model_all.fit(x_train, 
          y_train,  
          epochs=EPOCHS,
          validation_data=(x_test,y_test))

# 저장
checkpoint_path_dir = "training_all"
if not(os.path.exists(checkpoint_path_dir)):
  os.mkdir(checkpoint_path_dir)
  print("Create Directory!")

model_all.save('./training_all/model_all')

Epoch 1/3
Epoch 2/3
Epoch 3/3
Create Directory!
INFO:tensorflow:Assets written to: ./training_all/model_all/assets


In [18]:
model_all_test = tf.keras.models.load_model('training_all/model_all')

In [19]:
model_all_test.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_15 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
activation_16 (Activation)   (None, 28, 28, 64)        0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 50176)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                5017

In [20]:
# 모델을 평가합니다
loss,acc = model_all_test.evaluate(x_test,  y_test, verbose=2)
print("복원된 모델의 정확도: {:5.2f}%".format(100*acc))

313/313 - 1s - loss: 0.0093 - acc: 0.9896
복원된 모델의 정확도: 98.96%


## 2. 체크 포인트 저장하기 with tf.GradientTape() as tape

refer to https://www.tensorflow.org/guide/checkpoint?hl=ko

In [21]:
# 사용 모델 정의
model_tape = build(x_train.shape[1:])

In [22]:
opt = tf.keras.optimizers.Adam(1e-3)
loss_object = tf.keras.losses.CategoricalCrossentropy() # one-hot vector에 사용
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.CategoricalAccuracy('train_accuracy')

# 저장
checkpoint_path_dir = "training_tape"
if not(os.path.exists(checkpoint_path_dir)):
  os.mkdir(checkpoint_path_dir)
  print("Create Directory!")

# set checkpoint manager
ckpt = tf.train.Checkpoint(step=tf.Variable(0), model=model_tape)
# max_to_keep: 최대 ckpt를 몇 개만 저장할 것인가
ckpt_manager = tf.train.CheckpointManager(ckpt,
                                        directory=checkpoint_path_dir,
                                        max_to_keep=None)
latest_ckpt = tf.train.latest_checkpoint(checkpoint_path_dir)

Create Directory!


In [23]:
def train_step(net, x_data, y_data, optimizer):
  """Trains `net` on `example` using `optimizer`."""
  with tf.GradientTape() as tape:
    predictions = net(x_data, training=True)
    loss = loss_object(y_data, predictions)
  grads = tape.gradient(loss, net.trainable_variables)
  optimizer.apply_gradients(zip(grads, net.trainable_variables))

  train_accuracy(y_data, predictions)
  train_loss(loss)

  return loss

In [24]:
def train_and_checkpoint(net, manager, opt, ckpt):
  try:
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
      print("Restored from {}".format(manager.latest_checkpoint))
    else:
      print("Initializing from scratch.")
  except:
    pass

  batch = x_train.shape[0] // BATCH_SIZE
  
  for epoch in range(EPOCHS):
    epoch_loss = []
    for i in range(batch):
      x_data, y_data = x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE], y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
      loss = train_step(net, x_data, y_data, opt)
      epoch_loss.append(loss)
      ckpt.step.assign_add(1)
      if int(ckpt.step) % 10 == 0:
        save_path = manager.save(checkpoint_number=ckpt.step)
        print("\rSaved checkpoint for step {}: {}".format(int(ckpt.step), save_path), end='')
        # print("\rloss {:1.2f}".format(loss.numpy()), end='')

    template = 'Epoch {}, Loss: {}, Accuracy: {}'
    print('')
    print (template.format(epoch+1,np.mean(epoch_loss), train_accuracy.result()*100))
    
    # Reset metrics every epoch - 매 epoch마다 reset
    train_loss.reset_states()
    train_accuracy.reset_states()

In [25]:
train_and_checkpoint(model_tape, ckpt_manager, opt, ckpt)

Initializing from scratch.
Saved checkpoint for step 230: training_tape/ckpt-230
Epoch 1, Loss: 0.24059836566448212, Accuracy: 93.24418640136719
Saved checkpoint for step 460: training_tape/ckpt-460
Epoch 2, Loss: 0.06606880575418472, Accuracy: 98.01849365234375
Saved checkpoint for step 700: training_tape/ckpt-700
Epoch 3, Loss: 0.04512513056397438, Accuracy: 98.65284729003906


In [26]:
model_tape_test = build(x_train.shape[1:])

# set checkpoint manager
ckpt_test = tf.train.Checkpoint(step=tf.Variable(0), model=model_tape_test)
latest_ckpt = tf.train.latest_checkpoint(checkpoint_path_dir)

# restore latest checkpoint
if latest_ckpt:
  ckpt_test.restore(latest_ckpt)
  print('global_step : {}, checkpoint is restored!'.format(int(ckpt_test.step)))


global_step : 700, checkpoint is restored!


In [27]:
def compute_accuracy(y_pred, y):
  correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  return accuracy

In [28]:
acc = compute_accuracy(model_tape_test(x_test), y_test)

In [29]:
# 모델 재평가
print("복원된 모델의 정확도: {:5.2f}%".format(100*acc))

복원된 모델의 정확도: 98.23%
