In [1]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import mnist
from keras import models
from keras.layers import Dense, Activation, Conv2D, Flatten
from keras.optimizers import Adam
import os

Using TensorFlow backend.


# Data load & preprocessing

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

x_train = x_train / 255.0
x_test = x_test / 255.0

# Build network

In [3]:
def build_net():
    model = models.Sequential()
    model.add(Conv2D(16, (3,3), padding = 'same', input_shape = (28, 28, 1)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(10))
    model.add(Activation('softmax'))
    
    model.compile(loss = 'categorical_crossentropy', optimizer = Adam(lr = 0.001)
                          ,metrics = ['accuracy'])
    return model

In [4]:
mnist_net = build_net()

# Save model after train

In [5]:
# make directory
save_dir = './save_model/'

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

Train network

In [6]:
mnist_net.fit(x_train, y_train, batch_size = 32, epochs = 2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1ffc2a96d68>

#### The save () function stores both the structure and the weight of the model.

In [7]:
mnist_net.save(save_dir + 'save_model.h5')

# saved model reuse using load_model()

import load_model

In [8]:
from keras.models import load_model

load model architecture and weights

In [9]:
loaded_model = load_model(save_dir + 'save_model.h5')

show the architecture

In [10]:
loaded_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 16)        160       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 12544)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                125450    
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 125,610
Trainable params: 125,610
Non-trainable params: 0
_________________________________________________________________


#### The get_config () function shows detailed configuration information for the model 

In [11]:
loaded_model.get_config()

{'layers': [{'class_name': 'Conv2D',
   'config': {'activation': 'linear',
    'activity_regularizer': None,
    'batch_input_shape': (None, 28, 28, 1),
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'data_format': 'channels_last',
    'dilation_rate': (1, 1),
    'dtype': 'float32',
    'filters': 16,
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'VarianceScaling',
     'config': {'distribution': 'uniform',
      'mode': 'fan_avg',
      'scale': 1.0,
      'seed': None}},
    'kernel_regularizer': None,
    'kernel_size': (3, 3),
    'name': 'conv2d_1',
    'padding': 'same',
    'strides': (1, 1),
    'trainable': True,
    'use_bias': True}},
  {'class_name': 'Activation',
   'config': {'activation': 'relu',
    'name': 'activation_1',
    'trainable': True}},
  {'class_name': 'Flatten',
   'config': {'data_format': 'channels_last',
    'name': 'flatten_1',
    'trainable': True

In [12]:
loaded_model.predict(x_test)

array([[2.0910824e-09, 2.1872920e-10, 7.2590318e-08, ..., 9.9997354e-01,
        3.2753408e-07, 5.3104918e-06],
       [1.8211487e-05, 9.4478550e-05, 9.9955171e-01, ..., 1.0896243e-14,
        7.2268574e-07, 6.6417168e-13],
       [2.2806544e-05, 9.9760652e-01, 4.5086499e-04, ..., 9.3125587e-04,
        5.0584809e-04, 6.3833381e-06],
       ...,
       [1.9720436e-09, 8.5637328e-09, 1.9384997e-08, ..., 1.4988969e-04,
        3.2159423e-05, 2.6284895e-04],
       [8.0302407e-08, 7.8227309e-09, 5.0307363e-09, ..., 2.6371936e-09,
        4.2345829e-04, 1.0097833e-08],
       [2.5761246e-08, 7.1552424e-11, 2.8835304e-06, ..., 7.2816163e-11,
        1.0451320e-07, 2.5249899e-10]], dtype=float32)

In [13]:
loaded_model.evaluate(x_test, y_test)



[0.07690361095573753, 0.9769]

#### Import and reuse only weight values from saved models

In [14]:
mnist_net_only_weights = build_net()

In [15]:
mnist_net_only_weights.load_weights(save_dir + 'save_model.h5')

In [16]:
mnist_net_only_weights.predict(x_test)

array([[2.0910824e-09, 2.1872920e-10, 7.2590318e-08, ..., 9.9997354e-01,
        3.2753408e-07, 5.3104918e-06],
       [1.8211487e-05, 9.4478550e-05, 9.9955171e-01, ..., 1.0896243e-14,
        7.2268574e-07, 6.6417168e-13],
       [2.2806544e-05, 9.9760652e-01, 4.5086499e-04, ..., 9.3125587e-04,
        5.0584809e-04, 6.3833381e-06],
       ...,
       [1.9720436e-09, 8.5637328e-09, 1.9384997e-08, ..., 1.4988969e-04,
        3.2159423e-05, 2.6284895e-04],
       [8.0302407e-08, 7.8227309e-09, 5.0307363e-09, ..., 2.6371936e-09,
        4.2345829e-04, 1.0097833e-08],
       [2.5761246e-08, 7.1552424e-11, 2.8835304e-06, ..., 7.2816163e-11,
        1.0451320e-07, 2.5249899e-10]], dtype=float32)

In [17]:
mnist_net_only_weights.evaluate(x_test, y_test)



[0.07690361095573753, 0.9769]

# only save weights

In [18]:
mnist_net_save_only_weights = build_net()
mnist_net_save_only_weights.fit(x_train, y_train, batch_size = 32, epochs = 2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1ffc5375b38>

In [19]:
mnist_net_save_only_weights.evaluate(x_test, y_test)



[0.06776213491801172, 0.9791]

save_weights save only weight parameters

In [20]:
mnist_net_save_only_weights.save_weights(save_dir + 'save_only_weights.h5')

In [21]:
mnist_net_save_only_weights_reuse = build_net()

load only weights and evaluate loaded model

In [22]:
mnist_net_save_only_weights_reuse.load_weights(save_dir + 'save_only_weights.h5')

In [23]:
mnist_net_save_only_weights_reuse.evaluate(x_test, y_test)



[0.06776213491801172, 0.9791]

# save model as json format

#### Model information can be converted to json format and stored in memory as string data

In [24]:
mnist_json = build_net()

In [25]:
mnist_json_model = mnist_json.to_json()

In [26]:
print(mnist_json_model)

{"keras_version": "2.2.4", "config": {"name": "sequential_5", "layers": [{"class_name": "Conv2D", "config": {"kernel_constraint": null, "bias_regularizer": null, "name": "conv2d_5", "kernel_regularizer": null, "filters": 16, "dtype": "float32", "strides": [1, 1], "dilation_rate": [1, 1], "activation": "linear", "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "use_bias": true, "bias_constraint": null, "padding": "same", "activity_regularizer": null, "bias_initializer": {"class_name": "Zeros", "config": {}}, "trainable": true, "batch_input_shape": [null, 28, 28, 1], "kernel_size": [3, 3], "data_format": "channels_last"}}, {"class_name": "Activation", "config": {"name": "activation_9", "trainable": true, "activation": "relu"}}, {"class_name": "Flatten", "config": {"name": "flatten_5", "trainable": true, "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"bias_regularizer": null

#### json file write to path

In [27]:
with open(save_dir + "/mnist_json_model.json", "w") as json_file : 
    json_file.write(mnist_json_model)

#### json format model information can be reloaded and reused

### Load json and create model

In [28]:
from keras.models import model_from_json

In [29]:
# read json file from path
json_file = open(save_dir + "/mnist_json_model.json", "r")
loaded_model_json = json_file.read() 
json_file.close()

# load model from json file
saved_mnist_json_model = model_from_json(loaded_model_json)

load cnn weights

In [30]:
saved_mnist_json_model.load_weights(save_dir + 'save_only_weights.h5')

## Model information of json format should be compiled again after model load.

Makes the compile condition the same

In [31]:
# json format should be compiled
saved_mnist_json_model.compile(loss = 'categorical_crossentropy', optimizer = Adam(lr = 0.001)
                          ,metrics = ['accuracy'])

In [32]:
saved_mnist_json_model.evaluate(x_test, y_test)



[0.06776213491801172, 0.9791]

# Best model save using callback

import library

In [33]:
from keras.callbacks import ModelCheckpoint

In [34]:
model_path = save_dir + '{epoch:02d}-{val_loss:.4f}-{acc:.4f}.hd5'

# Only when val_loss is lower than before
cb_checkpoint = ModelCheckpoint(filepath = model_path, monitor = 'val_loss', verbose = 1, save_best_only = True)

In [35]:
best_save_model = build_net()
best_save_model.fit(x_train, y_train,validation_split = 0.2, batch_size = 32, epochs = 10, callbacks = [cb_checkpoint])

Train on 48000 samples, validate on 12000 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.10525, saving model to ./save_model/01-0.1053-0.9369.hd5
Epoch 2/10

Epoch 00002: val_loss improved from 0.10525 to 0.08648, saving model to ./save_model/02-0.0865-0.9757.hd5
Epoch 3/10

Epoch 00003: val_loss improved from 0.08648 to 0.07777, saving model to ./save_model/03-0.0778-0.9828.hd5
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.07777
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.07777
Epoch 6/10

Epoch 00006: val_loss improved from 0.07777 to 0.07766, saving model to ./save_model/06-0.0777-0.9919.hd5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.07766
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.07766
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.07766
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.07766


<keras.callbacks.History at 0x1ffc593e710>

# Early Stopping

#### If a model with the highest performance is found in the beginning, and a model with better performance is no longer found

In [36]:
from keras.callbacks import EarlyStopping

In [37]:
# if val_loss can not increase in 3 patience(epochs), stop training
cb_early_stopping = EarlyStopping(monitor = 'val_loss', patience = 3)

# create model
earlystopping_model = build_net()
earlystopping_model.fit(x_train, y_train,validation_split = 0.2, batch_size = 32, epochs = 10, callbacks = [cb_early_stopping])

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


<keras.callbacks.History at 0x1ffc5c028d0>

# best model save + early stopping

In [38]:
combined_model = build_net()

In [39]:
combined_model.fit(x_train, y_train,validation_split = 0.2, batch_size = 32, epochs = 10, callbacks = [cb_checkpoint,cb_early_stopping])

Train on 48000 samples, validate on 12000 samples
Epoch 1/10

Epoch 00001: val_loss did not improve from 0.07766
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.07766
Epoch 3/10

Epoch 00003: val_loss improved from 0.07766 to 0.07658, saving model to ./save_model/03-0.0766-0.9826.hd5
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.07658
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.07658
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.07658


<keras.callbacks.History at 0x1ffe368c860>

# Model ReduceLROnPlateau

Reduce learning rate when a metric has stopped improving <br>
This function monitors that if no improvement is seen for a 'patience' number of epochs, the learning rate is reduced.

In [None]:
# example
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=0.001)
model.fit(X_train, Y_train, callbacks=[reduce_lr])