In [1]:
import numpy as np
import os
import time

## File size

In [2]:
# Report the on-disk size (in bytes) of every saved artifact, grouped by kind:
# first the full model files, then the weights-only files.
banner = '==============================='
batch_variants = (('128:', 'b128'), (' 64:', 'b64'), (' 16:', 'b16'), ('  1:', 'b1'))

for index, (title, suffix) in enumerate((('Model file size', '.h5'),
                                         ('Weight file size', '_weights.h5'))):
    print(banner)
    print(title)
    print(banner)
    for label, stem in batch_variants:
        # e.g. 'b128' + '.h5' -> 'b128.h5', 'b128' + '_weights.h5' -> 'b128_weights.h5'
        print(label, os.path.getsize(stem + suffix))

Model file size
128: 1443904
 64: 1443888
 16: 1443888
  1: 1443904
Weight file size
128: 488808
 64: 488808
 16: 488808
  1: 488808


In [2]:
import keras
from keras.datasets import mnist
from keras import backend as K
# Only the second (test) split returned by mnist.load_data() is kept; the first
# split is discarded.
(_, _), (x_test, y_test) = mnist.load_data()

# Flatten to 10000 rows of 784 features, cast to float16, then divide by 255
# (presumably rescaling 8-bit pixel intensities into [0, 1] -- confirm).
x_test = x_test.reshape(10000, 784).astype('float16') / 255

# Expand the integer labels into 10-column categorical (one-hot) vectors.
y_test = keras.utils.to_categorical(y_test, num_classes=10)

Using TensorFlow backend.


In [7]:
def MyLoss(y_true, y_pred):
    """Custom loss: categorical cross-entropy computed by the Keras backend.

    This is a thin wrapper around ``K.categorical_crossentropy``. It is
    registered under the name ``'MyLoss'`` via ``custom_objects`` when the
    saved models in this notebook are loaded.

    Args:
        y_true: Target tensor (presumably one-hot labels -- confirm).
        y_pred: Prediction tensor with the same shape as ``y_true``.

    Returns:
        The tensor returned by ``K.categorical_crossentropy(y_true, y_pred)``.
    """
    crossentropy = K.categorical_crossentropy(y_true, y_pred)
    return crossentropy

def MyMetrics(y_true, y_pred):
    """Custom metric: element-wise match between target and predicted classes.

    The class index is taken as the argmax along the last axis of each tensor.
    The boolean comparison is cast to the backend's default float type, so a
    match becomes 1.0 and a mismatch 0.0.

    Args:
        y_true: Target tensor (presumably one-hot labels -- confirm).
        y_pred: Prediction tensor with the same shape as ``y_true``.

    Returns:
        A tensor of dtype ``K.floatx()`` with the last axis reduced away.
    """
    true_class = K.argmax(y_true, axis=-1)
    predicted_class = K.argmax(y_pred, axis=-1)
    is_match = K.equal(true_class, predicted_class)
    return K.cast(is_match, K.floatx())

## 1. Load model (model file)

In [17]:
from keras.models import load_model

# Benchmark 1: deserialize the complete model with `load_model`, then time the
# first single-sample `predict` call. The timer starts after loading, so the
# load itself is NOT part of the reported time.

# The input does not change between iterations, so build it once, outside the
# timed region: the first test sample with a leading batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128.h5', 'b64.h5', 'b16.h5', 'b1.h5']:
    print('===============================')
    print(model_path)
    # The custom loss/metric must be supplied so they can be resolved by name
    # while the saved model is being restored.
    model = load_model(model_path,
                       custom_objects={'MyLoss': MyLoss, 'MyMetrics': MyMetrics})
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128.h5
processing time:  0.060410261154174805
b64.h5
processing time:  0.07071852684020996
b16.h5
processing time:  0.07716250419616699
b1.h5
processing time:  0.09580039978027344


In [35]:
from keras.models import load_model

# Benchmark 1b: same as the `load_model` benchmark, but with `compile=False`
# passed to `load_model`. Only the first single-sample `predict` call is
# timed; the load itself is NOT part of the reported time.

# Loop-invariant input, built once outside the timed region: the first test
# sample with a leading batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128.h5', 'b64.h5', 'b16.h5', 'b1.h5']:
    print('===============================')
    print(model_path)
    model = load_model(model_path,
                       custom_objects={'MyLoss': MyLoss, 'MyMetrics': MyMetrics},
                       compile=False)
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128.h5
processing time:  0.08330082893371582
b64.h5
processing time:  0.08836793899536133
b16.h5
processing time:  0.09023380279541016
b1.h5
processing time:  0.09201812744140625


## 2. Load weights (model file) & Compile

In [29]:
# Benchmark 2: load weights from a full model file into an existing model,
# re-compile it with the custom loss/metric, then time the first single-sample
# `predict` call. Loading and compiling happen before the timer starts.
#
# NOTE(review): `model` is not created in this cell -- it must already exist
# from an earlier cell with a matching architecture.

# Loop-invariant input, built once outside the timed region: the first test
# sample with a leading batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128.h5', 'b64.h5', 'b16.h5', 'b1.h5']:
    print('===============================')
    print(model_path)
    model.load_weights(model_path)
    model.compile(loss=MyLoss, metrics=[MyMetrics],
                  optimizer=keras.optimizers.Adadelta())
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128.h5
processing time:  0.06458497047424316
b64.h5
processing time:  0.06462883949279785
b16.h5
processing time:  0.06543183326721191
b1.h5
processing time:  0.06795930862426758


## 3. Load weights (model file)

In [3]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Network that the saved weights are loaded into: two 128-unit ReLU layers on a
# 784-feature input, dropout at rate 0.5, and a 10-way softmax output.
model = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [9]:
# Benchmark 3: load weights from a full model file into the already-built
# `model` (no compile step), then time the first `predict` call after each
# load. The same `model` object is reused across iterations, and the load
# itself is NOT part of the reported time.

# Loop-invariant input, built once outside the timed region: the first test
# sample with a leading batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128.h5', 'b64.h5', 'b16.h5', 'b1.h5']:
    print('===============================')
    print(model_path)
    model.load_weights(model_path)
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128.h5
processing time:  0.1546173095703125
b64.h5
processing time:  0.0008764266967773438
b16.h5
processing time:  0.000705718994140625
b1.h5
processing time:  0.000507354736328125


## 4. Load weights (weight file)

In [15]:
# Benchmark 4: load weights from a weights-only file into the already-built
# `model`, then time the first `predict` call after each load. The same
# `model` object is reused across iterations, and the load itself is NOT part
# of the reported time.

# Loop-invariant input, built once outside the timed region: the first test
# sample with a leading batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128_weights.h5', 'b64_weights.h5',
                   'b16_weights.h5', 'b1_weights.h5']:
    print('===============================')
    print(model_path)
    model.load_weights(model_path)
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128_weights.h5
processing time:  0.0011894702911376953
b64_weights.h5
processing time:  0.0005702972412109375
b16_weights.h5
processing time:  0.0007457733154296875
b1_weights.h5
processing time:  0.0006995201110839844


## 5. Load weights (weight file float16)

In [11]:
# Benchmark 5: load the weights file named `b128_weights_f16.h5` (presumably
# weights stored as float16 -- confirm) into the already-built `model`, then
# time the first `predict` call. The load itself is NOT part of the reported
# time.

# Built once outside the timed region: the first test sample with a leading
# batch axis added.
inputs = x_test[0][np.newaxis, ...]

for model_path in ['b128_weights_f16.h5']:
    print('===============================')
    print(model_path)
    model.load_weights(model_path)
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start = time.perf_counter()
    model.predict(inputs)
    print("processing time: ", time.perf_counter() - start)

b128_weights_f16.h5
processing time:  0.0010137557983398438


## Conclusion (Inference speed)
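- load weights (batch size 1) > load model, i.e. the first `predict` call was faster after `load_weights` on an already-built model than after `load_model`. The timer in every cell starts after loading, so the load time itself is not included in these numbers.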