### Optimize Models with TF-Lite
`float16` quantization

In [1]:
import os
import tensorflow as tf

In [2]:
# Hide all CUDA devices from this process: the inference timings further down
# are meant to be measured on the CPU.
# NOTE(review): `tensorflow` is imported in the cell above, before this
# variable is set; presumably it still takes effect because no GPU has been
# initialised yet — confirm, or set it before the import.
os.environ.update({'CUDA_VISIBLE_DEVICES': '-1'})

In [3]:
import ad

from ad import utils
from ad.models.vae import COD2VAE
from ad.models.dual_ae import CoDAE
from ad.optimize import ModelOptimizer

In [4]:
# Seed the project's random number sources (the call reports
# "Random seed 42 set.") and install the project plotting style with
# `ad.plot.CMAP2` as the default colour map.
SEED = 42
utils.set_random_seed(SEED)

default_cmap = ad.plot.CMAP2
ad.plot.set_style(default_cmap=default_cmap)

Random seed 42 set.


In [5]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### CoDAE

In [6]:
# Build the CoDAE model for 72x58 single-channel images with a 64-dimensional
# latent space. Each sub-network receives its own configuration mapping.
model = CoDAE(
    image_shape=(72, 58, 1),
    latent_size=64,
    encoder1={
        'depths': (1, 1, 1, 1),
        'filters': [64, 128, 128, 256],
        'groups': 4,
    },
    encoder2={
        'filters': [64, 128, 128, 256],
        'groups': 2,
    },
    decoder={
        'filters': [256, 128, 128, 64],
        'depths': (1, 1, 1, 1),
        'bias': -1.0,
        'groups': 4,
    },
)

# Restore trained weights from the 'codae' checkpoint folder.
# Presumably mode='max' picks the checkpoint with the highest recorded
# score — confirm in ad.utils.
utils.load_from_checkpoint(model, path='codae', mode='max')

Loaded from "weights\codae\weights-30-0.873"


Convert each CoDAE sub-model (both encoders and the decoder) to TF-Lite

In [None]:
# Export the first CoDAE encoder (`model.encoder1`): hand the Keras model to a
# ModelOptimizer, convert it, and save the result as
# 'encoder1_quant_f16.tflite'.
# Presumably `path` is the output directory and `convert()` applies the
# float16 quantization named in the notebook title — confirm in ad.optimize.
# NOTE(review): this cell has no execution count in the saved notebook even
# though later cells use `encoder_opt`; re-run top to bottom to confirm.
encoder_opt = ModelOptimizer(path='weights/tf-lite/codae/')
encoder_opt.from_keras_model(model=model.encoder1)
encoder_opt.convert()
encoder_opt.save(file='encoder1_quant_f16.tflite')

In [None]:
# Export the second CoDAE encoder (`model.encoder2`), kept in `mask_opt`:
# load the Keras model, convert it, and save the result as
# 'encoder2_quant_f16.tflite'.
# Presumably `path` is the output directory — confirm in ad.optimize.
mask_opt = ModelOptimizer(path='weights/tf-lite/codae/')
mask_opt.from_keras_model(model=model.encoder2)
mask_opt.convert()
mask_opt.save(file='encoder2_quant_f16.tflite')

In [None]:
# Export the CoDAE decoder (`model.decoder`): load the Keras model, convert
# it, and save the result as 'decoder_quant_f16.tflite'.
# Presumably `path` is the output directory — confirm in ad.optimize.
decoder_opt = ModelOptimizer(path='weights/tf-lite/codae/')
decoder_opt.from_keras_model(model=model.decoder)
decoder_opt.convert()
decoder_opt.save(file='decoder_quant_f16.tflite')

Measure inference time (batch_size = 1)

In [40]:
# Call `interpret()` on each converted CoDAE sub-model, in the same order as
# before: encoder, mask encoder, decoder.
# Presumably this prepares the TF-Lite interpreter used by `inference()` —
# confirm in ad.optimize.
for optimizer in (encoder_opt, mask_opt, decoder_opt):
    optimizer.interpret()

In [41]:
# A single random image (batch of one, 72x58, one channel) serves as the
# benchmark input. Its two encodings are computed once here so that the
# decoder can be timed on its own afterwards.
test_image = tf.random.normal(shape=(1, 72, 58, 1))
test_z, test_zm = (
    encoder_opt.inference(test_image),
    mask_opt.inference(test_image),
)

In [42]:
# Time one inference call per CoDAE sub-model, 100 loops per timing run.
# The decoder is fed both precomputed encodings as a two-element list.
%timeit -n 100 encoder_opt.inference(test_image)
%timeit -n 100 mask_opt.inference(test_image)
%timeit -n 100 decoder_opt.inference([test_z, test_zm])

4.75 ms ± 40.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.03 ms ± 75 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
32.3 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Categorical CoDVAE

In [7]:
# Build the categorical CoDVAE for 72x58 single-channel images with a
# 64-dimensional latent space and tau=1.0. Each sub-network receives its own
# configuration mapping.
vae = COD2VAE(
    image_shape=(72, 58, 1),
    latent_size=64,
    tau=1.0,
    large_encoder={
        'depths': (1, 1, 1, 1),
        'filters': [64, 128, 128, 256],
        'groups': 4,
    },
    mask_encoder={
        'filters': [64, 128, 128, 256],
        'groups': 2,
    },
    decoder={
        'filters': [256, 128, 128, 64],
        'depths': (1, 1, 1, 1),
        'bias': -1.0,
        'groups': 4,
    },
)

# Restore trained weights from the 'categorical_codvae' checkpoint folder.
# Presumably mode='max' picks the checkpoint with the highest recorded
# score — confirm in ad.utils.
utils.load_from_checkpoint(vae, path='categorical_codvae', mode='max')

Loaded from "weights\categorical_codvae\weights-100-0.766"


In [29]:
# Export the CoDVAE encoder read from `vae.energy_encoder`: load the Keras
# model, convert it, and save the result as 'large_encoder_quant_f16.tflite'.
# This rebinds `encoder_opt`, which the CoDAE section above also uses.
# NOTE(review): the constructor keyword and the output file both say
# "large_encoder", but the attribute read here is `energy_encoder` — confirm
# this is the intended sub-model.
encoder_opt = ModelOptimizer(path='weights/tf-lite/codvae/')
encoder_opt.from_keras_model(model=vae.energy_encoder)

encoder_opt.convert()
encoder_opt.save(file='large_encoder_quant_f16.tflite')

In [None]:
# Export the CoDVAE mask encoder (`vae.mask_encoder`): load the Keras model,
# convert it, and save the result as 'mask_encoder_quant_f16.tflite'.
# This rebinds `mask_opt`, which the CoDAE section above also uses.
# NOTE(review): the saved notebook shows no execution count for this cell, so
# the `mask_opt` timed further down may still have been the CoDAE one —
# re-run top to bottom to confirm.
mask_opt = ModelOptimizer(path='weights/tf-lite/codvae/')
mask_opt.from_keras_model(model=vae.mask_encoder)

mask_opt.convert()
mask_opt.save(file='mask_encoder_quant_f16.tflite')

In [30]:
# Export the CoDVAE decoder (`vae.decoder`): load the Keras model, convert
# it, and save the result as 'decoder_quant_f16.tflite'.
# This rebinds `decoder_opt`, which the CoDAE section above also uses.
decoder_opt = ModelOptimizer(path='weights/tf-lite/codvae/')
decoder_opt.from_keras_model(model=vae.decoder)

decoder_opt.convert()
decoder_opt.save(file='decoder_quant_f16.tflite')

Measure inference time (batch_size = 1)

In [32]:
# Call `interpret()` on each converted CoDVAE sub-model, in the same order as
# before: encoder, mask encoder, decoder.
# Presumably this prepares the TF-Lite interpreter used by `inference()` —
# confirm in ad.optimize.
for optimizer in (encoder_opt, mask_opt, decoder_opt):
    optimizer.interpret()

In [33]:
# A single random image (batch of one, 72x58, one channel) serves as the
# benchmark input. Only the first encoder output is kept; the tau output is
# discarded. Both encodings are computed once so that the decoder can be
# timed on its own afterwards.
test_image = tf.random.normal(shape=(1, 72, 58, 1))
test_z, test_zm = (
    encoder_opt.inference(test_image)[0],
    mask_opt.inference(test_image),
)

In [35]:
# Time one inference call per CoDVAE sub-model, 100 loops per timing run.
# The decoder is fed both precomputed encodings as a two-element list.
%timeit -n 100 encoder_opt.inference(test_image)
%timeit -n 100 mask_opt.inference(test_image)
%timeit -n 100 decoder_opt.inference([test_z, test_zm])

7.38 ms ± 622 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.05 ms ± 56 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
32.8 ms ± 51.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
