# Chapter 4 Code

Covers code for Chapter 4, "Model Compression for Practical Deployment", of *Modern Deep Learning Design and Application*.

---

## Installing + Importing Libraries

In [1]:
# install and import tensorflow model optimization
!pip install tensorflow-model-optimization
import tensorflow_model_optimization
import tensorflow_model_optimization as tfmot

# array processing + math
import numpy as np
import pandas as pd
import scipy
import math
import sklearn

# plotting & visuals
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

# looping progress bars - very helpful for data manipulation scripts
from tqdm.notebook import tqdm

# file and storage managing libraries
import zipfile as zf
import tempfile
import os
import time

# deep learning staple libraries
# !pip install tensorflow # install if necessary
import sklearn
import tensorflow as tf
from tensorflow import keras

# keras specifics
import keras.layers as L
import keras.backend as K
from keras.utils import plot_model

Collecting tensorflow-model-optimization
  Downloading tensorflow_model_optimization-0.6.0-py2.py3-none-any.whl (211 kB)
[K     |████████████████████████████████| 211 kB 517 kB/s 
Installing collected packages: tensorflow-model-optimization
Successfully installed tensorflow-model-optimization-0.6.0


---

## Loading Data

In [2]:
# Fetch MNIST through Keras (downloaded on first use)
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# inputs: flatten every 28x28 image into a single row of 784 values
x_train = x_train.reshape(len(x_train), 28 * 28)
x_test = x_test.reshape(len(x_test), 28 * 28)

# targets: convert integer class labels to categorical (one-hot) vectors
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


---

## Benchmark Model

In [3]:
# Baseline (uncompressed) dense classifier; later cells compress this `model`.
# import layers (already imported under the same alias in the first cell)
import keras.layers as L

# construct Sequential model
model = keras.Sequential()

# construct Input: one flattened 784-value vector per sample
model.add(L.Input((784,)))

# construct processing layers: i runs 9, 8, 7, 6, 5 -> widths 512, 256, 128, 64, 32
for i in list(range(5,10))[::-1]:
    model.add(L.Dense(2**i, activation='relu'))
# NOTE(review): this line sits outside the loop and reuses the leaked loop
# variable (i == 5), so it appends a second 32-unit layer - confirm intent
model.add(L.Dense(2**i, activation='relu'))

# construct output layer: 10 classes, softmax probabilities
model.add(L.Dense(10, activation='softmax'))

# compile and fit
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1) # train for more epochs for better performance



<tensorflow.python.keras.callbacks.History at 0x7f33243d8610>

---

## Defining Model Metrics

Define storage size of a model using temporary files and zipping.

In [4]:
def get_size(model):
    """Return the compressed size of a model's weights as a string in megabytes.

    The weights are saved to a temporary ``.h5`` file, that file is zipped
    with DEFLATE compression, and the size of the zip archive is reported.
    Both temporary files are removed before returning.

    Parameters
    ----------
    model : object exposing ``save_weights(path)``
        Typically a Keras model.

    Returns
    -------
    str
        The zip size in units of 2**20 bytes, followed by ``' MB'``.
    """
    # mkstemp returns an already-open OS-level descriptor along with the path;
    # close it immediately since the files are (re)opened by path below
    weights_fd, weightsfile = tempfile.mkstemp(".h5")
    os.close(weights_fd)
    zip_fd, zippedfile = tempfile.mkstemp(".zip")
    os.close(zip_fd)

    try:
        # save weights to file
        model.save_weights(weightsfile)

        # zip weights file
        with zf.ZipFile(zippedfile, "w",
                        compression=zf.ZIP_DEFLATED) as f:
            f.write(weightsfile)

        # size of the zipped weights, in megabytes
        size_mb = os.path.getsize(zippedfile) / float(2**20)
    finally:
        # mkstemp never deletes its files; clean up even if saving failed
        for path in (weightsfile, zippedfile):
            if os.path.exists(path):
                os.remove(path)

    return str(size_mb) + ' MB'

Define latency of a model by timing prediction time on the dataset.

In [5]:
def get_latency(model, data=None):
    """Return the mean time, in seconds, the model takes to predict one sample.

    Parameters
    ----------
    model : object exposing ``predict(data)``
        Typically a Keras model.
    data : sized collection of samples, optional
        Inputs to time the prediction on. Defaults to the notebook-level
        ``x_test`` so existing ``get_latency(model)`` calls keep working.

    Returns
    -------
    float
        Total prediction time divided by ``len(data)``.

    Raises
    ------
    ZeroDivisionError
        If ``data`` is empty.
    """
    if data is None:
        data = x_test

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted
    start = time.perf_counter()
    model.predict(data)
    elapsed = time.perf_counter() - start

    # mean time to predict on a sample
    return elapsed / len(data)

Define parameter metrics: count the nonzero weights in the original and pruned models and compare the two counts.

In [6]:
# import count nonzero function
from numpy import count_nonzero as nz

def get_param_metrics(orig_model, pruned_model):
    """Compare the number of nonzero weights in an original and a pruned model.

    Parameters
    ----------
    orig_model, pruned_model : objects exposing ``get_weights()``
        ``get_weights()`` must return an iterable of arrays (as Keras models do).

    Returns
    -------
    dict
        Nonzero-weight counts for both models, the pruned/original ratio, and
        one minus that ratio. Both ratios are ``nan`` when the original model
        has no nonzero weights.
    """
    # count nonzero entries across every weight array of each model
    om_params = sum(np.count_nonzero(w) for w in orig_model.get_weights())
    p_params = sum(np.count_nonzero(w) for w in pruned_model.get_weights())

    # avoid dividing by zero for an empty / all-zero original model
    ratio = p_params / om_params if om_params else float('nan')

    # return information organized in dictionary
    return {'Original Model Parameter Count:': om_params,
            'Pruned Model Parameter Count': p_params,
            'Pruned to Original Weights Ratio': ratio,
            'Compression Ratio': 1 - ratio}

---

## Pruning

### Pruning an Entire Model

In [7]:
# create pruning schedule: sparsity ramps from 50% to 95% over one epoch
from tensorflow_model_optimization.sparsity.keras import PolynomialDecay as PD
# optimizer steps per epoch (fit's default batch size is 32) times the
# number of epochs the sparsity ramp should span (1)
end_step = np.ceil(len(x_train)/32) * 1
schedule = PD(initial_sparsity=0.50,
              final_sparsity=0.95,
              begin_step=0,
              end_step=end_step,
              frequency=128)

# define pruning parameters
pruning_params = {
    'pruning_schedule': schedule
}

# create pruning model by wrapping the baseline `model` trained earlier
from tensorflow_model_optimization.sparsity.keras import prune_low_magnitude
pruned_model = prune_low_magnitude(model, **pruning_params)

# compile pruned model
pruned_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

# fit model with update callback (required so the pruning step advances)
update_pruning = tfmot.sparsity.keras.UpdatePruningStep()
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])

# fine-tune after pruning, but BEFORE stripping: the pruning wrappers keep
# the pruned weights masked to zero while the surviving weights adapt.
# Training a stripped model instead would let the zeroed weights receive
# gradient updates and become nonzero again, undoing the sparsity.
pruned_model.fit(x_train, y_train,
                 epochs=1,
                 callbacks=[update_pruning])

# strip pruning wrappers once all training is finished
from tensorflow_model_optimization.sparsity.keras import strip_pruning
pruned_model = strip_pruning(pruned_model)

# save and reload model (optional)
filepath = 'pruned-model'
pruned_model.save(filepath) # to save
with tfmot.sparsity.keras.prune_scope(): # to reload
    pruned_model = keras.models.load_model(filepath)





### Pruning Individual Layers

Method 1 - simpler and more intuitive, but cannot perform pretraining.

In [8]:
# import pruning marker
from tensorflow_model_optimization.sparsity.keras import prune_low_magnitude as plm

# create model using pruning marker: only layers wrapped in plm(...) are pruned.
# Every hidden layer gets a ReLU activation, matching the other models in this
# notebook; without one, the stacked Dense layers collapse into a single
# linear map.
# `pruning_params` is the schedule dictionary defined in the previous cell.
pruned_model = keras.Sequential()
pruned_model.add(L.Input((784,)))
pruned_model.add(L.Dense(2**9, activation='relu'))  # not wrapped: left unpruned
pruned_model.add(plm(L.Dense(2**8, activation='relu'), **pruning_params))
pruned_model.add(plm(L.Dense(2**7, activation='relu'), **pruning_params))
pruned_model.add(plm(L.Dense(2**6, activation='relu'), **pruning_params))
# no schedule passed here, so this layer uses the library's default schedule
pruned_model.add(plm(L.Dense(2**5, activation='relu')))
pruned_model.add(L.Dense(10, activation='softmax'))

# compile pruned model
pruned_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

# fit model with update callback (required so the pruning step advances)
update_pruning = tfmot.sparsity.keras.UpdatePruningStep()
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])





<tensorflow.python.keras.callbacks.History at 0x7f32faa05550>

Method 2 - cloning, allows for pretraining.

In [9]:
# define cloning function
def cloning_func(layer):
    """Clone hook: return the layer wrapped for pruning, or unchanged.

    A layer is wrapped with ``plm`` when it is a Dense layer or when it is
    named 'dense5'; every other layer is passed through as-is.
    """
    dense_layer = isinstance(layer, keras.layers.Dense)

    # the name is only consulted when the type check fails
    if dense_layer or layer.name == 'dense5':
        return plm(layer)

    # no pruning criterion matched
    return layer

# create pruned model by applying pruning function:
# clone_model calls cloning_func on each layer of `model` and builds the
# new model from whatever the function returns
pruned_model = keras.models.clone_model(
    model, # model is the baseline model trained earlier
clone_function = cloning_func
)

# compile pruned model
pruned_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

# fit model with update callback
update_pruning = tfmot.sparsity.keras.UpdatePruningStep()
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])



<tensorflow.python.keras.callbacks.History at 0x7f32f9e37990>

---

## Quantization

### Quantize Entire Model

In [10]:
# quantize entire model: wrap the baseline for quantization-aware training
from tensorflow_model_optimization.quantization.keras import quantize_model
qat_model = quantize_model(model) # model here is the baseline model previously trained

# compile quantization-aware model
qat_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# fit model to be quantization-aware
qat_model.fit(x_train, y_train,
              batch_size=512,
              epochs=1)

# convert from quantization-aware to quantized model via TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(qat_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

# store TFLite model (written to the current working directory)
with open('model.tflite', 'wb') as f:
    f.write(quantized_tflite_model)

# zip the file the model is stored in
# mkstemp returns an already-open OS-level descriptor together with the path;
# close it so it does not leak - ZipFile reopens the file by path
zip_fd, zippedfile = tempfile.mkstemp(".zip")
os.close(zip_fd)
with zf.ZipFile(zippedfile, "w",
                compression=zf.ZIP_DEFLATED) as f:
    f.write('model.tflite')



### Quantize Individual Layers

Method 1 - simpler and more intuitive, but cannot perform pretraining.

In [11]:
# import quantization marker
from tensorflow_model_optimization.quantization.keras import quantize_annotate_layer as qal

# create model using quantization marker: only the first three Dense layers
# are annotated with qal(...); their ReLU is added as a separate Activation
# layer, while the remaining Dense layers are plain and keep a built-in activation
annotated_model = keras.Sequential()
annotated_model.add(L.Input((784,)))
annotated_model.add(qal(L.Dense(2**9)))
annotated_model.add(L.Activation('relu'))
annotated_model.add(qal(L.Dense(2**8)))
annotated_model.add(L.Activation('relu'))
annotated_model.add(qal(L.Dense(2**7)))
annotated_model.add(L.Activation('relu'))
annotated_model.add(L.Dense(2**6, activation='relu'))
annotated_model.add(L.Dense(2**5, activation='relu'))
annotated_model.add(L.Dense(10, activation='softmax'))

# apply quantization to annotations (produces the model that is actually trained)
from tensorflow_model_optimization.quantization.keras import quantize_apply
quantized_model = quantize_apply(annotated_model)

# compile model
quantized_model.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])

# fit model
quantized_model.fit(x_train, y_train,
                    epochs=1) # train for more epochs for better performance



<tensorflow.python.keras.callbacks.History at 0x7f32f8748550>

Method 2 - cloning, allows for pretraining.

In [12]:
# define cloning function
def cloning_func(layer):
    """Clone hook: return the layer annotated for quantization, or unchanged.

    A layer is annotated with ``qal`` when it is a Dense layer or when it is
    named 'dense5'; every other layer is passed through as-is.
    """
    dense_layer = isinstance(layer, keras.layers.Dense)

    # the name is only consulted when the type check fails
    if dense_layer or layer.name == 'dense5':
        return qal(layer)

    # no quantization criterion matched
    return layer

# apply cloning function to model: clone_model calls cloning_func on each
# layer of the baseline `model` and builds the annotated model from the results
annotated_model = keras.models.clone_model(
    model,
    clone_function = cloning_func
)

# apply quantization to annotations
# (quantize_apply is imported in the previous quantization cell)
quantized_model = quantize_apply(annotated_model)

# compile model
quantized_model.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])

# fit model
quantized_model.fit(x_train, y_train,
                    epochs=1) # train for more epochs for better performance



<tensorflow.python.keras.callbacks.History at 0x7f32fae97d10>

---

## Weight Clustering

### Weight Clustering Entire Model

In [13]:
# define weight clustering parameters: number of clusters and how the
# cluster centroids are initialized
CentroidInit = tensorflow_model_optimization.clustering.keras.CentroidInitialization
clustering_params = {
    'number_of_clusters': 30,
    'cluster_centroids_init': CentroidInit.DENSITY_BASED
}

# perform clustering on weights of the baseline `model`
from tensorflow_model_optimization.clustering.keras import cluster_weights
clustered_model = cluster_weights(model, **clustering_params)

# compile
clustered_model.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])

# fit (fine-tunes the model with clustering applied)
clustered_model.fit(x_train, y_train, epochs=1)

# strip clustering wrappers to obtain the final model
from tensorflow_model_optimization.clustering.keras import strip_clustering
final_model = strip_clustering(clustered_model)



### Weight Clustering Specific Layers

Method 1 - simpler and more intuitive, but cannot perform pretraining.

In [14]:
# import clustering operation
from tensorflow_model_optimization.clustering.keras import cluster_weights as cw

# create model using clustering marker: only the first three Dense layers are
# wrapped with cw(...); `clustering_params` comes from the previous cell
clustered_model = keras.Sequential()
clustered_model.add(L.Input((784,)))
clustered_model.add(cw(L.Dense(2**9), **clustering_params))
clustered_model.add(L.Activation('relu'))
clustered_model.add(cw(L.Dense(2**8), **clustering_params))
clustered_model.add(L.Activation('relu'))
clustered_model.add(cw(L.Dense(2**7), **clustering_params))
clustered_model.add(L.Activation('relu'))
clustered_model.add(L.Dense(2**6, activation='relu'))
clustered_model.add(L.Dense(2**5, activation='relu'))
clustered_model.add(L.Dense(10, activation='softmax'))

# compile model
clustered_model.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])

# fit model
clustered_model.fit(x_train, y_train,
                    epochs=1) # train for more epochs for better performance

# strip clustering wrappers to obtain the final model
from tensorflow_model_optimization.clustering.keras import strip_clustering
final_model = strip_clustering(clustered_model)



Method 2 - cloning, allows for pretraining.

In [15]:
# Rebuild and retrain the baseline model: reusing the earlier `model` here raises an error.
# NOTE(review): presumably because earlier cells wrapped its layers - confirm
# construct Sequential model
model = keras.Sequential()

# construct Input
model.add(L.Input((784,)))

# construct processing layers: i runs 9, 8, 7, 6, 5 -> widths 512, 256, 128, 64, 32
for i in list(range(5,10))[::-1]:
    model.add(L.Dense(2**i, activation='relu'))
# NOTE(review): outside the loop; reuses the leaked i == 5, so this appends a
# second 32-unit layer - confirm intent
model.add(L.Dense(2**i, activation='relu'))

# construct output layer
model.add(L.Dense(10, activation='softmax'))

# compile and fit
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1) # train for more epochs for better performance
# -------------

# define cloning function
def cloning_func(layer):
    """Clone hook: return the layer wrapped for weight clustering, or unchanged.

    A layer is wrapped with ``cw`` (using ``clustering_params``) when it is a
    Dense layer or when it is named 'dense5'; every other layer is passed
    through as-is.
    """
    dense_layer = isinstance(layer, keras.layers.Dense)

    # the name is only consulted when the type check fails
    if dense_layer or layer.name == 'dense5':
        return cw(layer, **clustering_params)

    # no clustering criterion matched
    return layer

# apply cloning function to model: clone_model calls cloning_func on each
# layer of `model` and builds the clustered model from the results
clustered_model = keras.models.clone_model(
    model,
    clone_function = cloning_func
)

# compile model
clustered_model.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])

# fit model
clustered_model.fit(x_train, y_train,
                    epochs=1) # train for more epochs for better performance

# strip clustering wrappers to obtain the final model
from tensorflow_model_optimization.clustering.keras import strip_clustering
final_model = strip_clustering(clustered_model)



---

## Collaborative Optimization

### Sparsity Preserving Quantization

In [16]:
# Rebuild and retrain the baseline model: reusing the earlier `model` here raises an error.
# NOTE(review): presumably because earlier cells wrapped its layers - confirm
# construct Sequential model
model = keras.Sequential()

# construct Input
model.add(L.Input((784,)))

# construct processing layers: i runs 9, 8, 7, 6, 5 -> widths 512, 256, 128, 64, 32
for i in list(range(5,10))[::-1]:
    model.add(L.Dense(2**i, activation='relu'))
# NOTE(review): outside the loop; reuses the leaked i == 5, so this appends a
# second 32-unit layer - confirm intent
model.add(L.Dense(2**i, activation='relu'))

# construct output layer
model.add(L.Dense(10, activation='softmax'))

# compile and fit
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1) # train for more epochs for better performance

# -------------

# PRUNING
# create pruning schedule: sparsity ramps from 50% to 95% over one epoch
from tensorflow_model_optimization.sparsity.keras import PolynomialDecay as PD
# optimizer steps per epoch (fit's default batch size is 32) times the
# number of epochs the sparsity ramp should span (1)
end_step = np.ceil(len(x_train)/32) * 1
schedule = PD(initial_sparsity=0.50,
              final_sparsity=0.95,
              begin_step=0,
              end_step=end_step,
              frequency=128)

# define pruning parameters
pruning_params = {
    'pruning_schedule': schedule
}

# create pruning model
from tensorflow_model_optimization.sparsity.keras import prune_low_magnitude
pruned_model = prune_low_magnitude(model, **pruning_params)

# compile pruned model
pruned_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

# fit model with update callback (required so the pruning step advances)
update_pruning = tfmot.sparsity.keras.UpdatePruningStep()
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])

# fine-tune after pruning, but BEFORE stripping: the pruning wrappers keep
# the pruned weights masked to zero while the surviving weights adapt.
# Training a stripped model instead would let the zeroed weights become
# nonzero again, leaving no sparsity for the quantization stage to preserve.
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])

# strip pruning wrappers once all pruning-stage training is finished
from tensorflow_model_optimization.sparsity.keras import strip_pruning
pruned_model = strip_pruning(pruned_model)

# -------------

# QUANTIZATION
# annotate entire (already pruned) model for quantization
from tensorflow_model_optimization.quantization.keras import quantize_annotate_model
annot_quant_model = quantize_annotate_model(pruned_model)

# specify combining method (pruning-preserving quantization scheme)
from tensorflow_model_optimization.experimental.combine import Default8BitPrunePreserveQuantizeScheme as preserve_pruning

# apply quantization to annotated model, passing the scheme as second argument
from tensorflow_model_optimization.quantization.keras import quantize_apply
pqat_model = quantize_apply(annot_quant_model,
                            preserve_pruning())

# compile and fit (no metrics are requested here, only the loss)
pqat_model.compile(optimizer='adam',
                   loss='categorical_crossentropy')
pqat_model.fit(x_train, y_train, epochs=1) # train for more epochs for better performance







<tensorflow.python.keras.callbacks.History at 0x7f32f838be10>

### Cluster Preserving Quantization

In [17]:
# Rebuild and retrain the baseline model: reusing the earlier `model` here raises an error.
# NOTE(review): presumably because earlier cells wrapped its layers - confirm
# construct Sequential model
model = keras.Sequential()

# construct Input
model.add(L.Input((784,)))

# construct processing layers: i runs 9, 8, 7, 6, 5 -> widths 512, 256, 128, 64, 32
for i in list(range(5,10))[::-1]:
    model.add(L.Dense(2**i, activation='relu'))
# NOTE(review): outside the loop; reuses the leaked i == 5, so this appends a
# second 32-unit layer - confirm intent
model.add(L.Dense(2**i, activation='relu'))

# construct output layer
model.add(L.Dense(10, activation='softmax'))

# compile and fit
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1) # train for more epochs for better performance

# -------------

# CLUSTERING MODEL
# define weight clustering parameters: number of clusters and how the
# cluster centroids are initialized
CentroidInit = tensorflow_model_optimization.clustering.keras.CentroidInitialization
clustering_params = {
    'number_of_clusters': 30,
    'cluster_centroids_init': CentroidInit.DENSITY_BASED
}

# perform clustering on weights of the freshly trained baseline
from tensorflow_model_optimization.clustering.keras import cluster_weights
clustered_model = cluster_weights(model, **clustering_params)

# compile
clustered_model.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])

# fit
clustered_model.fit(x_train, y_train, epochs=1)

# strip clustering; the stripped model is stored as `cluster_model`
# (distinct from the wrapped `clustered_model`) and fed to quantization below
from tensorflow_model_optimization.clustering.keras import strip_clustering
cluster_model = strip_clustering(clustered_model)

# -------------

# QUANTIZATION
# annotate entire (already clustered) model for quantization
from tensorflow_model_optimization.quantization.keras import quantize_annotate_model
annot_quant_model = quantize_annotate_model(cluster_model)

# specify combining method (cluster-preserving quantization scheme)
from tensorflow_model_optimization.experimental.combine import Default8BitClusterPreserveQuantizeScheme as preserve_clustering

# apply quantization to annotated model, passing the scheme as second argument
from tensorflow_model_optimization.quantization.keras import quantize_apply
pqat_model = quantize_apply(annot_quant_model,
                            preserve_clustering())

# compile and fit (no metrics are requested here, only the loss)
pqat_model.compile(optimizer='adam',
                   loss='categorical_crossentropy')
pqat_model.fit(x_train, y_train, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7f32dc4ee190>

### Sparsity Preserving Clustering

In [18]:
# Rebuild the baseline model: reusing the earlier `model` here raises an error.
# NOTE(review): presumably because earlier cells wrapped its layers - confirm
# NOTE(review): unlike the other sections, this baseline is not compiled or
# trained before pruning - confirm whether that is intentional
# construct Sequential model
model = keras.Sequential()

# construct Input
model.add(L.Input((784,)))

# construct processing layers: i runs 9, 8, 7, 6, 5 -> widths 512, 256, 128, 64, 32
for i in list(range(5,10))[::-1]:
    model.add(L.Dense(2**i, activation='relu'))
# NOTE(review): outside the loop; reuses the leaked i == 5, so this appends a
# second 32-unit layer - confirm intent
model.add(L.Dense(2**i, activation='relu'))

# construct output layer
model.add(L.Dense(10, activation='softmax'))

# -------------

# PRUNING
# create pruning schedule: sparsity ramps from 50% to 95% over one epoch
from tensorflow_model_optimization.sparsity.keras import PolynomialDecay as PD
# optimizer steps per epoch (fit's default batch size is 32) times the
# number of epochs the sparsity ramp should span (1)
end_step = np.ceil(len(x_train)/32) * 1
schedule = PD(initial_sparsity=0.50,
              final_sparsity=0.95,
              begin_step=0,
              end_step=end_step,
              frequency=128)

# define pruning parameters
pruning_params = {
    'pruning_schedule': schedule
}

# create pruning model
from tensorflow_model_optimization.sparsity.keras import prune_low_magnitude
pruned_model = prune_low_magnitude(model, **pruning_params)

# compile pruned model
pruned_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

# fit model with update callback (required so the pruning step advances)
update_pruning = tfmot.sparsity.keras.UpdatePruningStep()
pruned_model.fit(x_train, y_train,
                 epochs=1, # train for more epochs for better performance
                 callbacks=[update_pruning])

# fine-tune after pruning, but BEFORE stripping: the pruning wrappers keep
# the pruned weights masked to zero while the surviving weights adapt.
# Training a stripped model instead would let the zeroed weights become
# nonzero again, leaving no sparsity for the clustering stage to preserve.
pruned_model.fit(x_train, y_train,
                 epochs=1,
                 callbacks=[update_pruning])

# strip pruning wrappers once all pruning-stage training is finished
from tensorflow_model_optimization.sparsity.keras import strip_pruning
pruned_model = strip_pruning(pruned_model)

# -------------

# CLUSTERING
# performing clustering
# NOTE: this imports the experimental cluster_weights, rebinding the name
# `cluster_weights` that earlier cells imported from clustering.keras
from tensorflow_model_optimization.python.core.clustering.keras.experimental.cluster import cluster_weights

# specify centroid initialization style
# NOTE: here CentroidInit is bound to the DENSITY_BASED member itself, whereas
# earlier cells bound the same name to the CentroidInitialization class
from tensorflow_model_optimization.clustering.keras import CentroidInitialization
CentroidInit = CentroidInitialization.DENSITY_BASED

# put clustering parameters into dictionary; 'preserve_sparsity' is the extra
# option passed to the experimental cluster_weights
clustering_params = {'number_of_clusters': 8,
                 'cluster_centroids_init': CentroidInit,
                 'preserve_sparsity': True}

# create sparsity preserving clustering model from the pruned model
spc = cluster_weights(pruned_model, **clustering_params)

# compile and fit (no metrics are requested here, only the loss)
spc.compile(optimizer='adam',
            loss='categorical_crossentropy')
spc.fit(x_train, y_train, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7f32dc1edd90>

---

## Versions

See versions for all libraries used if you run into any errors.

In [19]:
!pip list

Package                        Version             Location
------------------------------ ------------------- --------------
absl-py                        0.12.0
affine                         2.3.0
aiobotocore                    1.3.1
aiohttp                        3.7.4.post0
aiohttp-cors                   0.7.0
aioitertools                   0.7.1
aioredis                       1.3.1
albumentations                 1.0.1
alembic                        1.6.5
allennlp                       2.5.0
altair                         4.1.0
annoy                          1.17.0
ansiwrap                       0.8.4
anyio                          3.2.0
appdirs                        1.4.4
argon2-cffi                    20.1.0
arrow                          1.1.0
arviz                          0.11.2
asn1crypto                     1.4.0
astunparse                     1.6.3
async-generator                1.10
async-timeout                  3.0.1
attrs                      

---