# Part 4: Quantization

In [1]:
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
seed = 0
np.random.seed(seed)
import tensorflow as tf

tf.random.set_seed(seed)
import os

os.environ['PATH'] = '/tools/Xilinx/Vitis/2022.1/bin:' + os.environ['PATH']

2023-07-16 02:30:32.593771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-16 02:30:32.684264: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Fetch the 'hls4ml_lhc_jets_hlf' dataset from OpenML and pull out the
# feature table and the target column.
data = fetch_openml('hls4ml_lhc_jets_hlf')
X = data['data']
y = data['target']

  warn(


In [3]:
# Map the class labels to integer ids, then expand them to 5-column one-hot rows.
le = LabelEncoder()
y = to_categorical(le.fit_transform(y), 5)

# Hold out 20% of the samples for testing; the split itself is seeded.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(y[:5])

[[1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0.]]


In [4]:
# Learn the scaling statistics on the training split only, then apply the
# same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Construct a model
This time we're going to use QKeras layers.
QKeras is "Quantized Keras" for deep heterogeneous quantization of ML models.

https://github.com/google/qkeras

It is maintained by Google and we recently added support for QKeras models to hls4ml.

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks
from tensorflow.keras.layers import Activation
from qkeras.qlayers import QDense, QActivation
from qkeras.quantizers import quantized_bits, quantized_relu

We're using `QDense` layers instead of `Dense`, and `QActivation` instead of `Activation`. We're also specifying `kernel_quantizer = quantized_bits(bits, int_bits, alpha=1)` with `bits = 8` and `int_bits = 0`. This will use 8 bits (of which 0 are integer) for the weights. We use the same bit widths for the biases, and `quantized_relu(bits, int_bits)` for 8-bit ReLU activations.

In [6]:
# Layer widths: layer_1 is the input width, layer_5 the output width.
layer_1 = 16
layer_2 = 64
layer_3 = 32
layer_4 = 32
layer_5 = 5

# Quantizer settings shared by every layer.
int_bits = 0   # passed as the second argument of each quantizer
sign_bit = 1   # not referenced anywhere else in this cell
bits = 8       # passed as the first argument of each quantizer


def _quantized_dense(units, name, **extra):
    """Return a QDense layer with this notebook's common quantizer settings.

    A fresh quantizer and regularizer instance is created on every call, so
    no objects are shared between layers.
    """
    return QDense(
        units,
        name=name,
        kernel_quantizer=quantized_bits(bits, int_bits, alpha=1, use_stochastic_rounding=True),
        bias_quantizer=quantized_bits(bits, int_bits, alpha=1),
        kernel_initializer='lecun_uniform',
        kernel_regularizer=l1(0.0001),
        **extra,
    )


model = Sequential()

# Hidden stack: (dense name, width, activation name, extra QDense kwargs).
# Only the first layer carries the input shape.
hidden_stack = [
    ('fc1', layer_2, 'relu1', {'input_shape': (layer_1,)}),
    ('fc2', layer_3, 'relu2', {}),
    ('fc3', layer_4, 'relu3', {}),
]
for dense_name, width, relu_name, dense_kwargs in hidden_stack:
    model.add(_quantized_dense(width, dense_name, **dense_kwargs))
    model.add(
        QActivation(
            activation=quantized_relu(bits, int_bits, use_stochastic_rounding=False),
            name=relu_name,
        )
    )

# Classification head: quantized dense followed by a regular softmax.
model.add(_quantized_dense(layer_5, 'output'))
model.add(Activation(activation='softmax', name='softmax'))

2023-07-16 02:30:54.201239: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [7]:
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning
from qkeras.utils import model_save_quantized_weights

# Wrap the model for magnitude pruning.
# NOTE(review): the target sparsity passed to ConstantSparsity is 0, which
# presumably leaves pruning effectively disabled -- confirm this is intended.
pruning_params = {"pruning_schedule" : pruning_schedule.ConstantSparsity(0, begin_step=2000, frequency=100)}
model = prune.prune_low_magnitude(model, **pruning_params)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras optimizers warn about or reject.
adam = Adam(learning_rate=0.0001)

model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])

# Project callbacks (checkpoints and loss log under 'jt_classification', as
# the training output shows) plus the step counter the pruning wrapper needs.
callbacks= all_callbacks( outputDir = 'jt_classification')
callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())
model.fit(X_train, Y_train, batch_size=1024,
          epochs=14,validation_split=0.25, verbose=1, shuffle=True,
          callbacks = callbacks.callbacks);

# Remove the pruning wrappers, recompile the stripped model, then persist the
# quantized weights and the model itself.
model = strip_pruning(model)
model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])
model_save_quantized_weights(model, "test_weights")
model.save("Jet_tagging")











Epoch 1/14
***callbacks***
saving losses to jt_classification/losses.log

Epoch 1: val_loss improved from inf to 0.81191, saving model to jt_classification/KERAS_check_best_model.h5

Epoch 1: val_loss improved from inf to 0.81191, saving model to jt_classification/KERAS_check_best_model_weights.h5

Epoch 1: saving model to jt_classification/KERAS_check_model_last.h5

Epoch 1: saving model to jt_classification/KERAS_check_model_last_weights.h5

***callbacks end***

Epoch 2/14
***callbacks***
saving losses to jt_classification/losses.log

Epoch 2: val_loss improved from 0.81191 to 0.76310, saving model to jt_classification/KERAS_check_best_model.h5

Epoch 2: val_loss improved from 0.81191 to 0.76310, saving model to jt_classification/KERAS_check_best_model_weights.h5

Epoch 2: saving model to jt_classification/KERAS_check_model_last.h5

Epoch 2: saving model to jt_classification/KERAS_check_model_last_weights.h5

***callbacks end***

Epoch 3/14
***callbacks***
saving losses to jt_classif


Epoch 12: saving model to jt_classification/KERAS_check_model_last_weights.h5

***callbacks end***

Epoch 13/14
***callbacks***
saving losses to jt_classification/losses.log

Epoch 13: val_loss improved from 0.70694 to 0.70544, saving model to jt_classification/KERAS_check_best_model.h5

Epoch 13: val_loss improved from 0.70694 to 0.70544, saving model to jt_classification/KERAS_check_best_model_weights.h5

Epoch 13: saving model to jt_classification/KERAS_check_model_last.h5

Epoch 13: saving model to jt_classification/KERAS_check_model_last_weights.h5

***callbacks end***

Epoch 14/14
***callbacks***
saving losses to jt_classification/losses.log

Epoch 14: val_loss improved from 0.70544 to 0.70502, saving model to jt_classification/KERAS_check_best_model.h5

Epoch 14: val_loss improved from 0.70544 to 0.70502, saving model to jt_classification/KERAS_check_best_model_weights.h5

Epoch 14: saving model to jt_classification/KERAS_check_model_last.h5

Epoch 14: saving model to jt_classi



INFO:tensorflow:Assets written to: Jet_tagging/assets


INFO:tensorflow:Assets written to: Jet_tagging/assets


In [None]:
# Scratch inspection: show entry 2 of the model's weight list
# (presumably the kernel of the second dense layer -- TODO confirm).
model.get_weights()[2]

In [None]:
# Scratch inspection: same expression as the previous cell (duplicate).
model.get_weights()[2]

In [None]:
# NOTE(review): `Overall_LUTs` is not assigned in any visible cell of this
# notebook, so this raises NameError on a fresh kernel unless it is defined
# elsewhere -- confirm or remove.
Overall_LUTs

In [None]:
# Scratch inspection: show entry 0 of the model's weight list
# (presumably the kernel of the first dense layer -- TODO confirm).
model.get_weights()[0]

In [76]:
import estimate as es

In [77]:
# Re-import the `estimate` module (bound to `es` by the preceding cell) so
# that edits made to its source file are picked up without a kernel restart.
from importlib import reload
reload(es)

<module 'estimate' from '/home/edge/Desktop/argykokk/hls4ml-tutorial/networks/jet_tagging/estimate.py'>

In [80]:
# Settings forwarded to es.estimate() in the next cell, as its last two
# arguments (reuse, then param). The estimator prints "Reuse factor = 100",
# so `reuse` is the reuse factor; the meaning of `param` is defined in
# estimate.py (not visible here).
param, reuse = 1, 100

In [81]:
%%time
#input_num, neurons_num, layer_id, model
ffs = es.estimate(16,64,0,model,reuse,param)
luts, ffs = es.estimate(64,32,2,model,reuse,param)
luts, ffs = es.estimate(32,32,4,model,reuse,param)
luts, ffs = es.estimate(32,5,6,model,reuse,param)

zero weight are:  165
Mul ins = 247 and Max muls = 9 and Saved muls = 238 Reuse factor = 100
Muxes LUTS: 3094
LUT cost1=: 558 cost2= 5058 bias acc= 896 mult acc11130
LUTs prediction: 17642
Initial muls: 1024 Real muls: 859 Initial neurons: 64 Tuned neurons: 54
FFs prediction: 16262
zero weight are:  930
Mul ins = 275 and Max muls = 12 and Saved muls = 263 Reuse factor = 100
Muxes LUTS: 3419
LUT cost1=: 744 cost2= 7731 bias acc= 448 mult acc15204
LUTs prediction: 24127
Initial muls: 2048 Real muls: 1118 Initial neurons: 32 Tuned neurons: 18
FFs prediction: 22560
zero weight are:  438
Mul ins = 202 and Max muls = 6 and Saved muls = 196 Reuse factor = 100
Muxes LUTS: 2548
LUT cost1=: 372 cost2= 4000 bias acc= 448 mult acc7756
LUTs prediction: 12576
Initial muls: 1024 Real muls: 586 Initial neurons: 32 Tuned neurons: 19
FFs prediction: 12181
zero weight are:  27
Mul ins = 85 and Max muls = 2 and Saved muls = 83 Reuse factor = 100
Muxes LUTS: 1079
LUT cost1=: 124 cost2= 739 bias acc= 70 mul

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [11]:
import hls4ml
import plotting

# Build a per-layer hls4ml configuration from the Keras model and override the
# lookup-table types used by the softmax layer.
config = hls4ml.utils.config_from_keras_model(model, granularity='name')
softmax_cfg = config['LayerName']['softmax']
softmax_cfg['exp_table_t'] = 'ap_fixed<18,8>'
softmax_cfg['inv_table_t'] = 'ap_fixed<18,4>'

# Show the final configuration between two separator lines.
separator = "-----------------------------------"
print(separator)
plotting.print_dict(config)
print(separator)

# Convert to an hls4ml model for the chosen part and compile it.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir='jet_tagging/hls4ml_prj',
    part='xc7z007s-clg225-2',
)
hls_model.compile()

# Run both the QKeras model and the hls4ml model on the same contiguous
# copy of the test set.
X_test_contiguous = np.ascontiguousarray(X_test)
y_qkeras = model.predict(X_test_contiguous)
y_hls = hls_model.predict(X_test_contiguous)



None
Interpreting Sequential
Topology:
Layer name: fc1_input, layer type: InputLayer, input shapes: [[None, 16]], output shape: [None, 16]
Layer name: fc1, layer type: QDense, input shapes: [[None, 16]], output shape: [None, 64]
Layer name: relu1, layer type: Activation, input shapes: [[None, 64]], output shape: [None, 64]
Layer name: fc2, layer type: QDense, input shapes: [[None, 64]], output shape: [None, 32]
Layer name: relu2, layer type: Activation, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: fc3, layer type: QDense, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: relu3, layer type: Activation, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: output, layer type: QDense, input shapes: [[None, 32]], output shape: [None, 5]
Layer name: softmax, layer type: Softmax, input shapes: [[None, 5]], output shape: [None, 5]
-----------------------------------
Model
  Precision:         fixed<16,6>
  ReuseFactor:       1
  Strategy:         