# Part 4: Quantization

In [None]:
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
seed = 0
np.random.seed(seed)
import tensorflow as tf

tf.random.set_seed(seed)
import os

os.environ['PATH'] = '/tools/Xilinx/Vitis/2022.1/bin:' + os.environ['PATH']

In [None]:
# Download (or load from the local scikit-learn cache) the OpenML dataset
# named 'hls4ml_lhc_jets_hlf', then pull out the feature table and the labels.
data = fetch_openml('hls4ml_lhc_jets_hlf')
X = data['data']
y = data['target']

In [None]:
# Encode the class labels as integers, then as one-hot vectors with 5 columns.
# The labels are read from `data['target']` rather than from the previous value
# of `y`, so this cell is idempotent: re-running it no longer feeds an already
# one-hot (2-D) `y` back into LabelEncoder, which would fail.
le = LabelEncoder()
y = to_categorical(le.fit_transform(data['target']), 5)

# Hold out 20% of the samples for testing; the split is fixed by random_state.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Quick look at the first five one-hot rows.
print(y[:5])

In [None]:
# Learn the standardisation statistics from the training split only, then
# apply that same fitted transform to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Construct a model
This time we're going to use QKeras layers.
QKeras is "Quantized Keras" for deep heterogeneous quantization of ML models.

https://github.com/google/qkeras

It is maintained by Google, and we recently added support for QKeras models to hls4ml.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks
from tensorflow.keras.layers import Activation
from qkeras.qlayers import QDense, QActivation
from qkeras.quantizers import quantized_bits, quantized_relu

We're using `QDense` layers instead of `Dense`, and `QActivation` instead of `Activation`. We're also specifying `kernel_quantizer=quantized_bits(bits, int_bits, alpha=1, use_stochastic_rounding=True)` with `bits = 8` and `int_bits = 0`. This will use 8 bits (of which 0 are integer) for the weights. We use the same bit widths for the biases (without stochastic rounding), and `quantized_relu(bits, int_bits)` for 8-bit ReLU activations.

In [None]:
# Layer widths: `layer_1` is the input width, `layer_5` the output width.
layer_1 = 16
layer_2 = 64
layer_3 = 32
layer_4 = 32
layer_5 = 5

# Quantizer settings shared by every quantized layer below.
int_bits = 0
sign_bit = 1
bits = 8

# One row per QDense layer:
#   (dense name, units, name of the ReLU that follows or None, extra kwargs)
_dense_specs = (
    ('fc1', layer_2, 'relu1', {'input_shape': (layer_1,)}),
    ('fc2', layer_3, 'relu2', {}),
    ('fc3', layer_4, 'relu3', {}),
    ('output', layer_5, None, {}),
)

model = Sequential()
for _dense_name, _units, _relu_name, _extra_kwargs in _dense_specs:
    # Fresh quantizer / regularizer objects are created for every layer,
    # exactly as when each layer is written out by hand.
    model.add(
        QDense(
            _units,
            name=_dense_name,
            kernel_quantizer=quantized_bits(bits, int_bits, alpha=1, use_stochastic_rounding=True),
            bias_quantizer=quantized_bits(bits, int_bits, alpha=1),
            kernel_initializer='lecun_uniform',
            kernel_regularizer=l1(0.0001),
            **_extra_kwargs,
        )
    )
    if _relu_name is not None:
        model.add(
            QActivation(
                activation=quantized_relu(bits, int_bits, use_stochastic_rounding=False),
                name=_relu_name,
            )
        )

# The classifier head is a regular (non-quantized) softmax.
model.add(Activation(activation='softmax', name='softmax'))

In [None]:
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning
from qkeras.utils import model_save_quantized_weights

# Wrap the model for magnitude pruning with a constant-sparsity schedule that
# starts at step 2000 and is applied every 100 steps.
# NOTE(review): the first argument (presumably the target sparsity) is 0, which
# would mean no weights are actually pruned -- confirm this is intentional.
pruning_params = {"pruning_schedule": pruning_schedule.ConstantSparsity(0, begin_step=2000, frequency=100)}
model = prune.prune_low_magnitude(model, **pruning_params)

# Use the `learning_rate` keyword: the legacy `lr` alias is deprecated in
# TF2 Keras and rejected by newer Keras releases.
adam = Adam(learning_rate=0.0001)

model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])

# Project-local callback bundle writing to 'jt_classification', plus the
# pruning-step callback appended for training the pruning-wrapped model.
callbacks = all_callbacks(outputDir='jt_classification')
callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())

# Train; 25% of the training split is held out for validation. The trailing
# semicolon suppresses the History object repr in the notebook output.
model.fit(
    X_train,
    Y_train,
    batch_size=1024,
    epochs=14,
    validation_split=0.25,
    verbose=1,
    shuffle=True,
    callbacks=callbacks.callbacks,
);

# Remove the pruning wrappers, recompile the stripped model, then save both
# the quantized weights and the model itself.
model = strip_pruning(model)
model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])
model_save_quantized_weights(model, "test_weights")
model.save("Jet_tagging")

In [None]:
# Display the third array (index 2) in the model's flat weight list.
# NOTE(review): presumably the kernel of 'fc2', assuming each QDense layer
# contributes a kernel followed by a bias -- confirm.
model.get_weights()[2]

In [None]:
# Display index 2 of the model's flat weight list.
# NOTE(review): this cell repeats the expression of the preceding cell;
# consider removing one of the two.
model.get_weights()[2]

In [None]:
# NOTE(review): `Overall_LUTs` is not assigned in any cell visible above, so on
# a fresh kernel (Restart & Run All) this cell is expected to raise NameError
# unless the name is defined elsewhere -- confirm, then define it or drop the cell.
Overall_LUTs

In [None]:
# Display the first array (index 0) in the model's flat weight list.
# NOTE(review): presumably the kernel of 'fc1' -- confirm.
model.get_weights()[0]

In [None]:
import estimate as es

In [None]:
# Re-import the local `estimate` module (bound to `es`) so that edits made to
# its source file are picked up without restarting the kernel.
from importlib import reload
reload(es)

In [None]:
# Settings forwarded to every `es.estimate(...)` call: `reuse` is passed as the
# fifth argument and `param` as the sixth.
# NOTE(review): their exact meaning is defined by the `estimate` module, which
# is not visible here -- presumably `reuse` is a reuse factor; confirm.
param, reuse = 1, 100

In [None]:
%%time
#input_num, neurons_num, layer_id, model
ffs = es.estimate(16,64,0,model,reuse,param)
luts, ffs = es.estimate(64,32,2,model,reuse,param)
luts, ffs = es.estimate(32,32,4,model,reuse,param)
luts, ffs = es.estimate(32,5,6,model,reuse,param)

In [None]:
import hls4ml
import plotting

# Build a per-layer ('name' granularity) hls4ml configuration from the model,
# then override the two lookup-table types of the softmax layer.
config = hls4ml.utils.config_from_keras_model(model, granularity='name')
config['LayerName']['softmax'].update(
    exp_table_t='ap_fixed<18,8>',
    inv_table_t='ap_fixed<18,4>',
)

# Dump the resulting configuration between two separator lines.
print("-----------------------------------")
plotting.print_dict(config)
print("-----------------------------------")

# Convert the Keras model into an hls4ml project for the given part and
# compile it so that it can be called from Python.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir='jet_tagging/hls4ml_prj',
    part='xc7z007s-clg225-2',
)
hls_model.compile()

# Run both the QKeras model and the hls4ml model on the same test inputs;
# the inputs are passed as C-contiguous arrays.
y_qkeras = model.predict(np.ascontiguousarray(X_test))
y_hls = hls_model.predict(np.ascontiguousarray(X_test))