In [1]:
# ---------------------------------------------------------------
# Test configuration
# ---------------------------------------------------------------

# Layer under test. Supported values:
#   'depthwise_conv2d'
#   'pointwise_conv2d'
#   'separable_conv2d'   (uses both depthwise and pointwise)
LAYER = 'separable_conv2d'

# HLS run toggle and hls4ml conversion options
RUN_HLS = True
IO_TYPE, STRATEGY, BACKEND = 'io_parallel', 'Latency', 'Vivado'

# Input geometry: height, width, number of channels
H, W, Din = 5, 6, 1

# Kernel geometry: height, width, number of filters
Fh, Fw, Dout = 3, 3, 2

# Test set batch size
B = 1

# Fixed-point precision: word bit width, integer-part bit width
FXD_W, FXD_I = 12, 11

In [2]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

from tensorflow import keras
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential

from qkeras import *

2023-12-07 14:26:49.995248: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import os

In [4]:
# Make the Vivado HLS binaries discoverable for the HLS build step.
# The directory is prepended only when it is not already on PATH, so
# re-running this cell does not keep growing the variable.
VIVADO_BIN = '/home/xilinx/Vivado/2019.1/bin'
_path_entries = os.environ.get('PATH', '').split(os.pathsep)
if VIVADO_BIN not in _path_entries:
    os.environ['PATH'] = os.pathsep.join([VIVADO_BIN] + _path_entries)

In [5]:
def CreateKerasModel(layer, input_shape, kernel_size, filters):
    """Build a single-layer floating-point Keras model for the layer under test.

    The Keras layer and model classes are referenced explicitly through
    ``tf.keras`` so that the function does not depend on names that are only
    in scope as a by-product of a wildcard import.

    Args:
        layer: Layer selector. 'depthwise_conv2d' builds a DepthwiseConv2D,
            'pointwise_conv2d' builds a 1x1 Conv2D, and any other value
            builds a SeparableConv2D.
        input_shape: Shape handed to the model's Input layer,
            e.g. (height, width, channels).
        kernel_size: Kernel size for the depthwise and separable layers.
            Not used for 'pointwise_conv2d', which always uses (1, 1).
        filters: Number of filters for the pointwise and separable layers.
            Not used for 'depthwise_conv2d'.

    Returns:
        A Keras Model mapping the input named 'input_1' to the output of
        the selected layer.
    """
    keras_layers = tf.keras.layers

    x_in = Input(input_shape, name='input_1')
    if layer == 'depthwise_conv2d':
        x_out = keras_layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            use_bias=False,
            depthwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            bias_initializer=tf.keras.initializers.Zeros(), # makes debugging easy
            name='depthwise_conv2d'
        )(x_in)
    elif layer == 'pointwise_conv2d':
        x_out = keras_layers.Conv2D(
            filters=filters,
            kernel_size=(1,1), # 1x1 convolution
            use_bias=False,
            kernel_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            bias_initializer=tf.keras.initializers.Zeros(), # makes debugging easy
            name='pointwise_conv2d'
        )(x_in)
    else:
        # Any other selector falls back to the separable convolution.
        x_out = keras_layers.SeparableConv2D(
            filters=filters,
            kernel_size=kernel_size,
            use_bias=False,
            depthwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            pointwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            name='separable_conv2d'
        )(x_in)
    kmodel = tf.keras.Model(inputs=x_in, outputs=x_out)
    return kmodel

In [6]:
# Build the floating-point Keras model for the selected LAYER using the
# input/kernel dimensions from the configuration cell, then print its summary.
kmodel = CreateKerasModel(LAYER, input_shape=(H,W,Din), kernel_size=(Fh, Fw), filters=Dout)
kmodel.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 6, 1)]         0         
                                                                 
 separable_conv2d (Separable  (None, 3, 4, 2)          11        
 Conv2D)                                                         
                                                                 
Total params: 11
Trainable params: 11
Non-trainable params: 0
_________________________________________________________________


2023-12-07 14:26:53.557664: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-07 14:26:53.573262: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-07 14:26:53.573621: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-07 14:26:53.574076: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operation

In [7]:
def CreateQKerasModel(type, input_shape, kernel_size, filters):
    """Build a single-layer quantized (QKeras) model for the layer under test.

    All weights use the Ones initializer, so the model is deterministic and
    no random seeding is required. Quantizers are built from the
    module-level FXD_W / FXD_I constants.

    Args:
        type: Layer selector (the name shadows the ``type`` builtin but is
            kept so existing callers keep working). 'depthwise_conv2d'
            builds a QDepthwiseConv2D, 'pointwise_conv2d' builds a 1x1
            QConv2D, and any other value builds a QSeparableConv2D.
        input_shape: Shape handed to the model's Input layer,
            e.g. (height, width, channels).
        kernel_size: Kernel size for the depthwise and separable layers.
            Not used for 'pointwise_conv2d', which always uses (1, 1).
        filters: Number of filters for the pointwise and separable layers.
            Not used for 'depthwise_conv2d'.

    Returns:
        A Keras Model mapping the input named 'q_input_1' to the output of
        the selected quantized layer.
    """
    def make_quantizer():
        # A fresh quantizer object is created for every argument so that no
        # quantizer instance is shared between different weights.
        # NOTE(review): FXD_I-1 presumably accounts for the sign bit not being
        # counted in the quantizer's integer bits — confirm against QKeras docs.
        return quantized_bits(FXD_W, FXD_I-1, 1, alpha=1)

    x_in = Input(input_shape, name='q_input_1')
    if type == 'depthwise_conv2d':
        x_out = QDepthwiseConv2D(
            kernel_size=kernel_size,
            use_bias=False,
            depthwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            depthwise_quantizer=make_quantizer(),
            bias_quantizer=make_quantizer(),
            name='q_depthwise_conv2d'
        )(x_in)
    elif type == 'pointwise_conv2d':
        x_out = QConv2D(
            kernel_size=(1,1), # 1x1 convolution
            filters=filters,
            use_bias=False,
            kernel_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            kernel_quantizer=make_quantizer(),
            bias_quantizer=make_quantizer(),
            name='q_pointwise_conv2d'
        )(x_in)
    else:
        # Any other selector falls back to the separable convolution.
        x_out = QSeparableConv2D(
            filters=filters,
            kernel_size=kernel_size,
            use_bias=False,
            depthwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            pointwise_initializer=tf.keras.initializers.Ones(), # makes debugging easy
            depthwise_quantizer=make_quantizer(),
            pointwise_quantizer=make_quantizer(),
            bias_quantizer=make_quantizer(),
            name='q_separable_conv2d'
        )(x_in)

    # Referenced via tf.keras so the function does not rely on a name that is
    # only in scope as a by-product of a wildcard import.
    qkmodel = tf.keras.Model(inputs=x_in, outputs=x_out)
    return qkmodel

In [8]:
# Build the quantized QKeras model for the selected LAYER with the same
# input/kernel dimensions as the Keras model, then print its summary.
qkmodel = CreateQKerasModel(LAYER, input_shape=(H,W,Din), kernel_size=(Fh, Fw), filters=Dout)
qkmodel.summary()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 q_input_1 (InputLayer)      [(None, 5, 6, 1)]         0         
                                                                 
 q_separable_conv2d (QSepara  (None, 3, 4, 2)          11        
 bleConv2D)                                                      
                                                                 
Total params: 11
Trainable params: 11
Non-trainable params: 0
_________________________________________________________________


In [9]:
import hls4ml.utils
import hls4ml.converters

WARN: Unable to import optimizer(s) from expr_templates.py: No module named 'sympy'




In [10]:
# Derive an hls4ml configuration dictionary from the QKeras model, with
# granularity='name' (later cells edit entries under config['LayerName']).
config = hls4ml.utils.config_from_keras_model(qkmodel, granularity='name')
# Separator line printed after the converter's own topology output.
print("-----------------------------------")

Interpreting Model
Topology:
Layer name: q_input_1, layer type: InputLayer, input shapes: [[None, 5, 6, 1]], output shape: [None, 5, 6, 1]
Layer name: q_separable_conv2d, layer type: QSeparableConv2D, input shapes: [[None, 5, 6, 1]], output shape: [None, 3, 4, 2]
-----------------------------------


In [11]:
# Model-wide strategy comes from the configuration cell
# (alternative value tried previously: 'Resource').
config['Model']['Strategy'] = STRATEGY

# Result precision of the input layer, built from the fixed-point settings.
config['LayerName']['q_input_1']['Precision']['result'] = 'fixed<{},{}>'.format(FXD_W, FXD_I)

# Convert the QKeras model into an hls4ml model; the project is written to
# the 'hls4ml_prj' output directory for the given FPGA part.
hls_model = hls4ml.converters.convert_from_keras_model(
    qkmodel,
    hls_config=config,
    output_dir='hls4ml_prj',
    part='xcu250-figd2104-2L-e',
    backend=BACKEND,
    io_type=IO_TYPE,
)

# Compile the generated project.
hls_model.compile()

Interpreting Model
Topology:
Layer name: q_input_1, layer type: InputLayer, input shapes: [[None, 5, 6, 1]], output shape: [None, 5, 6, 1]
Layer name: q_separable_conv2d, layer type: QSeparableConv2D, input shapes: [[None, 5, 6, 1]], output shape: [None, 3, 4, 2]
Creating HLS model
Writing HLS project
Done


In [12]:
#m = hls4ml.converters.keras_to_hls(config)

In [15]:
# Run the HLS build (C simulation disabled) only when RUN_HLS is enabled in
# the configuration cell, so the notebook can be re-run without launching
# the long Vivado HLS flow. The build result is still the cell's displayed
# value; nothing is displayed when the build is skipped.
hls_model.build(csim=False) if RUN_HLS else None


****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2019.1 (64-bit)
  **** SW Build 2552052 on Fri May 24 14:47:09 MDT 2019
  **** IP Build 2548770 on Fri May 24 18:01:18 MDT 2019
    ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.

source /home/xilinx/Vivado/2019.1/scripts/vivado_hls/hls.tcl -notrace
INFO: Applying HLS Y2K22 patch v1.2 for IP revision
INFO: [HLS 200-10] Running '/home/xilinx/Vivado/2019.1/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'russelld' on host 'scully.physics.ucsd.edu' (Linux_x86_64 version 4.18.0-348.12.2.el8_5.x86_64) on Thu Dec 07 14:27:34 PST 2023
INFO: [HLS 200-10] In directory '/home/users/russelld/SepConv2D_hls4ml/hls4ml_prj'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Opening project '/home/users/russelld/SepConv2D_hls4ml/hls4ml_prj/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject.cpp' to the project
INFO: [HLS 200-10] Adding test bench file 'myproject_test.cpp' to the pro

{}

In [14]:
import hls4ml
# Display the file path of the imported hls4ml package, to check which
# installation of the library this notebook is actually using.
hls4ml.__file__

'/home/users/russelld/SepConv2D_hls4ml/Software/hls4ml/hls4ml/__init__.py'