In [1]:
from tensorflow.keras.layers import Layer
import tensorflow as tf

class DotProduct(tf.keras.layers.Layer):
    """Keras layer that computes ``tf.matmul(inputs, w) + b``.

    The kernel ``w`` has shape ``(input_dim, 1)`` and the bias ``b`` has shape
    ``(1,)``, so the last axis of the input is reduced to a single value.

    Args:
        input_dim: Size of the last axis of the expected input.
        **kwargs: Standard ``tf.keras.layers.Layer`` options (``name``,
            ``dtype``, ``trainable``, ...), forwarded to the base class.
    """

    def __init__(self, input_dim=32, **kwargs):
        # Forwarding **kwargs lets callers name the layer and lets
        # from_config() re-create it from the dict built by get_config().
        super().__init__(**kwargs)
        self.input_dim = input_dim
        # The variables are named '<layer name>/kernel' and '<layer name>/bias'.
        self.w = self.add_weight(
            shape=(input_dim, 1),
            initializer="random_normal",
            trainable=True,
            name='{}/kernel'.format(self.name),
        )
        self.b = self.add_weight(
            shape=(1,),
            initializer="zeros",
            trainable=True,
            name='{}/bias'.format(self.name),
        )

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the base layer config extended with ``input_dim``."""
        config = super().get_config()
        config.update({'input_dim': self.input_dim})
        return config

2023-07-18 15:02:30.747243: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-18 15:02:30.859728: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-07-18 15:02:30.859760: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-07-18 15:02:31.485095: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
import hls4ml
class HDotProduct(hls4ml.model.layers.Layer):
    """hls4ml layer class used for nodes of type ``DotProduct``."""

    # No expected attributes are declared for this layer.
    _expected_attributes = []

    def initialize(self):
        """Create the node's weight, bias and output variables.

        Reads the ``weight_data`` and ``bias_data`` attributes of the node and
        registers them as the ``weight`` and ``bias`` weight variables, then
        adds an output variable of shape ``[1]``.
        """
        # Fetch both attributes first, then register them in weight, bias order.
        kernel, offset = (self.get_attr(key) for key in ('weight_data', 'bias_data'))

        weight_specs = (
            ('weight', 'w{index}', kernel),
            ('bias', 'b{index}', offset),
        )
        for attr_name, variable_name, values in weight_specs:
            self.add_weights_variable(name=attr_name, var_name=variable_name, data=values)

        # Single-element output; the dimension name embeds this node's index.
        self.add_output_variable(shape=[1], dim_names=[f'N_SIZE_1_{self.index}'])
# Register HDotProduct with hls4ml under the layer name 'DotProduct'.
hls4ml.model.layers.register_layer('DotProduct', HDotProduct)





In [3]:
import numpy as np
from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer

def parse_dot_product_layer(keras_layer, input_names, input_shapes, data_reader):
    """Keras-to-hls4ml handler for ``DotProduct`` layers.

    Args:
        keras_layer: Layer description dict; its ``"class_name"`` entry must be
            ``'DotProduct'``.
        input_names: Names of the layer inputs, passed through to
            ``parse_default_keras_layer``.
        input_shapes: Shapes of the layer inputs; only the first one is used.
        data_reader: Reader handed to ``get_weights_data`` to fetch the
            ``kernel`` and ``bias`` arrays.

    Returns:
        ``(layer, output_shape)`` where ``layer`` is the parsed layer dict
        extended with ``weight_data``, ``bias_data``, ``n_in`` and ``n_out``,
        and ``output_shape`` is the first input shape with its last axis
        replaced by ``n_out``.

    Raises:
        AssertionError: If ``keras_layer`` is not a ``DotProduct`` layer.
    """
    assert keras_layer["class_name"] == 'DotProduct'

    layer = parse_default_keras_layer(keras_layer, input_names)
    layer['weight_data'], layer['bias_data'] = get_weights_data(data_reader, layer['name'], ['kernel', 'bias'])
    layer['class_name'] = 'DotProduct'
    layer['n_in'] = input_shapes[0][-1]
    layer['n_out'] = 1

    # Return a shape list (e.g. [None, 1]) rather than a bare int, matching
    # the list form of the input shapes so a following layer's handler can
    # index it.
    output_shape = list(input_shapes[0][:-1]) + [layer['n_out']]
    return layer, output_shape

import hls4ml
# Register parse_dot_product_layer as the Keras handler for 'DotProduct' layers.
hls4ml.converters.register_keras_layer_handler('DotProduct', parse_dot_product_layer)

In [4]:
# C++ config struct emitted per layer. The text is filled in with str.format:
# {index} and {n_in} are placeholders, doubled braces are literal C++ braces.
dot_config_template = """struct config{index} : nnet::dot_product_config {{
    static const unsigned rows = {n_in};
}};\n"""

# C++ call emitted for the layer; {w} and {b} are supplied by
# DotFunctionTemplate.format, the remaining fields by its default parameters.
dot_function_template = 'nnet::dot_product<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
# Header passed as include_header to the function-call template.
dot_include_list = ['nnet_utils/nnet_dot_product_stream.h']

class DotConfigTemplate(hls4ml.backends.template.LayerConfigTemplate):
    """Layer-config template for ``HDotProduct`` nodes."""

    def __init__(self):
        super().__init__(HDotProduct)
        self.template = dot_config_template

    def format(self, node):
        """Fill ``dot_config_template`` with the default config parameters of ``node``."""
        return self.template.format(**self._default_config_params(node))
    
class DotFunctionTemplate(hls4ml.backends.template.FunctionCallTemplate):
    """Function-call template for ``HDotProduct`` nodes."""

    def __init__(self):
        super().__init__(HDotProduct, include_header=dot_include_list)
        self.template = dot_function_template

    def format(self, node):
        """Fill ``dot_function_template`` for ``node``.

        The default function parameters are extended with the names of the
        node's ``weight`` and ``bias`` variables as ``w`` and ``b``.
        """
        fields = self._default_function_params(node)
        fields.update(
            w=node.get_weights('weight').name,
            b=node.get_weights('bias').name,
        )
        return self.template.format(**fields)

In [5]:
import os

# Locate the HLS implementation relative to the working directory instead of
# a user-specific home directory, so the notebook is not tied to one machine.
# abspath keeps the argument an absolute path, as the original literal was.
DOT_PRODUCT_HEADER = os.path.abspath('nnet_dot_product_stream.h')
if not os.path.isfile(DOT_PRODUCT_HEADER):
    raise FileNotFoundError(
        'HLS header not found: {} (run the notebook from the directory that contains it)'.format(
            DOT_PRODUCT_HEADER
        )
    )

for backend_id in ['Vivado', 'Quartus', 'VivadoAccelerator']:
    # No optimizer passes are registered for this layer.
    backend = hls4ml.backends.get_backend(backend_id)

    # Register template passes for the given backend
    backend.register_template(DotConfigTemplate)
    backend.register_template(DotFunctionTemplate)

    # Register HLS implementation
    backend.register_source(DOT_PRODUCT_HEADER)

In [6]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

# `shape` is given as a 1-tuple: `(32)` is just the integer 32, whereas
# `(32,)` is the tuple form that Input documents for its shape argument.
x = Input(shape=(32,))
y = DotProduct(32)(x)
model = Model(inputs=x, outputs=y)
model.summary()

2023-07-18 15:02:34.222550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-07-18 15:02:34.222586: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-07-18 15:02:34.222620: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (r7515ed520): /proc/driver/nvidia/version does not exist
2023-07-18 15:02:34.222886: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32)]              0         
                                                                 
 dot_product (DotProduct)    (None, 1)                 33        
                                                                 
Total params: 33
Trainable params: 33
Non-trainable params: 0
_________________________________________________________________


In [7]:
import pprint

import hls4ml

# Build a configuration with one entry per layer name, using ap_fixed<32,16>
# as the default precision, and display it.
config = hls4ml.utils.config_from_keras_model(
    model,
    granularity='name',
    default_precision='ap_fixed<32,16>',
)
pprint.pprint(config)

# Convert the Keras model with io_stream I/O for part xc7z020clg400-1 into
# ./hls_model/dotproduct, then compile the resulting HLS model.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir='./hls_model/dotproduct',
    io_type='io_stream',
    part='xc7z020clg400-1',
)
hls_model.compile()

Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: dot_product, layer type: DotProduct, input shapes: [[None, 32]], output shape: 1
{'LayerName': {'dot_product': {'Precision': {'result': 'ap_fixed<32,16>'},
                               'Trace': False},
               'input_1': {'Precision': {'result': 'ap_fixed<32,16>'},
                           'Trace': False}},
 'Model': {'BramFactor': 1000000000,
           'Precision': 'ap_fixed<32,16>',
           'ReuseFactor': 1,
           'Strategy': 'Latency',
           'TraceOutput': False}}
Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: dot_product, layer type: DotProduct, input shapes: [[None, 32]], output shape: 1
Creating HLS model
{'Backend': 'Vivado',
 'ClockPeriod': 5,
 'HLSConfig': {'LayerName': {'dot_product': {'Precision': {'result': 'ap_fixed<32,16

In [8]:
import numpy as np
np.random.seed(0)
x = np.random.rand(1, 32)

# Run each model exactly once and reuse the results for both the printout and
# the test-bench files, instead of calling model.predict(x) a second time.
y_hls = hls_model.predict(x)
y_k = model.predict(x)
print(y_hls)
print(y_k)

# Test-bench stimulus and the Keras reference output, one row each.
np.savetxt('./hls_model/dotproduct/tb_data/tb_input_features.dat', x.reshape(1,-1))
np.savetxt('./hls_model/dotproduct/tb_data/tb_output_predictions.dat', y_k.reshape(1,-1))

[0.0328064]
[[0.03317928]]


In [9]:
import os

# NOTE: the former `!source /opt/Xilinx/Vivado/2019.2/settings64.sh` line was
# removed. `!` commands run in a separate subshell, so anything the script
# sets is lost when that shell exits and never reaches this kernel. The tools
# are made visible by editing PATH of the kernel process instead.
VIVADO_BIN = '/opt/Xilinx/Vivado/2019.2' + '/bin'

# Prepend only when it is not already the first entry, so re-running the cell
# does not keep growing PATH.
if not os.environ['PATH'].startswith(VIVADO_BIN + ':'):
    os.environ['PATH'] = VIVADO_BIN + ':' + os.environ['PATH']

hls_model.build(csim=True, synth=True, vsynth=True, cosim=True, validation=True, export=True)


****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2019.2 (64-bit)
  **** SW Build 2708876 on Wed Nov  6 21:39:14 MST 2019
  **** IP Build 2700528 on Thu Nov  7 00:09:20 MST 2019
    ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.

source /opt/Xilinx/Vivado/2019.2/scripts/vivado_hls/hls.tcl -notrace
INFO: Applying HLS Y2K22 patch v1.2 for IP revision
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2019.2/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'hisky' on host 'r7515ed520.EE.NCTU.edu.tw' (Linux_x86_64 version 3.10.0-1160.90.1.el7.x86_64) on Tue Jul 18 15:02:45 CST 2023
INFO: [HLS 200-10] On os "CentOS Linux release 7.9.2009 (Core)"
INFO: [HLS 200-10] In directory '/home/hisky/Lab-undergrad/Lab4/hls_model/dotproduct'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Opening project '/home/hisky/Lab-undergrad/Lab4/hls_model/dotproduct/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject.cpp' to the project
INFO: [

{'CSimResults': [['0.0328064']],
 'CosimResults': [['0.0328064']],
 'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.950',
  'BestLatency': '287',
  'WorstLatency': '287',
  'IntervalMin': '258',
  'IntervalMax': '258',
  'BRAM_18K': '2',
  'DSP': '2',
  'FF': '1462',
  'LUT': '917',
  'URAM': '0',
  'AvailableBRAM_18K': '280',
  'AvailableDSP': '220',
  'AvailableFF': '106400',
  'AvailableLUT': '53200',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '1351',
  'FF': '3420',
  'BRAM_18K': '0',
  'DSP48E': '2'},
 'CosimReport': {'RTL': 'Verilog',
  'Status': 'Pass',
  'LatencyMin': 260,
  'LatencyMax': 260,
  'IntervalMin': 0,
  'IntervalMax': 0,
  'LatencyAvg': 260.0,
  'IntervalAvg': 0.0}}

In [12]:
# Same project directory that was passed as `output_dir` at conversion time,
# given relative to the working directory rather than as a user-specific
# absolute path.
hls4ml.report.parse_vivado_report('./hls_model/dotproduct')

Implementation report not found.
Timing report not found.


{'CSimResults': [['0.0328064']],
 'CosimResults': [['0.0328064']],
 'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.950',
  'BestLatency': '287',
  'WorstLatency': '287',
  'IntervalMin': '258',
  'IntervalMax': '258',
  'BRAM_18K': '2',
  'DSP': '2',
  'FF': '1462',
  'LUT': '917',
  'URAM': '0',
  'AvailableBRAM_18K': '280',
  'AvailableDSP': '220',
  'AvailableFF': '106400',
  'AvailableLUT': '53200',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '1351',
  'FF': '3420',
  'BRAM_18K': '0',
  'DSP48E': '2'},
 'CosimReport': {'RTL': 'Verilog',
  'Status': 'Pass',
  'LatencyMin': 260,
  'LatencyMax': 260,
  'IntervalMin': 0,
  'IntervalMax': 0,
  'LatencyAvg': 260.0,
  'IntervalAvg': 0.0}}

In [11]:
import pprint

import hls4ml

# Build a configuration with one entry per layer name, using ap_fixed<32,16>
# as the default precision, and display it.
config = hls4ml.utils.config_from_keras_model(
    model,
    granularity='name',
    default_precision='ap_fixed<32,16>',
)
pprint.pprint(config)

# Convert the Keras model with io_stream I/O for the VivadoAccelerator backend
# and the pynq-z2 board into ./hls_model/dotproduct_axi, then compile it.
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    output_dir='./hls_model/dotproduct_axi',
    io_type='io_stream',
    backend='VivadoAccelerator',
    board='pynq-z2',
)
hls_model.compile()

Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: dot_product, layer type: DotProduct, input shapes: [[None, 32]], output shape: 1
{'LayerName': {'dot_product': {'Precision': {'result': 'ap_fixed<32,16>'},
                               'Trace': False},
               'input_1': {'Precision': {'result': 'ap_fixed<32,16>'},
                           'Trace': False}},
 'Model': {'BramFactor': 1000000000,
           'Precision': 'ap_fixed<32,16>',
           'ReuseFactor': 1,
           'Strategy': 'Latency',
           'TraceOutput': False}}
Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 32]], output shape: [None, 32]
Layer name: dot_product, layer type: DotProduct, input shapes: [[None, 32]], output shape: 1
Creating HLS model
{'AcceleratorConfig': {'Board': 'pynq-z2',
                       'Driver': 'python',
                       'Interface': 'ax

In [15]:
# Build the current hls_model with every stage enabled, including bitfile=True.
hls_model.build(csim=True, synth=True, vsynth=True, cosim=True, validation=True, export=True, bitfile=True)


****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2019.2 (64-bit)
  **** SW Build 2708876 on Wed Nov  6 21:39:14 MST 2019
  **** IP Build 2700528 on Thu Nov  7 00:09:20 MST 2019
    ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.

source /opt/Xilinx/Vivado/2019.2/scripts/vivado_hls/hls.tcl -notrace
INFO: Applying HLS Y2K22 patch v1.2 for IP revision
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2019.2/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'hisky' on host 'r7515ed520.EE.NCTU.edu.tw' (Linux_x86_64 version 3.10.0-1160.90.1.el7.x86_64) on Tue Jul 18 15:58:33 CST 2023
INFO: [HLS 200-10] On os "CentOS Linux release 7.9.2009 (Core)"
INFO: [HLS 200-10] In directory '/home/hisky/Lab-undergrad/Lab4/hls_model/dotproduct_axi'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Opening project '/home/hisky/Lab-undergrad/Lab4/hls_model/dotproduct_axi/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject_axi.cpp' to the pro

{'CSimResults': [['{', 'data:', '0,', 'last:', '1', '}'], []],
 'CosimResults': [['{', 'data:', '0,', 'last:', '1', '}'], []],
 'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '4.375',
  'BestLatency': '289',
  'WorstLatency': '289',
  'IntervalMin': '290',
  'IntervalMax': '290',
  'BRAM_18K': '2',
  'DSP': '2',
  'FF': '6544',
  'LUT': '15392',
  'URAM': '0',
  'AvailableBRAM_18K': '280',
  'AvailableDSP': '220',
  'AvailableFF': '106400',
  'AvailableLUT': '53200',
  'AvailableURAM': '0'},
 'VivadoSynthReport': {'LUT': '702',
  'FF': '1239',
  'BRAM_18K': '0',
  'DSP48E': '2'},
 'CosimReport': {'RTL': 'Verilog',
  'Status': 'Pass',
  'LatencyMin': 558,
  'LatencyMax': 558,
  'IntervalMin': 0,
  'IntervalMax': 0,
  'LatencyAvg': 558.0,
  'IntervalAvg': 0.0},
 'ImplementationReport': {'TotLUTs': 5473,
  'TotLUTs%': 10.29,
  'LogicLUTs': 5200,
  'LogicLUTs%': 9.77,
  'LUTRAMs': 22,
  'LUTRAMs%': 0.13,
  'SRLs': 251,
  'SRLs%': 1.44,
  'FFs': 9770,
  'FFs%': 

In [17]:
import numpy as np
np.random.seed(0)
x = np.random.rand(1, 32)

# Run each model once; y_k is reused below rather than predicting again.
y_hls = hls_model.predict(x)
y_k = model.predict(x)
print(y_hls)
print(y_k)

# Text test-bench input, plus .npy copies of the input and both predictions.
np.savetxt('./hls_model/dotproduct_axi/tb_data/tb_input_features.dat', x.reshape(1,-1))
np.save('./hls_model/dotproduct_axi/tb_data/x_test.npy', x)
np.save('./hls_model/dotproduct_axi/tb_data/y_hls.npy', y_hls)
np.save('./hls_model/dotproduct_axi/tb_data/y_k.npy', y_k)
# Keras reference output, taken from the prediction computed above.
np.savetxt('./hls_model/dotproduct_axi/tb_data/tb_output_predictions.dat', y_k.reshape(1,-1))

[0.0328064]
[[0.03317928]]
