In [1]:
import tensorflow as tf
import numpy as np

2023-07-10 03:59:08.777048: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# 数据预处理
train_images = train_images / 255.0
test_images = test_images / 255.0

# Define the SNN layer

In [4]:
class SpikingLayer(tf.keras.layers.Layer):
    def __init__(self, num_neurons, **kwargs):
        super(SpikingLayer, self).__init__(**kwargs)
        self.num_neurons = num_neurons

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel", shape=[int(input_shape[-1]), self.num_neurons])
        self.threshold = self.add_weight("threshold", shape=[self.num_neurons])

    def call(self, inputs):
        # 计算输入与权重的点积
        outputs = tf.matmul(inputs, self.kernel)
        # 应用阈值函数
        outputs = tf.nn.relu(outputs - self.threshold)
        return outputs

# hls4ml Config Part

In [5]:
from tensorflow.keras.models import load_model

model = load_model("SNN_MNIST.h5", custom_objects={'SpikingLayer': SpikingLayer})

2023-07-10 03:59:30.135103: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)
model.summary()

Test Loss: 0.42862460017204285
Test Accuracy: 0.8709999918937683
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 spiking_layer (SpikingLayer  (None, 4)                3140      
 )                                                               
                                                                 
 spiking_layer_1 (SpikingLay  (None, 128)              640       
 er)                                                             
                                                                 
 spiking_layer_2 (SpikingLay  (None, 16)               2064      
 er)                                                             
                                                                 
 dense (Dense)               (None, 10)                17

In [8]:
import hls4ml

class HSpikingLayer(hls4ml.model.layers.Layer):
    def initialize(self):
        inp = self.get_input_variable()
        shape = inp.shape
        dims = inp.dim_names
        self.add_output_variable(shape, dims)

    def configure(self):
        num_neurons = self.attributes['num_neurons']
        # 根据需要配置HLS层的参数

    def predict(self):
        input_node = self.get_input_node()
        output_node = self.get_output_node()
        # 实现HLS层的预测逻辑

In [9]:
def parse_spiking_layer(keras_layer, input_names, input_shapes, data_reader):
    layer = {}
    layer['class_name'] = 'HSpikingLayer'  # 为hls4ml层指定一个唯一的class_name
    layer['name'] = keras_layer['config']['name']
    layer['num_neurons'] = keras_layer['config']['num_neurons']

    if input_names is not None:
        layer['inputs'] = input_names

    return layer, [shape for shape in input_shapes[0]]


In [10]:
def register_custom_layer():
    hls4ml.converters.register_keras_layer_handler('SpikingLayer', parse_spiking_layer)
    hls4ml.model.layers.register_layer('HSpikingLayer', HSpikingLayer)

In [11]:
register_custom_layer()

In [13]:
import hls4ml
import plotting

config = hls4ml.utils.config_from_keras_model(model, granularity='name')
#config['InputShape'] = {'spiking_layer_input': (None, 784)}
config['Model']['Precision'] = 'ap_fixed<12,6>'
config['Model']['ReuseFactor'] = 1
'''
for Layer in config['LayerName'].keys():
    config['LayerName'][Layer]['Strategy'] = 'Latency'
    config['LayerName'][Layer]['ReuseFactor'] = 1
    #config['LayerName'][Layer]['Precision'] = 'ap_fixed<8,4>'
'''
config['LayerName']['dense']['Strategy'] = 'Stable'
'''
for layer in ['conv1', 'conv2'] :
    config['LayerName'][layer]['Precision'] = 'ap_fixed<8,4>'
'''
print("-----------------------------------")
plotting.print_dict(config)
print("-----------------------------------")

cfg = hls4ml.converters.create_config(backend='VivadoAccelerator')
cfg['IOType'] = 'io_stream'
cfg['HLSConfig'] = config
cfg['KerasModel'] = model
cfg['OutputDir'] = 'AlexNet_PYNQ'
cfg['Board'] = 'pynq-z2'

hls_model = hls4ml.converters.keras_to_hls(cfg)

hls_model.compile()

Interpreting Sequential
Topology:
Layer name: flatten_input, layer type: InputLayer, input shapes: [[None, 28, 28]], output shape: [None, 28, 28]
Layer name: flatten, layer type: Reshape, input shapes: [[None, 28, 28]], output shape: [None, 784]
Layer name: spiking_layer, layer type: HSpikingLayer, input shapes: [[None, 784]], output shape: [None, 784]
Layer name: spiking_layer_1, layer type: HSpikingLayer, input shapes: [[None, 784]], output shape: [None, 784]
Layer name: spiking_layer_2, layer type: HSpikingLayer, input shapes: [[None, 784]], output shape: [None, 784]
Layer name: dense, layer type: Dense, input shapes: [[None, 784]], output shape: [None, 10]
-----------------------------------
Model
  Precision:         ap_fixed<12,6>
  ReuseFactor:       1
  Strategy:          Latency
  BramFactor:        1000000000
  TraceOutput:       False
LayerName
  flatten_input
    Trace:           False
    Precision
      result:        fixed<16,6>
  flatten
    Trace:           False
    P

In [14]:
import os

os.environ['PATH'] = os.environ['XILINX_VIVADO'] + '/bin:' + os.environ['PATH']
os.environ['LD_PRELOAD'] = '/lib/x86_64-linux-gnu/libudev.so.1'

In [15]:
hls_model.build(csim=False, export=True, bitfile=True)


****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2019.2 (64-bit)
  **** SW Build 2708876 on Wed Nov  6 21:39:14 MST 2019
  **** IP Build 2700528 on Thu Nov  7 00:09:20 MST 2019
    ** Copyright 1986-2019 Xilinx, Inc. All Rights Reserved.

source /opt/Xilinx/Vivado/2019.2/scripts/vivado_hls/hls.tcl -notrace
INFO: Applying HLS Y2K22 patch v1.2 for IP revision
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2019.2/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'jovyan' on host '6307b0c947c6' (Linux_x86_64 version 4.15.0-212-generic) on Mon Jul 10 04:29:02 UTC 2023
INFO: [HLS 200-10] In directory '/home/jovyan/Internship_Waseda/hls4ml/SNN/AlexNet_PYNQ'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Creating and opening project '/home/jovyan/Internship_Waseda/hls4ml/SNN/AlexNet_PYNQ/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject_axi.cpp' to the project
INFO: [HLS 200-10] Adding design file 'firmware/myproject.cpp' to the

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '4.367',
  'BestLatency': '7113',
  'WorstLatency': '7113',
  'IntervalMin': '7114',
  'IntervalMax': '7114',
  'BRAM_18K': '0',
  'DSP': '0',
  'FF': '3114',
  'LUT': '11413',
  'URAM': '0',
  'AvailableBRAM_18K': '280',
  'AvailableDSP': '220',
  'AvailableFF': '106400',
  'AvailableLUT': '53200',
  'AvailableURAM': '0'},
 'TimingReport': {'WNS': 1.921,
  'TNS': 0.0,
  'WHS': 0.028,
  'THS': 0.0,
  'WPWS': 3.75,
  'TPWS': 0.0}}

In [16]:
!sed -n '30,45p' AlexNet_PYNQ/myproject_vivado_accelerator/project_1.runs/impl_1/design_1_wrapper_utilization_placed.rpt


+----------------------------+------+-------+-----------+-------+
|          Site Type         | Used | Fixed | Available | Util% |
+----------------------------+------+-------+-----------+-------+
| Slice LUTs                 | 3217 |     0 |     53200 |  6.05 |
|   LUT as Logic             | 2980 |     0 |     53200 |  5.60 |
|   LUT as Memory            |  237 |     0 |     17400 |  1.36 |
|     LUT as Distributed RAM |   22 |     0 |           |       |
|     LUT as Shift Register  |  215 |     0 |           |       |
| Slice Registers            | 4036 |     0 |    106400 |  3.79 |
|   Register as Flip Flop    | 4036 |     0 |    106400 |  3.79 |
|   Register as Latch        |    0 |     0 |    106400 |  0.00 |
| F7 Muxes                   |    2 |     0 |     26600 | <0.01 |
| F8 Muxes                   |    0 |     0 |     13300 |  0.00 |
+----------------------------+------+-------+-----------+-------+

