| Register Name                | Interface       | Slave Segment  | Master Base Address | Range  | Master High Address |
|------------------------------|----------------|---------------|---------------------|--------|---------------------|
| Sysreg                       | S00_AXI        | S00_AXI_reg   | 0xA001_0000        | 64K    | 0xA001_FFFF        |
| AXI CDMA S_AXI_LITE          | S_AXI_LITE     | Reg           | 0xA000_0000        | 64K    | 0xA000_FFFF        |
| Output Data (bram_ctrl)      | S_AXI          | Mem0          | 0xE800_0000        | 256K   | 0xE803_FFFF        |
| h_data (bram_ctrl)           | S_AXI          | Mem0          | 0xE000_0000        | 1M     | 0xE00F_FFFF        |
| node_info (bram_ctrl)        | S_AXI          | Mem0          | 0xE200_0000        | 64K    | 0xE200_FFFF        |
| wgt_a (bram_ctrl)            | S_AXI          | Mem0          | 0xE400_0000        | 128K   | 0xE401_FFFF        |
| DDR Low                      | S_AXI_HP0_FPD  | HP0_DDR_LOW   | 0x0000_0000        | 2G     | 0x7FFF_FFFF        |
| QSPI                         | S_AXI_HP0_FPD  | HP0_QSPI      | 0xC100_0000        | 16M    | 0xC1FF_FFFF        |




In [1]:
import os
import sys
import time
import pynq
from pynq import Overlay, allocate, MMIO
import numpy as np
from pynq_cdma import CDMA
from pprint import pprint

In [2]:
# Make the project's helper modules under /root/GAT_FPGA/misc importable from this notebook.
sys.path.append(os.path.abspath("/root/GAT_FPGA/misc"))
from LoadData import LoadData
from BRAM import BRAM
# NOTE(review): second import from LoadData; could be merged with the one above.
from LoadData import binary_to_decimal, decimal_to_binary
# NOTE(review): the star imports hide where later names (e.g. GlobalConfiguration,
# handle_new_feature, format_print) are defined — presumably Helper/Quantization; confirm.
from Helper import *
from Validate import validate_input, validate_output
from Quantization import *

In [3]:
# Notebook flow:
#   1. Load the dataset files.
#   2. Validate the input data.
#   3. Transfer the data to BRAM.
#   4. Read the results back for testing.
#   5. Compare them with the predictor's output.

# Show the fclk0 frequency (in MHz) reported by PYNQ for this board.
fclk0_mhz = pynq.ps.Clocks.fclk0_mhz
print(fclk0_mhz)

214.283571


## Select Dataset

In [4]:
# Dataset to run. "Citeseer" is the other option handled by the depth table below;
# replace the literal with input() to choose interactively.
DATASET = "Cora"

# Lower-case form, used when building the on-disk dataset paths.
dataset = DATASET.lower()

# Values written to the "gat_layer" register to select which layer runs.
layer_1, layer_2 = 0, 1

In [5]:
# Record the selected dataset in the shared configuration, then display the
# whole configuration as the cell result.
dataset_config = GlobalConfiguration["dataset"]
dataset_config["name"] = DATASET
GlobalConfiguration

{'GAT': {'hiddenChannel': 16, 'head': 1},
 'model': {'savePath': 'model_params.pth', 'scaleMin': -127, 'scaleMax': 127},
 'dataset': {'root': 'data/Planetoid', 'name': 'Cora', 'normalization': False}}

In [6]:
# Per-dataset buffer depths passed to BRAM(...) for each accelerator memory.
# Tuple order:
#   (H_DATA layer 1, WEIGHT layer 1, NODE_INFO, FEAT layer 1,
#    H_DATA layer 2, WEIGHT layer 2, FEAT layer 2)
# NOTE(review): the Citeseer layer-2 depths are identical to Cora's — confirm this
# is intended and not a copy-paste leftover.
# NOTE(review): assuming 4-byte words (buffers are allocated as 32-bit dtypes),
# Citeseer's layer-1 h_data (399089) and weight (59280) depths exceed the 1M / 128K
# ranges listed in the address map — confirm against the hardware design.
_DATASET_DEPTHS = {
    "Cora": (242101, 22960, 13264, 43328, 212224, 126, 18956),
    "Citeseer": (399089, 59280, 12383, 52464, 212224, 126, 18956),
}

# Fail here with a clear message instead of a NameError in a later cell when the
# selected dataset has no depth entry.
if DATASET not in _DATASET_DEPTHS:
    raise ValueError(
        f"Unsupported DATASET {DATASET!r}; expected one of {sorted(_DATASET_DEPTHS)}"
    )

(
    H_DATA_DEPTH_LAYER1,
    WEIGHT_DEPTH_LAYER1,
    NODE_INFO_DEPTH,
    FEAT_DEPTH_LAYER1,
    H_DATA_DEPTH_LAYER2,
    WEIGHT_DEPTH_LAYER2,
    FEAT_DEPTH_LAYER2,
) = _DATASET_DEPTHS[DATASET]

## Prepare Data

In [22]:
# Root of the GAT design on the board.
gat_main_path = "/root/GAT_FPGA/main_design"
# Folder of the selected dataset. NOTE: this already ends with "/", and later cells
# append "/layer_N/" to it, which yields a doubled slash in the resulting path.
gat_dataset_path = f"{gat_main_path}/data/{dataset}/"

# Open the GAT bitstream as a PYNQ overlay.
overlay_gat = Overlay(f"{gat_main_path}/hw_test/design_gat_wrapper.bit")
# IPython introspection of the overlay object (not valid plain-Python syntax).
overlay_gat?
# Loader for the layer-1 input directory of the selected dataset.
data_loader = LoadData(f"{gat_main_path}/data/{dataset}/layer_1/input")

[LoadData] : reading /root/GAT_FPGA/main_design/data/cora/layer_1/input/h_data.txt
[LoadData] : reading /root/GAT_FPGA/main_design/data/cora/layer_1/input/node_info.txt
[LoadData] : reading /root/GAT_FPGA/main_design/data/cora/layer_1/input/weight.txt


In [8]:
# Fetch the loaded layer-1 inputs; later cells index the result by "h_data",
# "node_info" and "weight".
input_data = data_loader.get_data()

In [9]:
# Layer-1 input memories. Base addresses match the wgt_a / h_data / node_info rows
# of the address map; depths come from the per-dataset constants.
weight_bram = BRAM(0xE400_0000, WEIGHT_DEPTH_LAYER1)
h_data_bram = BRAM(0xE000_0000, H_DATA_DEPTH_LAYER1)
node_info_bram = BRAM(0xE200_0000, NODE_INFO_DEPTH)

# (target BRAM, staging-buffer dtype, key in input_data), handled in this order.
# Only the weight buffer is allocated as signed 32-bit.
_layer1_inputs = (
    (h_data_bram, np.uint32, "h_data"),
    (node_info_bram, np.uint32, "node_info"),
    (weight_bram, np.int32, "weight"),
)
for _bram, _dtype, _key in _layer1_inputs:
    # Allocate the staging buffer, then copy the loaded data into it.
    _bram._alloc(dtype=_dtype)
    _bram.buffer[:] = input_data[_key]

In [10]:
# Layer-1 output-feature memory, at the "Output Data" base address of the address map.
feat_out_bram = BRAM(0xE800_0000, FEAT_DEPTH_LAYER1)
# Allocate its staging buffer as unsigned 32-bit words.
feat_out_bram._alloc(dtype=np.uint32)

### Register Bank

In [11]:
# Sysreg block: base address and 64K range as listed in the address map.
SYSREG_ADDR = 0xA001_0000
SYSREG_RANGE = 0x1_0000

# Offsets of the individual registers, used with sysreg.read / sysreg.write.
# NOTE(review): "gat_load_done" and "wgt_load_done" share offset 0x04 — confirm
# against the register map whether this alias is intentional.
REG = dict(
    gat_layer=0x00,
    gat_load_done=0x04,
    wgt_load_done=0x04,
    h_data_load_done=0x08,
    node_info_load_done=0x0C,
    gat_ready=0x10,
    i_gat_debug_1=0x14,
    i_gat_debug_2=0x18,
    i_gat_debug_3=0x1C,
)

# Memory-mapped window onto the register bank, and the CDMA engine from the overlay.
sysreg = MMIO(SYSREG_ADDR, SYSREG_RANGE)
cdma = overlay_gat.axi_cdma_0

## Start Layer 1 (F = 225MHz)

In [12]:
# Run GAT layer 1: select the layer, copy the inputs to the BRAMs over CDMA,
# signal that loading is done, wait for "gat_ready", then read the features back.

# Upper bound (seconds) on polling "gat_ready". Without it a stalled design
# would hang the notebook kernel forever.
GAT_READY_TIMEOUT_S = 10.0

sysreg.write(REG["gat_layer"], layer_1)
sysreg.write(REG["gat_load_done"], 0)

start_time = time.perf_counter()
cdma.transfer(node_info_bram.buffer, node_info_bram.BASE_ADDR)
cdma.transfer(h_data_bram.buffer, h_data_bram.BASE_ADDR)
cdma.transfer(weight_bram.buffer, weight_bram.BASE_ADDR)
end_time = time.perf_counter()

print(f"Transferring Time = {round((end_time-start_time)*1000, 3)} ms")

# Start the timer *before* raising gat_load_done (presumably the accelerator's
# start trigger — confirm) so the measured interval covers the whole run.
start_time = time.perf_counter()
sysreg.write(REG["gat_load_done"], 1)

deadline = start_time + GAT_READY_TIMEOUT_S
while sysreg.read(REG["gat_ready"]) != 1:
    if time.perf_counter() > deadline:
        raise TimeoutError(
            f"gat_ready was not asserted within {GAT_READY_TIMEOUT_S} s (layer 1)"
        )
end_time = time.perf_counter()

# Copy the output features from BRAM into the staging buffer, then clear load-done.
cdma.transfer(feat_out_bram.BASE_ADDR, feat_out_bram.buffer)
sysreg.write(REG["gat_load_done"], 0)

print(f"Execution Time = {round((end_time-start_time)*1000, 3)} ms")

Transferring Time = 10.082 ms
Execution Time = 1.091 ms


## Output Layer 1


In [13]:
# Scale every raw output word down by 2**16 (presumably a fixed-point format with
# 16 fractional bits — confirm) and preview the first 50 values.
feat_out_buffer = [raw_word / (2**16) for raw_word in feat_out_bram.buffer]
print(feat_out_buffer[:50])

# TODO: Validate Output
validate_output(feat_out_buffer, f"{gat_dataset_path}/layer_1/", report_status="ERRORINFO")

[144.0, 173.5, 98.75, 354.75, 258.0, 57.75, 0.0, 0.0, 297.25, 0.0, 0.0, 155.75, 28.0, 156.5, 0.0, 0.0, 0.0, 0.0, 0.0, 52.5, 217.5, 0.0, 0.0, 38.5, 0.0, 0.0, 0.0, 593.75, 18.0, 0.0, 264.5, 0.0, 0.0, 0.0, 0.0, 190.83331298828125, 257.8333282470703, 0.0, 0.0, 14.5, 0.0, 0.0, 0.0, 418.5, 0.0, 0.0, 136.33334350585938, 0.0, 0.0, 55.5]

❌ ERROR VALUES
+--------+-------+--------------------+--------------------+------------+
| Status | Index |      Expected      |       Actual       | Difference |
+--------+-------+--------------------+--------------------+------------+
| [91mERROR[0m  | 10736 |       180.0        |        0.0         |   180.0    |
| [91mERROR[0m  | 10737 |       653.0        | 175.77154541015625 | 477.228455 |
| [91mERROR[0m  | 10739 |        0.0         |   10.38818359375   | 10.388184  |
| [91mERROR[0m  | 10741 |       755.0        | 264.5411376953125  | 490.458862 |
| [91mERROR[0m  | 10742 |       472.0        | 414.16844177246094 | 57.831558  |
| [91mERROR[0m

## Debugger

In [14]:
# Build the software-side dataset loader, GAT and model objects (project classes).
data_loader_instance = DatasetLoaderV2()
gat_instance = GATV2(data_loader_instance)
model_instance = BuildModelV2(gat_instance)

# Derive the layer-2 inputs from the layer-1 output features; the result is later
# indexed by "h_data" and "weight".
layer_2_input = handle_new_feature(feat_out_buffer, model_instance, data_loader_instance)

# Leftover debug prints (the names below are not defined in this notebook):
# pprint(h_data_input)
# pprint(weight_input)

Model parameters loaded from model_params.pth
Time to run test: 3.875328 seconds


## Prepare Layer 2

In [15]:
# Re-create the weight and h_data memories with the layer-2 depths (same base
# addresses as layer 1) and allocate both staging buffers as unsigned 32-bit.
weight_bram = BRAM(0xE400_0000, WEIGHT_DEPTH_LAYER2)
h_data_bram = BRAM(0xE000_0000, H_DATA_DEPTH_LAYER2)

h_data_bram._alloc(dtype=np.uint32)
weight_bram._alloc(dtype=np.uint32)

# (target BRAM, key in layer_2_input, number of words to fill), handled in this order.
for _bram, _key, _depth in (
    (h_data_bram, "h_data", H_DATA_DEPTH_LAYER2),
    (weight_bram, "weight", WEIGHT_DEPTH_LAYER2),
):
    _encoded_words = layer_2_input[_key]
    for _idx in range(_depth):
        # Convert each entry with binary_to_decimal before storing it in the buffer.
        _bram.buffer[_idx] = binary_to_decimal(_encoded_words[_idx])

# print(weight_bram.buffer)
# print(h_data_bram.buffer)

In [16]:
# Re-create the output-feature memory with the layer-2 depth (same base address
# as for layer 1) and allocate its staging buffer as unsigned 32-bit words.
feat_out_bram = BRAM(0xE800_0000, FEAT_DEPTH_LAYER2)
feat_out_bram._alloc(dtype=np.uint32)

## Start Layer 2

In [17]:
# Run GAT layer 2: select the layer, copy h_data and weights to the BRAMs over CDMA,
# signal that loading is done, wait for "gat_ready", then read the features back.

# Upper bound (seconds) on polling "gat_ready". Without it a stalled design
# would hang the notebook kernel forever.
GAT_READY_TIMEOUT_S = 10.0

sysreg.write(REG["gat_layer"], layer_2)

start_time = time.perf_counter()
cdma.transfer(h_data_bram.buffer, h_data_bram.BASE_ADDR)
cdma.transfer(weight_bram.buffer, weight_bram.BASE_ADDR)
end_time = time.perf_counter()

# Report the transfer time before triggering, so printing does not eat into the
# execution-time measurement that follows.
print(f"Transsferring Time = {round((end_time-start_time)*1000, 3)} ms")

#==================================
# Start the timer *before* raising gat_load_done (presumably the accelerator's
# start trigger — confirm) so the measured interval covers the whole run.
start_time = time.perf_counter()
sysreg.write(REG["gat_load_done"], 1)

deadline = start_time + GAT_READY_TIMEOUT_S
while sysreg.read(REG["gat_ready"]) != 1:
    if time.perf_counter() > deadline:
        raise TimeoutError(
            f"gat_ready was not asserted within {GAT_READY_TIMEOUT_S} s (layer 2)"
        )
end_time = time.perf_counter()

# Copy the output features from BRAM into the staging buffer, then clear load-done.
cdma.transfer(feat_out_bram.BASE_ADDR, feat_out_bram.buffer)
sysreg.write(REG["gat_load_done"], 0)
#==================================

print(f"Execution Time = {round((end_time-start_time)*1000, 3)} ms")

Transsferring Time = 1.958 ms
Execution Time = 0.317 ms


In [18]:
# Scale every raw output word down by 2**16 (presumably a fixed-point format with
# 16 fractional bits — confirm) and preview the first 50 values.
feat_out_buffer = [raw_word / (2**16) for raw_word in feat_out_bram.buffer]
print(feat_out_buffer[:50])

# TODO: Validate Output
validate_output(feat_out_buffer, f"{gat_dataset_path}/layer_2/", report_status="ERRORINFO")

[0.0, 105.0, 0.0, 19540.749938964844, 0.0, 0.0, 799.75, 2505.9981536865234, 0.0, 0.0, 554.0032958984375, 12234.002426147461, 0.0, 847.9967498779297, 1413.7910614013672, 0.0, 611.7803192138672, 1566.5962829589844, 16018.692687988281, 0.0, 0.0, 19031.999969482422, 0.0, 0.0, 321.0, 2197.0, 0.0, 3249.0, 0.0, 0.0, 14758.500015258789, 16530.33331298828, 4016.1666870117188, 0.0, 0.0, 2345.5, 0.0, 20723.749938964844, 6846.25, 0.0, 0.0, 0.0, 9674.399978637695, 0.0, 2474.2000122070312, 1679.5999908447266, 3431.5999908447266, 800.5999908447266, 2564.199996948242, 2245.5]

❌ ERROR VALUES
+--------+-------+--------------------+--------------------+--------------+
| Status | Index |      Expected      |       Actual       |  Difference  |
+--------+-------+--------------------+--------------------+--------------+
| [91mERROR[0m  |  210  | 7776.285714285713  | 7364.714279174805  |  411.571435  |
| [91mERROR[0m  |  212  | 9738.571428571426  | 9356.857131958008  |  381.714297  |
| [91mERROR[0m  |

+--------+-------+------------+------------+------------+
| Status | Index |  Expected  |   Actual   | Difference |
+--------+-------+------------+------------+------------+
|  [92mINFO[0m  |   0   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   1   |   105.0    |   105.0    |    0.0     |
|  [92mINFO[0m  |   2   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   4   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   5   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   6   |   799.75   |   799.75   |    0.0     |
|  [92mINFO[0m  |   8   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   9   |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   12  |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   15  |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   19  |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   20  |    0.0     |    0.0     |    0.0     |
|  [92mINFO[0m  |   

In [19]:
# Classify using the layer-2 output features; the recorded output shows the helper
# printing golden vs. DUT label tensors and the resulting accuracy.
handle_classification(feat_out_buffer, data_loader_instance)

- Golden : tensor([3, 4, 4,  ..., 3, 3, 3])
- DUT    : tensor([3, 4, 4,  ..., 0, 3, 3])

 => Accuracy = [91m77.0679[0m % (2087 / 2708)


### Debugger

In [20]:
# Dump the three debug registers of the Sysreg block. The first two are printed as
# raw integers; the third goes through format_print (the recorded output looks like
# a time/date stamp — presumably a build timestamp; confirm).
print(sysreg.read(REG["i_gat_debug_1"]))        
print(sysreg.read(REG["i_gat_debug_2"]))
format_print(sysreg.read(REG["i_gat_debug_3"]))

242101
255
19:38 - 28/03
