| Register Name                | Interface       | Slave Segment  | Master Base Address | Range  | Master High Address |
|------------------------------|----------------|---------------|---------------------|--------|---------------------|
| Sysreg                       | S00_AXI        | S00_AXI_reg   | 0xA001_0000        | 64K    | 0xA001_FFFF        |
| AXI CDMA S_AXI_LITE          | S_AXI_LITE     | Reg           | 0xA000_0000        | 64K    | 0xA000_FFFF        |
| Output Data (bram_ctrl)      | S_AXI          | Mem0          | 0xE800_0000        | 256K   | 0xE803_FFFF        |
| h_data (bram_ctrl)           | S_AXI          | Mem0          | 0xE000_0000        | 1M     | 0xE00F_FFFF        |
| node_info (bram_ctrl)        | S_AXI          | Mem0          | 0xE200_0000        | 64K    | 0xE200_FFFF        |
| wgt_a (bram_ctrl)            | S_AXI          | Mem0          | 0xE400_0000        | 128K   | 0xE401_FFFF        |
| DDR Low                      | S_AXI_HP0_FPD  | HP0_DDR_LOW   | 0x0000_0000        | 2G     | 0x7FFF_FFFF        |
| QSPI                         | S_AXI_HP0_FPD  | HP0_QSPI      | 0xC100_0000        | 16M    | 0xC1FF_FFFF        |




In [1]:
import os
import sys
import time
import pynq
from pynq import Overlay, allocate, MMIO
import numpy as np
from pynq_cdma import CDMA
from pprint import pprint

In [2]:
# Make the project's helper modules under /root/GAT_FPGA/misc importable
# before importing from them.
sys.path.append(os.path.abspath("/root/GAT_FPGA/misc"))
from LoadData import LoadData
from BRAM import BRAM
from LoadData import binary_to_decimal, decimal_to_binary
# NOTE(review): the wildcard imports below hide where later names come from
# (GlobalConfiguration, DatasetLoaderV2, handle_classification, ... are
# presumably provided by Helper/Quantization) — confirm and consider
# importing them explicitly.
from Helper import *
from Validate import validate_input, validate_output
from Quantization import *

In [3]:
# MAIN FLOW
# 1. Load data.
# 2. Validate input data.
# 3. Transfer data to BRAM.
# 4. Read data back for testing.
# 5. Check against the output data of the predictor.
# print(pynq.ps.Clocks.clk_cpu)
# Print the current frequency of fabric clock 0 (value is in MHz).
print(pynq.ps.Clocks.fclk0_mhz)

214.283571


## Select Dataset

In [4]:
# Dataset to run. Switch datasets by swapping which assignment is active.
# DATASET = input()
# DATASET = "Citeseer"
DATASET = "Cora"

# Lower-cased dataset name; used when building the on-disk data paths.
dataset = DATASET.lower()

# Integer ids of the two layers.
layer_1, layer_2 = 0, 1

In [5]:
# Record the selected dataset name in the shared configuration dictionary,
# then echo the whole configuration as the cell output.
GlobalConfiguration["dataset"]["name"] = DATASET
GlobalConfiguration

{'GAT': {'hiddenChannel': 16, 'head': 1},
 'model': {'savePath': 'model_params.pth', 'scaleMin': -127, 'scaleMax': 127},
 'dataset': {'root': 'data/Planetoid', 'name': 'Cora', 'normalization': False}}

In [6]:
# Per-dataset depth constants, in the order:
# (H_DATA_DEPTH_LAYER1, WEIGHT_DEPTH_LAYER1, NODE_INFO_DEPTH, FEAT_DEPTH_LAYER1,
#  H_DATA_DEPTH_LAYER2, WEIGHT_DEPTH_LAYER2, FEAT_DEPTH_LAYER2)
_DATASET_DEPTHS = {
    "Cora": (242101, 23086, 13264, 43328, 212224, 126, 18956),
    "Citeseer": (399089, 59388, 12383, 52464, 198128, 108, 19674),
}

# Fail fast on an unsupported dataset name instead of leaving the depth
# constants undefined and hitting a NameError in a later cell.
try:
    (
        H_DATA_DEPTH_LAYER1,
        WEIGHT_DEPTH_LAYER1,
        NODE_INFO_DEPTH,
        FEAT_DEPTH_LAYER1,
        H_DATA_DEPTH_LAYER2,
        WEIGHT_DEPTH_LAYER2,
        FEAT_DEPTH_LAYER2,
    ) = _DATASET_DEPTHS[DATASET]
except KeyError:
    raise ValueError(
        f"Unsupported DATASET {DATASET!r}; expected one of {sorted(_DATASET_DEPTHS)}"
    ) from None

## Prepare Data

In [7]:
# Root directory of the GAT v2 design and the data directory of the selected
# dataset (note that gat_dataset_path already ends with "/").
gat_main_path = "/root/GAT_FPGA/gat_v2/main_design"
gat_dataset_path = f"{gat_main_path}/data/{dataset}/"

# Instantiate the PYNQ overlay from the design's bitstream file.
overlay_gat = Overlay(f"{gat_main_path}/hw/design_gat_wrapper.bit")
# IPython help syntax (not plain Python): displays information about the overlay.
overlay_gat?
# Loader for the layer-1 input files of the selected dataset.
data_loader = LoadData(f"{gat_main_path}/data/{dataset}/layer_1/input")

[LoadData] : reading /root/GAT_FPGA/gat_v2/main_design/data/cora/layer_1/input/h_data.txt
[LoadData] : reading /root/GAT_FPGA/gat_v2/main_design/data/cora/layer_1/input/node_info.txt
[LoadData] : reading /root/GAT_FPGA/gat_v2/main_design/data/cora/layer_1/input/weight.txt
[LoadData] : reading /root/GAT_FPGA/gat_v2/main_design/data/cora/layer_1/input/subgraph_index.txt


In [8]:
# Fetch the loaded inputs. This notebook indexes the result by the keys
# "h_data", "node_info", "weight" and "subgraph_index".
input_data = data_loader.get_data()
# Echo the subgraph index list as the cell output.
input_data["subgraph_index"]

[32768,
 6406,
 18856,
 25779,
 32776,
 18,
 6586,
 6597,
 32778,
 16,
 3430,
 14458,
 16528,
 20239,
 32796,
 25535,
 32800,
 9960,
 12276,
 17744,
 22322,
 22335,
 32812,
 16210,
 16466,
 25543,
 32820,
 3792,
 10162,
 14094,
 15837,
 32830,
 2185,
 32834,
 2722,
 2836,
 20431,
 32842,
 7204,
 25957,
 32848,
 4862,
 25539,
 32854,
 16418,
 18645,
 32860,
 9758,
 12892,
 26204,
 26211,
 32870,
 16862,
 18301,
 32876,
 1764,
 20812,
 21296,
 21326,
 26251,
 32888,
 10658,
 10690,
 12434,
 24093,
 32898,
 9488,
 16232,
 24742,
 26091,
 32908,
 204,
 9132,
 12852,
 12858,
 21975,
 32920,
 1540,
 15502,
 18060,
 21386,
 22025,
 32932,
 19703,
 32936,
 10462,
 23260,
 23276,
 24162,
 24167,
 32948,
 10210,
 23627,
 32954,
 358,
 12102,
 17012,
 17034,
 22971,
 32966,
 22179,
 32910,
 202,
 2126,
 6056,
 16268,
 16864,
 21964,
 21987,
 32986,
 12706,
 13118,
 20598,
 23665,
 32996,
 1070,
 1388,
 1398,
 24870,
 24879,
 33008,
 6178,
 18302,
 24036,
 25749,
 33018,
 16733,
 33022,
 9396,
 26

In [9]:
# Create one BRAM helper per input region, given its base address and
# (presumably) its depth. The wgt_a, h_data and node_info base addresses match
# the address-map table at the top of this notebook.
weight_bram    = BRAM(0xE400_0000, WEIGHT_DEPTH_LAYER1)
h_data_bram    = BRAM(0xE000_0000, H_DATA_DEPTH_LAYER1)
node_info_bram = BRAM(0xE200_0000, NODE_INFO_DEPTH)
# NOTE(review): 0xE600_0000 is not listed in the address-map table, and this
# BRAM reuses NODE_INFO_DEPTH as its depth — confirm both are intended.
subgraph_idx_bram = BRAM(0xE600_0000, NODE_INFO_DEPTH)

# For each BRAM: allocate its staging buffer with the given dtype, then copy
# the corresponding loaded input into it.
h_data_bram._alloc(dtype=np.uint32)
h_data_bram.buffer[:] = input_data["h_data"]

node_info_bram._alloc(dtype=np.uint32)
node_info_bram.buffer[:] = input_data["node_info"]

# Weights are the only input staged as signed 32-bit values.
weight_bram._alloc(dtype=np.int32)
weight_bram.buffer[:] = input_data["weight"]

subgraph_idx_bram._alloc(dtype=np.uint32)
subgraph_idx_bram.buffer[:] = input_data["subgraph_index"]


In [10]:
# Echo the staged subgraph-index buffer to check that it was filled.
subgraph_idx_bram.buffer

PynqBuffer([32768,  6406, 18856, ..., 14628, 26516, 26519], dtype=uint32)

In [11]:
# BRAM helper for the accelerator output; 0xE800_0000 is the "Output Data"
# base address in the address-map table. Its buffer is allocated as uint32 and
# is later used as the destination of the read-back transfer.
feat_out_bram = BRAM(0xE800_0000, FEAT_DEPTH_LAYER2)
feat_out_bram._alloc(dtype=np.uint32)

## Init model

In [12]:
# Build the software-side objects: a dataset loader, a GAT wrapper around it,
# and a model built from that wrapper. data_loader_instance is reused later
# when the hardware output is classified.
data_loader_instance = DatasetLoaderV2()
gat_instance = GATV2(data_loader_instance)
model_instance = BuildModelV2(gat_instance)

Model parameters loaded from model_params.pth


### Register Bank

In [13]:
# System-register block ("Sysreg" in the address map): 64 KiB at 0xA001_0000.
SYSREG_ADDR = 0xA001_0000
SYSREG_RANGE = 0x1_0000  # 64 * 1024 bytes

# Offsets of the individual registers inside the Sysreg block, as passed to
# sysreg.read()/sysreg.write().
# NOTE(review): "gat_load_done" and "wgt_load_done" share offset 4 — confirm
# this aliasing is intended.
REG = dict(
    gat_layer=0,
    gat_load_done=4,
    wgt_load_done=4,
    h_data_load_done=8,
    node_info_load_done=12,
    gat_ready=16,
    i_gat_debug_1=20,
    i_gat_debug_2=24,
    i_gat_debug_3=28,
)

# Handles used by the following cells: the CDMA engine exposed by the overlay
# and a memory-mapped window over the system registers.
cdma = overlay_gat.axi_cdma_0
sysreg = MMIO(SYSREG_ADDR, SYSREG_RANGE)

## Start Transfer

In [14]:
# Upper bound (in seconds) on waiting for the ready flag, so that a stalled
# design raises an error instead of hanging the notebook kernel forever.
GAT_READY_TIMEOUT_S = 10.0

# sysreg.write(REG["gat_layer"], layer_1)
# Write 0 to the load-done register before the inputs are copied.
sysreg.write(REG["gat_load_done"], 0)
start_time = time.perf_counter()
# Hand each staged input buffer and its BRAM base address to the CDMA
# (argument order appears to be transfer(source, destination)).
cdma.transfer(node_info_bram.buffer, node_info_bram.BASE_ADDR)
cdma.transfer(h_data_bram.buffer, h_data_bram.BASE_ADDR)
cdma.transfer(weight_bram.buffer, weight_bram.BASE_ADDR)
cdma.transfer(subgraph_idx_bram.buffer, subgraph_idx_bram.BASE_ADDR)
end_time = time.perf_counter()

print(f"Transferring Time = {round((end_time-start_time)*1000, 3)} ms")
# Write 1 to the load-done register now that all inputs have been transferred.
sysreg.write(REG["gat_load_done"], 1)


# Poll the ready register until it reads 1, giving up after the timeout.
start_time = time.perf_counter()
ready_deadline = start_time + GAT_READY_TIMEOUT_S
while sysreg.read(REG["gat_ready"]) != 1:
    if time.perf_counter() > ready_deadline:
        raise TimeoutError(
            f"gat_ready was not asserted within {GAT_READY_TIMEOUT_S} s"
        )
end_time = time.perf_counter()

# Read the result back from the output BRAM into its buffer, then write 0 to
# the load-done register again.
cdma.transfer(feat_out_bram.BASE_ADDR, feat_out_bram.buffer)
sysreg.write(REG["gat_load_done"], 0)

print(f"Execution Time = {round((end_time-start_time)*1000, 3)} ms")

Transferring Time = 12.17 ms
Execution Time = 3.057 ms


## Output Final Layer


In [15]:
# Scale every raw output word by 1 / 2**16 (presumably the output carries
# 16 fractional bits of fixed-point — confirm against the hardware format).
feat_out_buffer = [raw_word / (2**16) for raw_word in feat_out_bram.buffer]
# Show the first 50 converted values and the total number of values.
print(feat_out_buffer[:50])

print(len(feat_out_buffer))


[0.0, 63.0, 0.0, 17281.124969482422, 0.0, 0.0, 732.875, 2164.0, 0.0, 0.0, 543.0, 10856.499984741211, 0.0, 760.5, 1225.9945526123047, 0.0, 538.9935455322266, 1443.0157012939453, 14042.992202758789, 0.0, 0.0, 16818.49998474121, 0.0, 0.0, 294.0, 1877.5, 0.0, 2892.5, 0.0, 0.0, 12988.249984741211, 14552.249984741211, 3434.25, 0.0, 0.0, 2092.0, 0.0, 18250.124954223633, 5999.374984741211, 0.0, 0.0, 0.0, 8500.199981689453, 0.0, 2145.0, 1530.3999938964844, 3001.2999877929688, 726.0, 2235.2999877929688, 2020.25]
18956


In [16]:
# TODO: validate output
# NOTE(review): gat_dataset_path already ends with "/", so the path passed
# here contains "//layer_2/" — confirm validate_output tolerates that.
validate_output(feat_out_buffer, f"{gat_dataset_path}/layer_2/", report_status="NONE")





In [17]:
# Classify from the converted hardware output, using the dataset loader built
# earlier; the printed result compares golden and DUT labels and reports accuracy.
handle_classification(feat_out_buffer, data_loader_instance)

- Golden : tensor([3, 4, 4,  ..., 3, 3, 3])
- DUT    : tensor([3, 4, 4,  ..., 0, 3, 3])

 => Accuracy = 76.1078 % (2061 / 2708)


### Debugger

In [18]:
# Read the third debug register of the system-register block and print it
# through the project's format_print helper.
format_print(sysreg.read(REG["i_gat_debug_3"]))

Last modified: 14:41 - 01/04
