In [1]:
import time
from pynq import Overlay
import pynq.lib.dma
from pynq import allocate
import numpy as np
from pynq import MMIO
from struct import *

In [2]:
# Instantiate the overlay object from the fcl_accel bitstream; `ol` is the
# handle through which the DMA engines are looked up in later cells.
bitstream_path = '/home/xilinx/pynq/overlays/fcl_accel/fcl_accel.bit'
ol = Overlay(bitstream_path)

In [3]:
# IPython introspection (`?` suffix): displays the help/docstring for the
# overlay object. Interactive aid only -- it assigns nothing, and no later
# cell depends on it. This is IPython syntax, not plain Python.
ol?

In [3]:
# Initializing hardware
#
# dma0 / dma1: the two AXI DMA engines found under the overlay's `fclAccel`
# hierarchy. fcl_ip: a memory-mapped window used by the run cell to write
# transfer sizes to the accelerator.
dma0, dma1 = ol.fclAccel.axi_dma_0, ol.fclAccel.axi_dma_1

fcl_ip_base_addr = 0x43c00000   # physical base address of the mapped window
fcl_ip_addr_range = 0x10000     # size of the mapped window in bytes
fcl_ip = MMIO(fcl_ip_base_addr, fcl_ip_addr_range)

# Offsets inside the fcl_ip window; the run cell writes dim0 to dma0_len and
# dim1 to dma1_len before every transfer.
dma0_len, dma1_len = 0x18, 0x10

In [4]:
# Convolution layer parameters
k, h, w = 3, 224, 224        # kernel side length and the two input plane dimensions
filters, channels = 16, 3    # number of filters and number of input channels

# Derived element counts
winsize = k * k                           # values in one k x k kernel window
dim0 = h * w                              # values in one h x w plane
weights = filters * channels * winsize    # total kernel values over all filters/channels
dim1 = 3 + winsize + dim0                 # 3 header values + one kernel window + one plane

In [5]:
# Initializing input data and buffers
#
# Produces:
#   buf1 - DMA source buffer of dim1 values (3 header values, one kernel
#          window, one input plane); refilled by the run cell for every
#          (filter, channel) pair.
#   out  - list of `filters` DMA buffers, dim0 values each, one per filter.
#   inp  - list of `channels` tuples, dim0 floats each, read from inputs.dat.
#   ker  - ker[n][c] is a tuple of winsize floats for filter n / channel c,
#          read from weights.dat in filter-major, channel-minor order.
buf1 = allocate(shape=(dim1,), dtype='float32')
buf1[:] = 0

# The run cell streams out[n] INTO the accelerator (dma0 send channel) before
# reading the result back into the same buffer, so its initial contents
# matter. Zero it explicitly, exactly as is done for buf1, instead of relying
# on whatever the allocator hands back.
# NOTE(review): presumably the accelerator accumulates per-channel partial
# sums on top of this data -- confirm against the accelerator design.
out = []
for n in range(filters):
    out_buf = allocate(shape=(dim0,), dtype='float32')
    out_buf[:] = 0
    out.append(out_buf)

# struct repeat-count formats ("<count>f") decode the same values as
# 'f' * count without building a count-character format string.
plane_fmt = '%df' % dim0
window_fmt = '%df' % winsize

# `with` guarantees the files are closed even if a read/unpack fails. The
# names fdata / wdata stay bound afterwards, so a later .close() on them is a
# harmless no-op. unpack() raises struct.error if a file is too short.
inp = []
with open("inputs.dat", "rb") as fdata:
    for c in range(channels):
        inp.append(unpack(plane_fmt, fdata.read(4*dim0)))

ker = []
with open("weights.dat", "rb") as wdata:
    for n in range(filters):
        filter_windows = []
        for c in range(channels):
            filter_windows.append(unpack(window_fmt, wdata.read(4*winsize)))
        ker.append(filter_windows)


In [6]:
# Run and time convolution accelerator
#
# For every (filter n, channel c) pair: pack buf1 on the CPU, write the two
# stream sizes to fcl_ip, start the three DMA transfers and wait for all of
# them. Reports the total wall-clock time of the whole loop (which therefore
# includes the CPU packing time) and, separately, the accumulated packing time.
t_start = time.time()

# Accumulated seconds spent filling buf1 across all (n, c) iterations.
t_fill_total = 0

for n in range(filters):
        
    for c in range(channels):
        
        t_fill = time.time()
        
        # buf1 layout: [0]=k, [1]=h, [2]=w, [3 : 3+winsize] = kernel window
        # for (n, c), [3+winsize :] = input plane for channel c.
        buf1[:] = 0
        
        buf1[0] = k
        buf1[1] = h
        buf1[2] = w
        
        buf1[3:(3+winsize)] = ker[n][c]
        buf1[(3+winsize):] = inp[c]
        
        t_fill_total += (time.time() - t_fill)
        
        # Write the element counts of the two streams (dim1 for the dma1
        # stream, dim0 for the dma0 stream) at offsets dma1_len / dma0_len.
        fcl_ip.write(dma1_len, dim1)
        fcl_ip.write(dma0_len, dim0)

        # All three transfers are started before any wait() is issued.
        # out[n] is both the dma0 send source and the dma0 receive
        # destination, so the data received for channel c is fed back in on
        # channel c+1.
        # NOTE(review): presumably this accumulates per-channel partial sums
        # into out[n] -- confirm against the accelerator design.
        dma1.sendchannel.transfer(buf1)
        dma0.sendchannel.transfer(out[n])
        dma0.recvchannel.transfer(out[n])
        
        # Block until every started transfer has finished before refilling buf1.
        dma1.sendchannel.wait()
        dma0.sendchannel.wait()
        dma0.recvchannel.wait()
        
    print("Filter " + str(n) + " completed")
        
t_stop = time.time()

# buf1 is not used by any later cell.
# NOTE(review): presumably close() releases the DMA buffer, in which case this
# cell cannot be re-run without first re-running the allocation cell -- confirm
# for the installed PYNQ version.
buf1.close()

print('Hardware execution time: ', t_stop-t_start)
print(t_fill_total, "seconds were spent filling the buffers.")

Filter 0 completed
Filter 1 completed
Filter 2 completed
Filter 3 completed
Filter 4 completed
Filter 5 completed
Filter 6 completed
Filter 7 completed
Filter 8 completed
Filter 9 completed
Filter 10 completed
Filter 11 completed
Filter 12 completed
Filter 13 completed
Filter 14 completed
Filter 15 completed
Hardware execution time:  12.090102672576904
1.7720227241516113 seconds were spent filling the buffers.


In [7]:
# Open expected output data for validation
#
# exp[x] is a tuple of dim0 floats: the reference values for filter x, read
# from exp_output.dat as consecutive 4-byte native floats.
#
# No DMA buffer is allocated here: the reference data is only read on the CPU,
# and an allocated buffer would be discarded as soon as the unpacked tuple is
# stored in its place.
outdim = dim0*filters

# Repeat-count format ("<count>f") decodes the same values as 'f' * dim0.
exp_fmt = '%df' % dim0

exp = []
# `with` closes the file even if a read/unpack fails; `odata` stays bound
# afterwards, so a later odata.close() is a harmless no-op.
with open("exp_output.dat", "rb") as odata:
    for x in range(filters):
        exp.append(unpack(exp_fmt, odata.read(4*dim0)))


In [8]:
# Compare accelerator output with expected outputs, print number of discrepancies
#
# For each filter, counts the positions (out of the first dim0) where the
# accelerator output differs from the reference data, using exact equality.
# The comparison is vectorised with NumPy instead of looping over every
# element in Python; the counts are the same as an element-by-element `!=`.
num_errors = 0

for y in range(filters):
    produced = np.asarray(out[y])[:dim0]
    # Left at NumPy's default float dtype so the comparison is carried out at
    # the wider precision, as it is when comparing the individual scalars.
    expected = np.asarray(exp[y][:dim0])
    discrep = int(np.count_nonzero(produced != expected))
    print("Discrepancies in filter", y, ":", discrep)
    num_errors += discrep

if num_errors == 0:
    print("Success. Output data matches data from exp_output.dat")
else:
    print("Failure. There were", num_errors, "mismatched values compared to exp_output.dat")

Discrepancies in filter 0 : 0
Discrepancies in filter 1 : 0
Discrepancies in filter 2 : 0
Discrepancies in filter 3 : 0
Discrepancies in filter 4 : 0
Discrepancies in filter 5 : 0
Discrepancies in filter 6 : 0
Discrepancies in filter 7 : 0
Discrepancies in filter 8 : 0
Discrepancies in filter 9 : 0
Discrepancies in filter 10 : 0
Discrepancies in filter 11 : 0
Discrepancies in filter 12 : 0
Discrepancies in filter 13 : 0
Discrepancies in filter 14 : 0
Discrepancies in filter 15 : 0
Success. Output data matches data from exp_output.dat


In [9]:
# Close files
# Releases the handles for the input, weight and expected-output data files.
for data_file in (fdata, wdata, odata):
    data_file.close()