当プログラム作成に当たって以下のソースコードを大いに参考にしました。

https://qiita.com/jin0g/items/8e59b5abb003df3b847d#1-%E9%AB%98%E9%80%9F%E5%8C%96%E3%81%99%E3%82%8B%E3%82%BF%E3%83%BC%E3%82%B2%E3%83%83%E3%83%88%E3%81%AE%E6%B1%BA%E5%AE%9A

copyright : 2020 Akira Jinguji



https://github.com/msiplab/VieWork.git

copyright : 2020 Shogo Muramatsu

In [None]:
import pynq
import pandas as pd
import numpy as np
import matplotlib as plt

# Program the FPGA with the synthesized design through PYNQ
ol = pynq.Overlay('./design_1.bit')
# Memory-mapped I/O handle of the product_0 IP; all register access below goes through it
mmio = ol.product_0.mmio

# The memory-mapped I/O object exposes the AXI-Lite registers as a NumPy array.
# That array is accessed in 32-bit words, so byte offsets and byte sizes from
# the register map are divided by 4 (32 bit / 8 bit) to get word indices.
def ndarray_from_mmio(name, size, dtype):
    """Return a NumPy view onto one register region of the product_0 IP.

    Parameters
    ----------
    name : str
        Key of the region in ``ol.ip_dict['product_0']['registers']``.
    size : tuple of int
        Shape the region is reshaped to.
    dtype : numpy dtype
        Element type the underlying 32-bit words are reinterpreted as.

    Returns
    -------
    numpy.ndarray
        Array of shape ``size`` and type ``dtype`` built from a slice of
        ``mmio.array`` (no copy is requested, so it aliases that array).
    """
    reginfo = ol.ip_dict['product_0']['registers'][name]
    addr_start = reginfo['address_offset'] // 4
    addr_end = addr_start + reginfo['size'] // 4
    # Reinterpret the words with the caller's dtype. The previous code ignored
    # the `dtype` argument (np.int16 was hard-coded) and assigned to `.dtype`
    # in place, which NumPy discourages in favour of view().
    return mmio.array[addr_start:addr_end].view(dtype).reshape(size)

# Views onto the IP's 'Memory_x1', 'Memory_x2' and 'Memory_y' regions, each
# interpreted as a 16x16 matrix of signed 16-bit integers.
mmio_x1, mmio_x2, mmio_y1 = (
    ndarray_from_mmio(regName, size=(16, 16), dtype=np.int16)
    for regName in ('Memory_x1', 'Memory_x2', 'Memory_y')
)

# Bit mask tested against the word at offset 0 to detect completion
# (presumably the IP's "done" status bit -- confirm against its register map).
DONE = 1 << 1

def mydot(x1: np.ndarray, x2: np.ndarray) -> np.ndarray:
    """Run the matrix-product IP on ``x1`` and ``x2`` and return its output.

    Both operands are written into the int16 register views, so they must be
    assignable to a 16x16 array; values are converted to int16 on assignment.
    The call blocks until the DONE bit is observed at offset 0.
    """
    # Load the two operands into the IP's input memories
    mmio_x1[...] = x1
    mmio_x2[...] = x2
    # Kick off the computation by writing 1 to the word at offset 0
    mmio.write(0, 1)
    # Busy-wait until the DONE bit shows up in that same word
    while True:
        if mmio.read(0) & DONE:
            break
    # Hand back a snapshot so later runs cannot overwrite the caller's result
    return mmio_y1.copy()

In [None]:
# Input matrix (16x16 integer test data to be transformed)
arrayX = np.array(
 [[97, 16, 99, 96, 20, 94, 60, 50, 36, 91, 96, 76, 23, 29, 51, 16],
 [90, 58, 84, 87, 60, 31, 83, 10, 19, 40, 89, 41, 51, 55, 44, 70],
 [36, 22, 56, 85, 87, 25, 94, 45, 52, 76, 59, 14, 36, 37, 23, 31],
 [79, 27, 69, 68, 49, 88, 45, 87, 85, 99, 25, 35, 81, 44, 98, 33],
 [35, 95, 82, 36, 75, 75, 30, 24, 45, 27, 85, 27, 34, 91, 93, 52],
 [80, 66, 88, 54, 39, 89, 47, 96, 24, 14, 85, 84, 67, 88, 59, 87],
 [64, 41, 90, 47, 66, 10, 36, 10, 98, 31, 72, 20, 22, 45, 47, 29],
 [26, 37, 72, 84, 54, 99, 76, 80, 36, 38, 10, 89, 43, 68, 80, 29],
 [82, 74, 11, 11, 16, 11, 91, 26, 45, 86, 62, 82, 19, 13, 30, 57],
 [81, 61, 46, 99, 52, 77, 31, 20, 94, 66, 32, 55, 57, 10, 76, 76],
 [36, 82, 97, 62, 36, 20, 97, 97, 26, 66, 57, 43, 84, 42, 11, 25],
 [57, 66, 37, 25, 85, 75, 42, 97, 48, 75, 38, 43, 81, 74, 48, 66],
 [50, 55, 96, 10, 89, 93, 50, 48, 29, 15, 67, 32, 19, 66, 85, 31],
 [84, 92, 10, 31, 97, 80, 39, 19, 28, 63, 53, 43, 55, 86, 35, 83],
 [99, 99, 50, 18, 46, 56, 30, 83, 19, 25, 89, 32, 16, 19, 60, 44],
 [51, 65, 72, 77, 75, 89, 27, 76, 80, 18, 91, 74, 36, 90, 47, 82]])

# Forward transform matrix (orthonormal DCT-II basis):
#   arrayDct[r, c] = sqrt(2/N) * k_r * cos((2c + 1) * r * pi / (2N))
# with k_0 = 1/sqrt(2) and k_r = 1 for every other row.
dctSize = 16
rowIdx, colIdx = np.indices((dctSize, dctSize))
# Only row 0 carries the extra 1/sqrt(2) normalization factor
rowScale = np.where(rowIdx == 0, 1/np.sqrt(2), 1.0)
arrayDct = np.sqrt(2/dctSize)*rowScale*np.cos((2*colIdx+1)*rowIdx*np.pi/(2*dctSize))

# Forward transform matrix for the FPGA. The FPGA path only handles integers,
# so the float matrix is multiplied by 10 (not 1000, as an earlier comment
# claimed) and floored to integer values.
# NOTE(review): np.floor rounds toward -inf, which biases every negative
# coefficient; np.rint would reduce the quantization error. Kept as floor so
# that existing results do not change.
Int_arrayDct = np.floor(10*arrayDct)

# Inverse transform matrix: the transpose of the forward matrix
arrayIdct = arrayDct.T

# Inverse transform matrix for the FPGA, integer-valued like Int_arrayDct
Int_arrayIdct = Int_arrayDct.T

In [None]:
# 2-D DCT

# Forward transform on the CPU (numpy on the ARM core): D * X * D^T
arrayY = arrayDct.dot(arrayX).dot(arrayDct.T)

# Forward transform on the FPGA (custom matrix-product IP).
# The integer DCT matrix is 10x the real one and is applied twice; the input
# is pre-divided by 10 and the intermediate product is divided by 10 again.
xScaled = np.floor(arrayX/10)
rowPass = mydot(Int_arrayDct, xScaled)/10
y_test = mydot(rowPass, Int_arrayDct.T)

# Comparison (uncomment to inspect both results)
#display(arrayY)
#display(y_test)

In [None]:
# 2-D inverse DCT

# Inverse transform on the CPU (numpy on the ARM core): D^T * Y * D
arrayR = arrayIdct.dot(arrayY).dot(arrayIdct.T)

# Inverse transform on the FPGA (custom matrix-product IP).
# The integer inverse matrix is 10x the real one and is applied twice in this
# step, so the product is divided by 10 * 10 = 100.
fpgaColPass = mydot(Int_arrayIdct, y_test)
iy_test = mydot(fpgaColPass, Int_arrayIdct.T)/100

# Uncomment to inspect both reconstructions
#display(arrayR)
#display(iy_test)

In [None]:
print('Validation:', np.all(arrayR == iy_test))
print()
print('=> Running 1000 times on CPU')
%time for i in range(1000): _ = np.dot(np.dot(arrayIdct, arrayY),arrayIdct.T)
print()
print('=> Running 1000 times on FPGA')
%time for i in range(1000): _ = mydot(mydot(Int_arrayIdct, iy_test), Int_arrayIdct.T)/100

Validation: False

=> Running 1000 times on CPU
CPU times: user 140 ms, sys: 0 ns, total: 140 ms
Wall time: 139 ms

=> Running 1000 times on FPGA
CPU times: user 1.3 s, sys: 0 ns, total: 1.3 s
Wall time: 1.3 s


In [None]:
# Wrap the input, the CPU reconstruction and the FPGA reconstruction in
# DataFrames so the notebook renders them as tables, in that order.
bf, cf, df = (pd.DataFrame(matrix) for matrix in (arrayX, arrayR, iy_test))

for frame in (bf, cf, df):
    display(frame)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,97,16,99,96,20,94,60,50,36,91,96,76,23,29,51,16
1,90,58,84,87,60,31,83,10,19,40,89,41,51,55,44,70
2,36,22,56,85,87,25,94,45,52,76,59,14,36,37,23,31
3,79,27,69,68,49,88,45,87,85,99,25,35,81,44,98,33
4,35,95,82,36,75,75,30,24,45,27,85,27,34,91,93,52
5,80,66,88,54,39,89,47,96,24,14,85,84,67,88,59,87
6,64,41,90,47,66,10,36,10,98,31,72,20,22,45,47,29
7,26,37,72,84,54,99,76,80,36,38,10,89,43,68,80,29
8,82,74,11,11,16,11,91,26,45,86,62,82,19,13,30,57
9,81,61,46,99,52,77,31,20,94,66,32,55,57,10,76,76


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,97.0,16.0,99.0,96.0,20.0,94.0,60.0,50.0,36.0,91.0,96.0,76.0,23.0,29.0,51.0,16.0
1,90.0,58.0,84.0,87.0,60.0,31.0,83.0,10.0,19.0,40.0,89.0,41.0,51.0,55.0,44.0,70.0
2,36.0,22.0,56.0,85.0,87.0,25.0,94.0,45.0,52.0,76.0,59.0,14.0,36.0,37.0,23.0,31.0
3,79.0,27.0,69.0,68.0,49.0,88.0,45.0,87.0,85.0,99.0,25.0,35.0,81.0,44.0,98.0,33.0
4,35.0,95.0,82.0,36.0,75.0,75.0,30.0,24.0,45.0,27.0,85.0,27.0,34.0,91.0,93.0,52.0
5,80.0,66.0,88.0,54.0,39.0,89.0,47.0,96.0,24.0,14.0,85.0,84.0,67.0,88.0,59.0,87.0
6,64.0,41.0,90.0,47.0,66.0,10.0,36.0,10.0,98.0,31.0,72.0,20.0,22.0,45.0,47.0,29.0
7,26.0,37.0,72.0,84.0,54.0,99.0,76.0,80.0,36.0,38.0,10.0,89.0,43.0,68.0,80.0,29.0
8,82.0,74.0,11.0,11.0,16.0,11.0,91.0,26.0,45.0,86.0,62.0,82.0,19.0,13.0,30.0,57.0
9,81.0,61.0,46.0,99.0,52.0,77.0,31.0,20.0,94.0,66.0,32.0,55.0,57.0,10.0,76.0,76.0


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,105.94,-203.23,-55.72,-92.7,-121.59,-103.17,-71.92,-107.8,-76.7,-70.78,-69.85,-56.49,-92.78,-123.26,-95.83,-134.84
1,-147.08,258.64,132.94,222.04,143.65,152.39,146.59,121.36,74.37,132.56,162.58,124.92,123.73,158.55,144.11,171.77
2,-76.76,69.28,41.24,99.22,83.67,27.59,90.87,64.06,44.61,75.38,42.08,9.4,43.33,46.35,35.17,50.03
3,-124.48,196.93,79.88,155.89,104.62,162.39,88.47,163.59,107.56,164.5,65.56,88.13,145.64,124.73,154.86,118.37
4,-118.65,183.46,86.61,95.35,109.4,115.22,54.77,70.62,57.1,59.95,108.12,53.02,70.93,144.19,132.8,110.15
5,-112.47,195.26,85.77,134.03,82.9,159.64,70.8,154.43,41.13,65.44,122.02,122.6,111.39,155.17,98.78,150.87
6,-51.16,98.47,86.49,59.44,67.6,23.68,45.99,29.73,84.0,39.18,77.07,27.87,36.25,52.4,57.56,37.11
7,-143.92,163.11,98.9,176.51,109.44,167.14,109.62,137.04,53.99,88.17,56.51,129.17,91.12,136.73,136.5,94.13
8,-23.97,132.1,-9.18,35.47,7.01,9.5,87.31,41.35,47.21,89.45,59.18,81.09,23.01,20.4,36.44,64.03
9,-91.15,185.82,41.59,173.06,94.13,139.96,61.22,74.38,105.74,106.46,55.64,93.13,89.06,69.51,129.4,142.05
