In [1]:
from pynq import Overlay
# Instantiate the overlay from the team bitstream; later cells reach the
# design's IP blocks (e.g. the AXI BRAM controllers) as attributes of `ol`.
ol = Overlay("CS411_team0.bit")

In [2]:
# IPython introspection: shows the help/docstring for `ol`. Interactive aid
# only; nothing later in the notebook depends on this cell.
ol?

In [None]:
import pynq
# Print the processing-system CPU clock (in MHz, going by the attribute name)
# for reference when reading the timings further down.
print(pynq.ps.Clocks.cpu_mhz)

In [4]:
# Memory-mapped array views of the four AXI BRAM controllers in the overlay.
# Roles, as used by matmul_OS / matmul_WS below:
#   axi_bram_ctrl_0 -> "special" control words (start flag, mode, M/K/N, done)
#   axi_bram_ctrl_1 -> first operand (mat1)
#   axi_bram_ctrl_2 -> second operand (mat2)
#   axi_bram_ctrl_3 -> result words
bram_sp_arr, bram_a_arr, bram_w_arr, bram_o_arr = (
    ol.axi_bram_ctrl_0.mmio.array,
    ol.axi_bram_ctrl_1.mmio.array,
    ol.axi_bram_ctrl_2.mmio.array,
    ol.axi_bram_ctrl_3.mmio.array,
)

In [9]:
import numpy as np
def matmul_OS(mat1: np.ndarray, mat2: np.ndarray, timeout_s: float = 10.0):
    """Run ``mat1 @ mat2`` on the FPGA overlay in output-stationary (OS) mode.

    The operands are serialized into the operand BRAMs, the dimensions and
    mode words are written to the special BRAM, the start word is set, and the
    result is read from the output BRAM once the done word reads 1.

    Args:
        mat1: Left operand of shape (M, K); M must be at most 8.
        mat2: Right operand of shape (K, N); N must be at most 8.
        timeout_s: Maximum number of seconds to wait for the done word.
            Pass ``None`` to wait indefinitely (the original behaviour).

    Returns:
        An ``np.int32`` array of shape (M, N): the first M*N output words
        reshaped to N columns, with the row order reversed.

    Raises:
        AssertionError: If the inner dimensions differ or M/N exceed 8.
        TimeoutError: If the done word is not set within ``timeout_s``.
    """
    import time  # local import keeps this cell runnable on its own

    M = mat1.shape[0]
    K = mat1.shape[1]
    N = mat2.shape[1]
    assert(mat1.shape[1] == mat2.shape[0])
    # NOTE(review): a `K <= 16` bound was commented out in the original, so K
    # is not limited here -- confirm against the BRAM depth of the design.
    assert(M <= 8 and N <= 8)

    # Serialize the operands for the BRAM upload:
    # mat1 is flattened column by column (Fortran order), mat2 row by row.
    serial_mat1 = np.reshape(mat1, -1, order = 'F')
    serial_mat2 = mat2.flatten()

    assert(serial_mat1.shape[0] == M*K and serial_mat2.shape[0] == N*K)
    # Load data to BRAM.
    bram_a_arr[0:serial_mat1.shape[0]] = serial_mat1
    bram_w_arr[0:serial_mat2.shape[0]] = serial_mat2

    # Control words: [start=0, 1, M, K, N, 0]. Word 1 = 1 selects OS according
    # to the original comment; the WS path writes 0 there and 1 in word 5.
    bram_sp_arr[0:6] = [0,1,M,K,N,0]

    # Start the accelerator (word 0 = 1).
    bram_sp_arr[0] = 1

    # Poll word 25 of the special BRAM until it reads 1 (the "addr100" of the
    # original note -- presumably byte address 100 with 4-byte words; TODO
    # confirm). The wait is bounded so a stalled design raises instead of
    # hanging the kernel.
    # NOTE(review): the done word is not cleared here; this assumes the
    # hardware resets it when a run starts -- confirm.
    deadline = None if timeout_s is None else time.monotonic() + timeout_s
    while bram_sp_arr[25] != 1:
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError(
                "matmul_OS: accelerator did not signal completion within "
                + str(timeout_s) + " s"
            )

    reshape_fit = np.reshape(bram_o_arr[0:M*N], (-1,N))

    # Rows come back in reverse order, so flip them before returning.
    return reshape_fit[::-1].astype(np.int32)


def matmul_WS(mat1: np.ndarray, mat2: np.ndarray, timeout_s: float = 10.0):
    """Run ``mat1 @ mat2`` on the FPGA overlay in weight-stationary (WS) mode.

    Note the dimension naming used by this function: ``mat1`` is (K, M) and
    ``mat2`` is (M, N), i.e. M is the shared inner dimension.

    Args:
        mat1: Left operand of shape (K, M).
        mat2: Right operand of shape (M, N).
        timeout_s: Maximum number of seconds to wait for the done word.
            Pass ``None`` to wait indefinitely (the original behaviour).

    Returns:
        An ``np.int32`` array of shape (K, N) built from the first K*N output
        words.

    Raises:
        AssertionError: If the inner dimensions differ or K, M or N exceed 8.
        TimeoutError: If the done word is not set within ``timeout_s``.
    """
    import time  # local import keeps this cell runnable on its own

    M = mat2.shape[0]
    K = mat1.shape[0]
    N = mat2.shape[1]
    assert(mat1.shape[1]==mat2.shape[0])
    assert(K <= 8 and M <= 8 and N <= 8) # proj2

    # Reverse the rows of mat2 to satisfy the streaming order, then flatten
    # both operands row by row from 2D to 1D.
    mat2 = mat2[::-1]
    serial_mat1 = mat1.flatten()
    serial_mat2 = mat2.flatten()

    # Load data to BRAM.
    assert(serial_mat1.shape[0] == M*K and serial_mat2.shape[0] == N*M)
    bram_a_arr[0:serial_mat1.shape[0]] = serial_mat1
    bram_w_arr[0:serial_mat2.shape[0]] = serial_mat2

    # Control words: [start=0, 0, M, K, N, 1]. Words 1 and 5 are 0 and 1 here,
    # whereas the OS path writes 1 and 0.
    # NOTE(review): which of the two is the mode flag is not documented in
    # this notebook -- confirm against the RTL.
    bram_sp_arr[0:6] = [0,0,M,K,N,1]
    # Start the accelerator (word 0 = 1).
    bram_sp_arr[0] = 1

    # Poll word 25 of the special BRAM until it reads 1 (the "addr100" of the
    # original note -- presumably byte address 100 with 4-byte words; TODO
    # confirm). The wait is bounded so a stalled design raises instead of
    # hanging the kernel.
    # NOTE(review): the done word is not cleared here; this assumes the
    # hardware resets it when a run starts -- confirm.
    deadline = None if timeout_s is None else time.monotonic() + timeout_s
    while bram_sp_arr[25] != 1:
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError(
                "matmul_WS: accelerator did not signal completion within "
                + str(timeout_s) + " s"
            )

    reshape_fit = np.reshape(bram_o_arr[0:K*N], (-1,N))
    return reshape_fit.astype(np.int32)

In [10]:
import random

def test_OS():
    score = 0
    num_test = 0
    ## OUTPUT STATIONARY TEST
    for _ in range (100):
        M = random.randint(1,8)
        K = random.randint(1,8) #long
#         print(K)
        N = random.randint(1,8)
        num_test = num_test +1
        mat1 = np.random.randint(-(1<<5),1<<5 -1 ,size=(M,K)) 
        mat2 = np.random.randint(-(1<<5),1<<5 -1 ,size=(K,N)) 
        fpga = (matmul_OS(mat1, mat2))
        mat_mul = (mat1@mat2)
        is_pass = np.equal(fpga,mat_mul).all()
#         print(is_pass)
#         print(fpga)
#         print(mat_mul)
#         print("====================")
        if(is_pass):
            score = score +1
            
            
    return score/num_test * 100

In [17]:
def test_WS():
    score = 0
    num_test = 0
    ## WEIGHT STATIONARY TEST
    for _ in range (100):
        M = random.randint(1,8)
        K = random.randint(1,8)
        N = random.randint(1,8)
        num_test = num_test +1
        mat1 = np.ones((K,M))#np.random.randint(0,2 ,size=(K,M)) 
        mat2 = np.ones((M,N))#np.random.randint(0,2 ,size=(M,N)) 
#         print(str(M) +" "+str(K) +" "+str(N) +" ")
#         print(matmul_WS(mat1, mat2))
#         print(mat1@mat2)
        if(np.equal(matmul_WS(mat1, mat2), mat1@mat2).all()):
            score = score + 1
#             print(str(M) +" "+str(K) +" "+str(N) +" ")
#             print("pass")
        else:
            print(str(M) +" "+str(K) +" "+str(N) +" ")
            print(mat1@mat2)
            print(matmul_WS(mat1, mat2))
#         print("============================")
    return score/num_test*100

In [18]:
# Run the output-stationary check; the cell value is the pass rate in percent.
test_OS()

100.0

In [19]:
# Run the weight-stationary check; the cell value is the pass rate in percent.
test_WS()

100.0

In [20]:
def project2_test():
    """Print the mean of the WS and OS pass rates as a score out of 100.

    The weight-stationary suite runs first, then the output-stationary suite.
    """
    ws_score = test_WS()
    os_score = test_OS()
    combined = (ws_score + os_score) * 0.5
    print(" SCORE : " + str(combined) + " / 100 " )

In [21]:
# Print the combined score (mean of the WS and OS pass rates).
project2_test()

 SCORE : 100.0 / 100 


In [8]:
def probe_time():
    """Time one WS run, one OS run and one NumPy ``@`` on the same operands.

    Random dimensions M, K, N are drawn from 1..8; ``mat1`` is (K, M) and
    ``mat2`` is (M, N) with entries in [-32768, 32767]. Prints the three
    elapsed times in seconds (WS, OS, NumPy), followed by the WS/NumPy and
    OS/NumPy time ratios. Only timing is measured; results are not compared.
    """
    import time

    M = random.randint(1,8)
    K = random.randint(1,8)
    N = random.randint(1,8)
    # `1<<15 -1` parses as `1 << (15 - 1)` == 16384, which halved the positive
    # range. randint's upper bound is exclusive, so `1 << 15` gives the full
    # signed 16-bit range.
    low, high = -(1 << 15), 1 << 15
    mat1 = np.random.randint(low, high, size=(K,M))
    mat2 = np.random.randint(low, high, size=(M,N))

    # perf_counter is the high-resolution clock intended for measuring short
    # intervals; time.time() is a wall clock and may be coarse or adjusted.
    start = time.perf_counter()
    matmul_WS(mat1, mat2)
    elapsed_ws = time.perf_counter() - start
    print(elapsed_ws)

    start = time.perf_counter()
    matmul_OS(mat1, mat2)
    elapsed_os = time.perf_counter() - start
    print(elapsed_os)

    start = time.perf_counter()
    mat1@mat2
    elapsed_python = time.perf_counter() - start
    print(elapsed_python)

    def _ratio(fpga_elapsed):
        # Guard against a zero-length NumPy measurement on a coarse clock.
        if elapsed_python > 0:
            return fpga_elapsed / elapsed_python
        return float("inf")

    print("ratio_WS   " + str(_ratio(elapsed_ws)))
    print("ratio_OS   " + str(_ratio(elapsed_os)))

In [9]:
# Print one timing sample: WS, OS and NumPy elapsed seconds, then the ratios.
probe_time()

0.006850004196166992
0.0027151107788085938
7.510185241699219e-05
ratio_WS   91.20952380952382
ratio_OS   36.15238095238095
