# DSD-2019 Project (MNIST)
---
# Hardware control inference

In [None]:
from layers_mnist import *
from bitop_mnist import *
from setup_mnist import *
from scale_uart import *
from matplotlib import pyplot as plt
import time
import numpy as np
import glob
import platform
import time

%matplotlib inline

### Load dataset for image generation

In [None]:
# TEST SET ORIGIN
# Original MNIST test set: the images are rendered and the labels are compared with
# the hardware predictions in the accuracy cell at the end of the notebook.
# NOTE(review): the label path is spelled "t10k-images-idx1-ubyte"; the standard MNIST
# label file is named "t10k-labels-idx1-ubyte" — confirm which name exists on disk.
X_test_origin, y_test = load_mnist("./dataset_mnist/t10k-images-idx3-ubyte", "./dataset_mnist/t10k-images-idx1-ubyte")

### Simulation dataset for our 8-bit MAC unit

In [None]:
# Quantized test images; the same .npy file is uploaded to the board DRAM further down.
X_test_ = np.load("./mnist_dataset_quan/images_100.npy")

## Load network parameter
---

In [None]:
# 8-bit quantization network param
# Host-side copies of the quantized weights and biases of every layer.
# The same files are written to the board DRAM by the su_set_* cells below.
# Convolution layers
conv1_w_ = np.load("./mnist_network_quan_param/mnist_conv1_weight_quan.npy")
conv1_b_ = np.load("./mnist_network_quan_param/mnist_conv1_bias_quan.npy")
conv2_w_ = np.load("./mnist_network_quan_param/mnist_conv2_weight_quan.npy")
conv2_b_ = np.load("./mnist_network_quan_param/mnist_conv2_bias_quan.npy")
# Fully-connected layers
fc1_w_ = np.load("./mnist_network_quan_param/mnist_fc1_weight_quan.npy")
fc1_b_ = np.load("./mnist_network_quan_param/mnist_fc1_bias_quan.npy")
fc2_w_ = np.load("./mnist_network_quan_param/mnist_fc2_weight_quan.npy")
fc2_b_ = np.load("./mnist_network_quan_param/mnist_fc2_bias_quan.npy")

## Test for accuracy  
---
Do inference

### Board connection

In [None]:
def port_list():
    """Return the names of the serial ports that can currently be opened.

    Candidate names depend on ``platform.system()``: ``COM1``..``COM256`` on
    Windows, ``/dev/tty[A-Za-z]*`` on Linux and ``/dev/tty.*`` on macOS.
    Each candidate is opened and closed again straight away; only the ones
    that open without error are kept.

    Returns:
        list: port names that could be opened. Empty when nothing is
        available or when the operating system is not one of the three above.
    """
    os_name = platform.system()
    if "Windows" in os_name:
        print("Current OS: Windows")
        ports = ['COM%s' %(i+1) for i in range(256)]
    elif "Linux" in os_name:
        print("Current OS: Linux")
        ports = glob.glob('/dev/tty[A-Za-z]*')
    elif "Darwin" in os_name:
        print("Current OS: Mac")
        ports = glob.glob('/dev/tty.*')
    else:
        # Without this branch `ports` stayed unbound and the loop below
        # raised UnboundLocalError on any other platform.
        print("Current OS: %s (unsupported, no ports probed)" % os_name)
        ports = []
    result = []
    for p in ports:
        try:
            s = serial.Serial(p)
            s.close()
            result.append(p)
        except (OSError, serial.SerialException):
            # Deliberate best effort: a port that cannot be opened is skipped.
            pass
    print(result)
    return result

In [None]:
# Serial ports that could be opened on this machine; tried in order by the next cell.
plist = port_list()

In [None]:
# First, set the connection configuration and port.
# The port name may vary depending on your system.
# Use the USB serial port.

# Try every detected port in order and keep the first one that opens.
for pname in plist:
    try:
        SU = Scale_UART(str(pname))
        print("%s port connected!" %(pname))
        break
    except serial.SerialException:
        print("%s port cannot be connected." %(pname))
else:
    # No port opened (or plist was empty). Fail here with a clear message
    # instead of a NameError on SU in a later cell.
    raise RuntimeError("No serial port could be connected; check the board's USB serial connection.")

### Setting the VDMA

In [None]:
## DO NOT CHANGE
## IT IS VDMA AND EACH MODULE'S BASE ADDRESS FOR CONTROL APB + AXI
##### PARAMETER INFORMATION
# In the control calls below VDMA0 is paired with the FC module,
# VDMA1 with the convolution module and VDMA2 with the pooling module.
VDMA0_BASE_ADDR= 0x0c00_0000
VDMA1_BASE_ADDR= 0x0c10_0000
VDMA2_BASE_ADDR= 0x0c20_0000

# Base addresses of the compute modules.
FC_BASE_ADDR   = 0x0d00_0000
CONV_BASE_ADDR = 0x0d10_0000
POOL_BASE_ADDR = 0x0d20_0000

### FIXED FOR OUR NETWORK
# NOTE(review): these three are not referenced anywhere else in this notebook;
# they look like UART frame field widths in bits — confirm against scale_uart.
OP_SIZE                        = 4
ADDR_SIZE                      = 28
DATA_SIZE                      = 32

Image address memory map  
---
Address range: 0x0000_0000 ~ 0x00FF_FFFF  
Size: 16384KB

In [None]:
### WHOLE IMAGES ###
# Write the whole quantized image file to DRAM starting at address 0x0000_0000
# and report how long the transfer took.
start = time.time()
SU.su_set_image({'BASE_ADDR': 0x0000_0000}, "./mnist_dataset_quan/images_100.npy")
print("image set done")
print("\tTotal time: {:.2f} sec".format(time.time() - start))

In [None]:
### ONE IMAGE ###
# Optional alternative to the whole-set upload above: write a single image to the
# same base address. Left commented out; uncomment to use it instead.
# start = time.time()
# SU.su_set_image_one({'BASE_ADDR': 0x0000_0000}, "./mnist_network_quan_param/test_set_quan_small.npy")
# print("image set done")
# print("\tTotal time: {:.2f} sec".format(time.time() - start))

Conv1 memory map
---
Convolution 1  
Weight   
&nbsp;&nbsp;&nbsp;Address range: 0x0100_0000 ~ 0x010F_FFFF   
&nbsp;&nbsp;&nbsp;Size: 1024KB   
bias   
&nbsp;&nbsp;&nbsp;Address range: 0x0110_0000 ~ 0x011F_FFFF   
&nbsp;&nbsp;&nbsp;Size: 1024KB   
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0600_0000 ~ 0x060F_FFFF       
&nbsp;&nbsp;&nbsp;Size: 1024KB   

In [None]:
# Write the Conv1 weights and bias to their DRAM regions (see the Conv1 memory map)
# and report the elapsed time.
print("conv1 parameter load")
start = time.time()
SU.su_set_conv_w({'BASE_ADDR': 0x0100_0000}, "./mnist_network_quan_param/mnist_conv1_weight_quan.npy")
SU.su_set_conv_b({'BASE_ADDR': 0x0110_0000}, "./mnist_network_quan_param/mnist_conv1_bias_quan.npy")
print("conv1 set done")
print("\tTotal time: {:.2f} sec".format(time.time() - start))

In [None]:
# Write the Conv2 weights and bias to their DRAM regions (see the Conv2 memory map)
# and report the elapsed time.
print("conv2 parameter load")
start = time.time()
SU.su_set_conv_w({'BASE_ADDR': 0x0200_0000}, "./mnist_network_quan_param/mnist_conv2_weight_quan.npy")
SU.su_set_conv_b({'BASE_ADDR': 0x0210_0000}, "./mnist_network_quan_param/mnist_conv2_bias_quan.npy")
print("conv2 set done")
print("\tTotal time: {:.2f} sec".format(time.time() - start))

Pool1 memory map
---
Max Pool 1  
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0610_0000 ~ 0x061F_FFFF      
&nbsp;&nbsp;&nbsp;Size: 1024KB   

Conv2 memory map
---
Convolution 2  
Weight   
&nbsp;&nbsp;&nbsp;Address range: 0x0200_0000 ~ 0x020F_FFFF  
&nbsp;&nbsp;&nbsp;Size: 1024KB   
bias   
&nbsp;&nbsp;&nbsp;Address range: 0x0210_0000 ~ 0x021F_FFFF   
&nbsp;&nbsp;&nbsp;Size: 1024KB   
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0620_0000 ~ 0x062F_FFFF       
&nbsp;&nbsp;&nbsp;Size: 1024KB   

Pool2 memory map
---
Max Pool 2  
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0630_0000 ~ 0x063F_FFFF      
&nbsp;&nbsp;&nbsp;Size: 1024KB   

FC1 memory map
---
Fully-Connected 1    
Weight   
&nbsp;&nbsp;&nbsp;Address range: 0x0300_0000 ~ 0x03DF_FFFF  
&nbsp;&nbsp;&nbsp;Size: 14336KB   
bias   
&nbsp;&nbsp;&nbsp;Address range: 0x03f0_0000 ~ 0x03FF_FFFF     
&nbsp;&nbsp;&nbsp;Size:  1024KB   
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0640_0000 ~ 0x064F_FFFF       
&nbsp;&nbsp;&nbsp;Size:  1024KB 

In [None]:
# Write the FC1 weights and bias to their DRAM regions (see the FC1 memory map)
# and report the elapsed time.
print("fc1 parameter load")
start = time.time()
SU.su_set_fc_w({'BASE_ADDR': 0x0300_0000}, "./mnist_network_quan_param/mnist_fc1_weight_quan.npy")
SU.su_set_fc_b({'BASE_ADDR': 0x03F0_0000}, "./mnist_network_quan_param/mnist_fc1_bias_quan.npy")
print("fc1 set done")
print("\tTotal time: {:.2f} sec".format(time.time() - start))

FC2 memory map
---
Fully-Connected 2  
Weight   
&nbsp;&nbsp;&nbsp;Address range: 0x0400_0000 ~ 0x040F_FFFF  
&nbsp;&nbsp;&nbsp;Size: 1024KB   
bias   
&nbsp;&nbsp;&nbsp;Address range: 0x0410_0000 ~ 0x041F_FFFF   
&nbsp;&nbsp;&nbsp;Size: 1024KB   
output   
&nbsp;&nbsp;&nbsp;Address range: 0x0650_0000 ~ 0x065F_FFFF       
&nbsp;&nbsp;&nbsp;Size: 1024KB 

In [None]:
# Write the FC2 weights and bias to their DRAM regions (see the FC2 memory map)
# and report the elapsed time.
print("fc2 parameter load")

start = time.time()
SU.su_set_fc_w({'BASE_ADDR': 0x0400_0000}, "./mnist_network_quan_param/mnist_fc2_weight_quan.npy")
SU.su_set_fc_b({'BASE_ADDR': 0x0410_0000}, "./mnist_network_quan_param/mnist_fc2_bias_quan.npy")
print("fc2 set done")
print("\tTotal time: {:.2f} sec".format(time.time() - start))

### You can check the parameters with the code below

In [None]:
# Host-side reference data for comparison with what is read back from DRAM below.
# NOTE(review): this loads images_1000.npy, while the board was loaded from
# images_100.npy — confirm both files start with the same image.
debug_data = np.load("./mnist_dataset_quan/images_1000.npy")

In [None]:
# Show the dimensions of the reference array.
print(debug_data.shape)

In [None]:
# Dump the first image of the reference data, four values per printed row.
debug_flat = debug_data.flatten()
for i in range(1 * 1 * 28 * 28 // 4):
    temp = debug_flat[4 * i:4 * i + 4]
    print(i, "\t", temp)

In [None]:
# Same dump as above, after converting the values with to_8bit_fixed_binary.
debug_flat_bin = to_8bit_fixed_binary(debug_flat)
for i in range(1 * 1 * 28 * 28 // 4):
    temp = debug_flat_bin[4 * i:4 * i + 4]
    print(i, "\t", temp)

In [None]:
# Read the first image back from DRAM, one 4-byte-spaced read per printed row,
# to check what was written.
base_addr_debug = 0x0000_0000  # input image
for i in range(1 * 1 * 28 * 28 // 4):
    data = SU.su_read_data(base_addr_debug + 4 * i)
    print(data)

### INFERENCE

In [None]:
###################################################################
#        Convolution 1 + ReLU
###################################################################
# Convolution
# - in:       (n, 1, 28, 28)
# - out:     (n, 32, 28, 28)
# - weight:    (32, 1, 3, 3)
# - bias:               (32)
# ReLU
# - in:      (n, 32, 28, 28)
# - out:     (n, 32, 28, 28)
###################################################################
# I: layer geometry. F / W / B / R: DRAM base address and transfer sizes of the
# input feature map, weights, bias and result (addresses match the memory map).
# NOTE(review): STRIDE_SIZE / HSIZE / VSIZE look like VDMA transfer parameters in
# bytes — confirm against scale_uart.
I = {'IN_CH': 1, 'OUT_CH': 32, 'FLEN': 28}
F = {'BASE_ADDR': 0x0000_0000, 'STRIDE_SIZE': 1*28*28, 'HSIZE': 1*28*28, 'VSIZE': 1}
W = {'BASE_ADDR': 0x0100_0000, 'STRIDE_SIZE': 32*3*3, 'HSIZE': 32*3*3, 'VSIZE': 1}
B = {'BASE_ADDR': 0x0110_0000, 'STRIDE_SIZE': 32, 'HSIZE': 32, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0600_0000, 'STRIDE_SIZE': 32*28*28, 'HSIZE': 32*28*28, 'VSIZE': 1}
SU.su_conv_control(I, F, W, B, R, VDMA1_BASE_ADDR, CONV_BASE_ADDR)

In [None]:
# Read back the Conv1 output region from DRAM, one 4-byte-spaced read per row.
a = 0x0600_0000
for i in range(32 * 28 * 28 // 4):
    temp = SU.su_read_data(a + 4 * i)
    print(i, "\t", temp)

In [None]:
###################################################################
#        Max Pool 1
###################################################################
# Max Pooling
# - in:      (n, 32, 28, 28)
# - out:     (n, 32, 14, 14)
###################################################################
# F is the Conv1 output region, R the Pool1 output region of the memory map.
I = {'IN_CH': 32, 'FLEN': 28}
F = {'BASE_ADDR': 0x0600_0000, 'STRIDE_SIZE': 32*28*28, 'HSIZE': 32*28*28, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0610_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
SU.su_pool_control(I, F, R, VDMA2_BASE_ADDR, POOL_BASE_ADDR)

In [None]:
# Read back the Pool1 output region from DRAM, one 4-byte-spaced read per row.
a = 0x0610_0000
for i in range(32 * 14 * 14 // 4):
    temp = SU.su_read_data(a + 4 * i)
    print(i, "\t", temp)

In [None]:
###################################################################
#        Convolution 2 + ReLU
###################################################################
# Convolution
# - in:       (n, 32, 14, 14)
# - out:      (n, 32, 14, 14)
# - weight:    (32, 32, 3, 3)
# - bias:                (32)
# ReLU
# - in:       (n, 32, 14, 14)
# - out:      (n, 32, 14, 14)
###################################################################
# F is the Pool1 output region; W / B are the Conv2 parameter regions and R the
# Conv2 output region of the memory map.
I = {'IN_CH': 32, 'OUT_CH': 32, 'FLEN': 14}
F = {'BASE_ADDR': 0x0610_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
W = {'BASE_ADDR': 0x0200_0000, 'STRIDE_SIZE': 32*32*3*3, 'HSIZE': 32*32*3*3, 'VSIZE': 1}
B = {'BASE_ADDR': 0x0210_0000, 'STRIDE_SIZE': 32, 'HSIZE': 32, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0620_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
SU.su_conv_control(I, F, W, B, R, VDMA1_BASE_ADDR, CONV_BASE_ADDR)

In [None]:
# Read back the Conv2 output region from DRAM, one 4-byte-spaced read per row.
a = 0x0620_0000
for i in range(32 * 14 * 14 // 4):
    temp = SU.su_read_data(a + 4 * i)
    print(i, "\t", temp)

In [None]:
###################################################################
#        Max Pool 2
###################################################################
# Max Pooling
# - in:      (n, 32, 14, 14)
# - out:       (n, 32, 7, 7)
###################################################################
# F is the Conv2 output region, R the Pool2 output region of the memory map.
I = {'IN_CH': 32, 'FLEN': 14}
F = {'BASE_ADDR': 0x0620_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0630_0000, 'STRIDE_SIZE': 32*7*7, 'HSIZE': 32*7*7, 'VSIZE': 1}
SU.su_pool_control(I, F, R, VDMA2_BASE_ADDR, POOL_BASE_ADDR)

In [None]:
# Read back the Pool2 output region from DRAM, one 4-byte-spaced read per row.
a = 0x0630_0000
for i in range(32 * 7 * 7 // 4):
    temp = SU.su_read_data(a + 4 * i)
    print(i, "\t", temp)

In [None]:
###################################################################
#        Fully-Connected 1 + ReLU
###################################################################
# Fully-Connected
# - in:              (1568,)
# - out:              (256,)
# - weight:      (256, 1568)
# - bias:             (256,)
# ReLU
# - in:               (256,)
# - out:              (256,)
###################################################################
# F is the Pool2 output region (32*7*7 = 1568 values); R is the FC1 output region.
# NOTE(review): W is described here with VSIZE 8 and HSIZE 1568*256/8, while
# inference() further down uses VSIZE 4 and HSIZE 1568*256/4 for the same weights.
# HSIZE * VSIZE is identical in both — confirm which split the hardware expects.
F = {'BASE_ADDR': 0x0630_0000, 'STRIDE_SIZE': 1568, 'HSIZE': 1568, 'VSIZE': 1}
W = {'BASE_ADDR': 0x0300_0000, 'STRIDE_SIZE': int(1568*256/8), 'HSIZE': int(1568*256/8), 'VSIZE': 8}
B = {'BASE_ADDR': 0x03F0_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0640_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
SU.su_fc_control(F, W, B, R, VDMA0_BASE_ADDR, FC_BASE_ADDR)

In [None]:
# Read back the FC1 output region from DRAM, one 4-byte-spaced read per row.
a = 0x0640_0000
for i in range(256 // 4):
    temp = SU.su_read_data(a + 4 * i)
    print(i, "\t", temp)

In [None]:
###################################################################
#        Fully-Connected 2
###################################################################
# Fully-Connected
# - in:              (256,)
# - out:              (10,)
# - weight:      (10,  256)
# - bias:             (10,)
###################################################################
# F is the FC1 output region; W / B are the FC2 parameter regions and R the
# FC2 output region of the memory map.
F = {'BASE_ADDR': 0x0640_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
W = {'BASE_ADDR': 0x0400_0000, 'STRIDE_SIZE': 10*256, 'HSIZE': 10*256, 'VSIZE': 1}
B = {'BASE_ADDR': 0x0410_0000, 'STRIDE_SIZE': 10, 'HSIZE': 10, 'VSIZE': 1}
R = {'BASE_ADDR': 0x0650_0000, 'STRIDE_SIZE': 10, 'HSIZE': 10, 'VSIZE': 1}
SU.su_fc_control(F, W, B, R, VDMA0_BASE_ADDR, FC_BASE_ADDR)

In [None]:
# Read back the FC2 output region from DRAM.
# Three 4-byte-spaced reads cover the 10-entry result (HSIZE 10 in the FC2 cell),
# assuming one byte per entry as in the other read-back cells — confirm.
a = 0x0650_0000
for i in range(3):  # the original `range(3:` was missing its closing parenthesis
    temp = SU.su_read_data(a + 4*i)
    print(i, "\t", temp)

### All Inference function

In [None]:
def inference(image_idx: int) -> int:
    """Run the whole network on the board for one uploaded image and return its label.

    Issues the same six control calls as the step-by-step cells above
    (Conv1, Pool1, Conv2, Pool2, FC1, FC2), then reads the result word at
    ``FC_BASE_ADDR + 0x20``.

    Args:
        image_idx: index of the image in DRAM; the input feature address is
            ``0x0000_0000 + 784 * image_idx`` (784 = 1*28*28 per image).

    Returns:
        The value read from the board, decoded as a signed big-endian
        integer, minus 1.
    """
    # Conv1 + ReLU: image -> 0x0600_0000
    I = {'IN_CH': 1, 'OUT_CH': 32, 'FLEN': 28}
    F = {'BASE_ADDR': 0x0000_0000+784*image_idx, 'STRIDE_SIZE': 1*28*28, 'HSIZE': 1*28*28, 'VSIZE': 1}
    W = {'BASE_ADDR': 0x0100_0000, 'STRIDE_SIZE': 32*3*3, 'HSIZE': 32*3*3, 'VSIZE': 1}
    B = {'BASE_ADDR': 0x0110_0000, 'STRIDE_SIZE': 32, 'HSIZE': 32, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0600_0000, 'STRIDE_SIZE': 32*28*28, 'HSIZE': 32*28*28, 'VSIZE': 1}
    SU.su_conv_control(I, F, W, B, R, VDMA1_BASE_ADDR, CONV_BASE_ADDR)
    
    # Max Pool 1: 0x0600_0000 -> 0x0610_0000
    I = {'IN_CH': 32, 'FLEN': 28}
    F = {'BASE_ADDR': 0x0600_0000, 'STRIDE_SIZE': 32*28*28, 'HSIZE': 32*28*28, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0610_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
    SU.su_pool_control(I, F, R, VDMA2_BASE_ADDR, POOL_BASE_ADDR)
    
    # Conv2 + ReLU: 0x0610_0000 -> 0x0620_0000
    I = {'IN_CH': 32, 'OUT_CH': 32, 'FLEN': 14}
    F = {'BASE_ADDR': 0x0610_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
    W = {'BASE_ADDR': 0x0200_0000, 'STRIDE_SIZE': 32*32*3*3, 'HSIZE': 32*32*3*3, 'VSIZE': 1}
    B = {'BASE_ADDR': 0x0210_0000, 'STRIDE_SIZE': 32, 'HSIZE': 32, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0620_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
    SU.su_conv_control(I, F, W, B, R, VDMA1_BASE_ADDR, CONV_BASE_ADDR)
    
    # Max Pool 2: 0x0620_0000 -> 0x0630_0000
    I = {'IN_CH': 32, 'FLEN': 14}
    F = {'BASE_ADDR': 0x0620_0000, 'STRIDE_SIZE': 32*14*14, 'HSIZE': 32*14*14, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0630_0000, 'STRIDE_SIZE': 32*7*7, 'HSIZE': 32*7*7, 'VSIZE': 1}
    SU.su_pool_control(I, F, R, VDMA2_BASE_ADDR, POOL_BASE_ADDR)
    
    # FC1 + ReLU: 0x0630_0000 -> 0x0640_0000
    # NOTE(review): the standalone FC1 cell splits W as VSIZE 8 / HSIZE 1568*256/8,
    # here it is VSIZE 4 / HSIZE 1568*256/4 — confirm which one the hardware expects.
    F = {'BASE_ADDR': 0x0630_0000, 'STRIDE_SIZE': 1568, 'HSIZE': 1568, 'VSIZE': 1}
    W = {'BASE_ADDR': 0x0300_0000, 'STRIDE_SIZE': int(1568*256/4), 'HSIZE': int(1568*256/4), 'VSIZE': 4}
    B = {'BASE_ADDR': 0x03F0_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0640_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
    SU.su_fc_control(F, W, B, R, VDMA0_BASE_ADDR, FC_BASE_ADDR)
    
    # FC2: 0x0640_0000 -> 0x0650_0000
    F = {'BASE_ADDR': 0x0640_0000, 'STRIDE_SIZE': 256, 'HSIZE': 256, 'VSIZE': 1}
    W = {'BASE_ADDR': 0x0400_0000, 'STRIDE_SIZE': 10*256, 'HSIZE': 10*256, 'VSIZE': 1}
    B = {'BASE_ADDR': 0x0410_0000, 'STRIDE_SIZE': 10, 'HSIZE': 10, 'VSIZE': 1}
    R = {'BASE_ADDR': 0x0650_0000, 'STRIDE_SIZE': 10, 'HSIZE': 10, 'VSIZE': 1}
    SU.su_fc_control(F, W, B, R, VDMA0_BASE_ADDR, FC_BASE_ADDR)
    ################################ FIXED ##############################
    # Read the result word from the FC module and decode it.
    # NOTE(review): the "- 1" suggests the board reports a 1-based class index — confirm.
    label = SU.su_read_data(FC_BASE_ADDR + 0x20)
    label = int.from_bytes(label, 'big', signed=True)
    return (label-1)

### Check accuracy

In [None]:
# Fix
# Read the same result word that inference() reads after FC2 and show the decoded
# label. A `return` outside a function is a SyntaxError, so the value is printed.
label = SU.su_read_data(FC_BASE_ADDR + 0x20)
label = int.from_bytes(label, 'big', signed=True)
print(label - 1)

In [None]:
# Run the first `count` uploaded images through the board and score the
# predictions against the labels in y_test.
acc = 0
count = 100
for i in range(count):
    pred = inference(i)
    if pred == y_test[i]:
        acc += 1
    if not i % 10:
        # Every tenth sample: show the input image and its ground-truth label.
        gen_image(X_test_origin[i]).show()
        print("Label: %d" % y_test[i])
print("Total accuracy: ", acc / count * 100)