In [1]:
from pynq import Overlay, GPIO, Register, allocate, MMIO
import os
import numpy as np

import struct


In [2]:
def float_to_int(f):
    return (struct.unpack('<I', struct.pack('<f', f))[0])

def int_to_float(i):
    return (struct.unpack('<f', struct.pack('<I', i))[0])

In [3]:
overlay = Overlay("nn.bit")
fcc1=overlay.forward_fcc_0
fcc1.register_map

bck1=overlay.backward_fcc_0
bck1.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x = Register(x=0),
  w = Register(w=0),
  b = Register(b=0),
  dx = Register(dx=0),
  dy = Register(dy=0),
  xdimension = Register(xdimension=0),
  ydimension = Register(ydimension=0),
  lr = Register(lr=0)
}

In [7]:
class FullyConnectedLayer():
    def __init__(self,xdim,ydim,base_addr):
        self.xdim=xdim
        self.ydim=ydim

        self.base_addr=base_addr

        self.BASE_ADDRESS_X=base_addr
        self.BASE_ADDRESS_DX=self.BASE_ADDRESS_X+xdim*4+4

        self.BASE_ADDRESS_W=self.BASE_ADDRESS_DX+xdim*4+4

        self.BASE_ADDRESS_B=self.BASE_ADDRESS_W+xdim*ydim*4+4

        self.BASE_ADDRESS_Y=self.BASE_ADDRESS_B+ydim*4+4
        self.BASE_ADDRESS_DY=self.BASE_ADDRESS_Y+ydim*4+4  

        self.mmio_x= MMIO(self.BASE_ADDRESS_X,self.xdim*4)
        self.mmio_dx=MMIO(self.BASE_ADDRESS_DX,self.xdim*4)

        self.mmio_w= MMIO(self.BASE_ADDRESS_W,self.xdim*self.ydim*4)

        self.mmio_y= MMIO(self.BASE_ADDRESS_Y,self.ydim*4)
        self.mmio_dy= MMIO(self.BASE_ADDRESS_DY,self.ydim*4)

        self.mmio_b= MMIO(self.BASE_ADDRESS_B,self.ydim*4)

        self.config_dic={'base':base_addr, 'x':self.BASE_ADDRESS_X,'w':self.BASE_ADDRESS_W,'y':self.BASE_ADDRESS_Y,'b':self.BASE_ADDRESS_B, 'dx':self.BASE_ADDRESS_DX, 'dy':self.BASE_ADDRESS_DY,'xdim':self.xdim,'ydim':self.ydim}

    def get_config_dic(self):
        
        return self.config_dic


    def initHardware(self,fwip,bckip,lr):

        self.fwip=fwip
        self.fwip.register_map.x=self.BASE_ADDRESS_X
        self.fwip.register_map.w=self.BASE_ADDRESS_W
        self.fwip.register_map.y=self.BASE_ADDRESS_Y
        self.fwip.register_map.b=self.BASE_ADDRESS_B
        self.fwip.register_map.xdimension=self.xdim
        self.fwip.register_map.ydimension=self.ydim

        self.bckip= bckip
        self.bckip.register_map.x=self.BASE_ADDRESS_X
        self.bckip.register_map.w=self.BASE_ADDRESS_W
        self.bckip.register_map.b=self.BASE_ADDRESS_B
        self.bckip.register_map.xdimension=self.xdim
        self.bckip.register_map.ydimension=self.ydim
        self.bckip.register_map.dx=self.BASE_ADDRESS_DX
        self.bckip.register_map.dy=self.BASE_ADDRESS_DY
        self.bckip.register_map.lr=float_to_int(lr)

    def reset_weights(self):

        for i in range(self.xdim*self.ydim):
            self.mmio_w.write(i*4,float_to_int(np.random.random()*0.1))
        
        for i in range(self.ydim):
            self.mmio_b.write(i*4,float_to_int(np.random.random()*0.1))
            
    def get_weights(self):
        
        w=[]
        b=[]
        
        for i in range(self.xdim*self.ydim):
            w.append(int_to_float(self.mmio_w.read(i*4)))
        
        for i in range(self.ydim):
            b.append(int_to_float(self.mmio_b.read(i*4)))
        
        return w,b
    
    def get_input(self):
        x=[]     
        for i in range(self.xdim):
            x.append(int_to_float(self.mmio_x.read(i*4)))     
        return x
    

    def reset_input(self):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,0)
            self.mmio_dx.write(i*4,0)

    def reset_output(self):
        for i in range(self.ydim):
            self.mmio_y.write(i*4,0)
            self.mmio_dy.write(i*4,0)

    def set_input(self,x):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,float_to_int(x[i]))
    
    def get_output(self):
        y=[]
        for i in range(self.ydim):
            y.append(int_to_float(self.mmio_y.read(i*4)))
        return y
    
    def get_dy(self):
        dy=[]
        for i in range(self.ydim):
            dy.append(int_to_float(self.mmio_dy.read(i*4)))
        return dy

    def set_dy(self,dy):
        
        for i in range(self.ydim):
            self.mmio_dy.write(i*4,float_to_int(dy[i]))

    def fwprop(self):

        self.fwip.write(0x00, 1)
        fpga_state = self.fwip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.fwip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.fwip.write(0x00, 4)
                break

        self.fwip.write(0x00, 4)

    def bckprop(self):
        self.bckip.write(0x00, 1)
        fpga_state = self.bckip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.bckip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.bckip.write(0x00, 4)
                break

        self.bckip.write(0x00, 4)
        


In [10]:
import copy
class Neural_Net():

    def __init__(self,mem_base_addr):

        self.mem_base_addr=mem_base_addr
        self.layers=[]
        self.last_layer_out_address=mem_base_addr
        self.layer_base_addresses=[]
        self.layer_configs=[]
        self.nlayers=0

    def add(self,input_shape,output_shape):
        layer=FullyConnectedLayer(input_shape,output_shape,self.last_layer_out_address)
        self.layers.append(layer)
        cfg=layer.get_config_dic()
        self.last_layer_out_address=cfg['y']
        layer.reset_weights()
        self.layer_configs.append(cfg)
        self.nlayers+=1
       
    
    def get_layer_output(self,i):

        return self.layers[i].get_output()

    
    def get_weights(self,i):
        
        return (self.layers[i].get_weights())
    
    def get_input(self,i):
        return self.layers[i].get_input()

    def set_weights(self,weights):        
        for i in range(self.nlayers):
            self.layers[i].set_weights(weights[i])
            
    def get_dy(self,i):
        return self.layers[i].get_dy()
    
    def get_output(self,i):           
        
        return (self.layers[i].get_output())
    
    def calculate_mse_loss(self,y,y_):
        return np.sum(np.square(y-y_))/len(y)

    def calculate_gradient(self,y,ytrue):
        return 2*(y-ytrue)/len(y)
    
#     def debug():
#         for i in range(self.nlayers):
            
#             w,b=self.get_weights(i)
#             y=self.get_output(i)
#             x=self.get_input(i)
#             print("Layer"+i+"input="+x)
#             print("Layer"+i+"weights="+w)
#             print("Layer"+i+"bias="+b)
#             print("Layer"+i+"outputs="+y)
        
    def runfwprop(self,x,learning_rate):

        self.layers[0].set_input(x)
        for i in range(self.nlayers):
            self.layers[i].initHardware(fcc1,bck1,learning_rate)
            self.layers[i].fwprop()

    def runbackprop(self,ylabel,learning_rate):

        pred=self.layers[self.nlayers-1].get_output()
        grad=self.calculate_gradient(pred,ylabel)

        self.layers[self.nlayers-1].set_dy(grad)
        
        for i in range(self.nlayers):
            self.layers[self.nlayers-i-1].initHardware(fcc1,bck1,learning_rate)
            self.layers[self.nlayers-i-1].bckprop()    
    
#     def predict(self,x):
        
#         self.runfwprop(x)
        
#         return self.get_output()[self.nlayers-1]   
    
    def train(self,x,y,epochs,learning_rate):
        x1=x.copy()
        y1=y.copy()
        for i in range(epochs):
            for j in range(len(x)):
                k=1
                self.runfwprop(x1[j],learning_rate)
                print("After fwprop")

                
                w,b=self.get_weights(k)
                y=self.get_output(k)
                dy=self.get_dy(k)
                x=self.get_input(k)
                base_addr=self.layer_configs[k]['x']
                y_addr=self.layer_configs[k]['y']
                dx_addr=self.layer_configs[k]['dx']
                dy_addr=self.layer_configs[k]['dy']

                print("Layer"+str(k)+" inputs="+str(x))
                print("Layer"+str(k)+" weights="+str(w))
                print("Layer"+str(k)+" bias="+str(b))
                print("Layer"+str(k)+" outputs="+str(y))
                print("Layer"+str(k)+" ygrads="+str(dy))
                
                
                self.runbackprop(y1[j],learning_rate)
                print("After backprop")

                w,b=self.get_weights(k)
                y=self.get_output(k)
                dy=self.get_dy(k)
                x=self.get_input(k)
                base_addr=self.layer_configs[k]['x']
                y_addr=self.layer_configs[k]['y']
                dx_addr=self.layer_configs[k]['dx']
                dy_addr=self.layer_configs[k]['dy']
            
                print("Layer"+str(k)+" inputs="+str(x))
                print("Layer"+str(k)+" weights="+str(w))
                print("Layer"+str(k)+" bias="+str(b))
                print("Layer"+str(k)+" outputs="+str(y))
                print("Layer"+str(k)+" ygrads="+str(dy))
                    
#                     print("Layer"+str(k)+" base address="+str(base_addr))
#                     print("Layer"+str(k)+" dx address="+str(dx_addr))
#                     print("Layer"+str(k)+" dy address="+str(dy_addr))

In [11]:

import numpy as np

x=[]
y=[]

for i in range(10):
    
    x.append(np.array([i*0.01]))
    y.append(np.array([np.sin(i*0.01)]))
model=Neural_Net(0x4001_0000)
model.add(1,8)
model.add(8,1)

model.train(x,y,20,0.1)

After fwprop
Layer1 inputs=[0.08601459860801697, 0.09538129717111588, 0.0024774998892098665, 0.07921002060174942, 0.007754609454423189, 0.0038973698392510414, 0.08118902146816254, 0.09967482835054398]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-1.1774008188970072e-18]
Layer1 ygrads=[90131283968.0]
After backprop
Layer1 inputs=[0.08601459860801697, 0.09538129717111588, 0.0024774998892098665, 0.07921002060174942, 0.007754609454423189, 0.0038973698392510414, 0.08118902146816254, 0.09967482835054398]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-1.1774008188970072e-18]
Layer1 ygrads=[-2.3548016377940144e-18]
Af

After backprop
Layer1 inputs=[-5.431879155392184e+17, 3.7408323902241927e-32, 1.1861191691354505e+19, 23877.154296875, -1.3861141457146378e-20, 6.709368606661227e+19, 1.6748661019130558e-18, 7.447746234199759e-29]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[1.1634981940090239e-21]
Layer1 ygrads=[-0.09995833784341812]
After fwprop
Layer1 inputs=[-0.00017837439372669905, -4.055096536337288e-10, -2.304010982001614e-09, 1.7541343013600553e-23, 5.081869712368625e-13, -4.4058222348208886e-11, 38973060.0, 1084599168.0]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.4807083892972897e-14]
Layer1 ygrads=[-0.099958337

After backprop
Layer1 inputs=[-0.00017837439372669905, -4.055096536337288e-10, -2.304010982001614e-09, 1.7541343013600553e-23, 5.081869712368625e-13, -4.4058222348208886e-11, 38973060.0, 1084599168.0]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.4807083892972897e-14]
Layer1 ygrads=[-0.11992800980806351]
After fwprop
Layer1 inputs=[1.4922968266468632e-28, -9.57219294372325e-17, 174819950592.0, -1.3194564042612876e+26, -1.2043175776287438e-28, 4.575702038541897e-31, -1.6992298412787932e-07, -226.6717987060547]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-3.566463097071093e-12]
Layer1 ygrads=[-0.119928009808

After backprop
Layer1 inputs=[1.4922968266468632e-28, -9.57219294372325e-17, 174819950592.0, -1.3194564042612876e+26, -1.2043175776287438e-28, 4.575702038541897e-31, -1.6992298412787932e-07, -226.6717987060547]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-3.566463097071093e-12]
Layer1 ygrads=[-0.13988569378852844]
After fwprop
Layer1 inputs=[0.08601459860801697, 0.09538129717111588, 0.0024774998892098665, 0.07921002060174942, 0.007754609454423189, 0.0038973698392510414, 0.08118902146816254, 0.09967482835054398]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-1.1774008188970072e-18]
Layer1 ygrads=[-0.1398856937

After backprop
Layer1 inputs=[0.08601459860801697, 0.09538129717111588, 0.0024774998892098665, 0.07921002060174942, 0.007754609454423189, 0.0038973698392510414, 0.08118902146816254, 0.09967482835054398]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-1.1774008188970072e-18]
Layer1 ygrads=[-2.3548016377940144e-18]
After fwprop
Layer1 inputs=[1852036.75, -5.271434356799239e-31, 2.0727055878018288e+35, -1.7432854016446833e+37, -934192960.0, 4.167765154727015e-25, -5.219981416652701e-21, -1.0224024376946871e+36]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.888749671759722e-21]
Layer1 ygrads=[-2.3548016377940144e

After backprop
Layer1 inputs=[-0.00017837439372669905, -4.055096536337288e-10, -2.304010982001614e-09, 1.7541343013600553e-23, 5.081869712368625e-13, -4.4058222348208886e-11, 38973060.0, 1084599168.0]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.4807083892972897e-14]
Layer1 ygrads=[-0.11992800980806351]
After fwprop
Layer1 inputs=[1.4922968266468632e-28, -9.57219294372325e-17, 174819950592.0, -1.3194564042612876e+26, -1.2043175776287438e-28, 4.575702038541897e-31, -1.6992298412787932e-07, -226.6717987060547]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-3.566463097071093e-12]
Layer1 ygrads=[-0.119928009808

After backprop
Layer1 inputs=[0.08601459860801697, 0.09538129717111588, 0.0024774998892098665, 0.07921002060174942, 0.007754609454423189, 0.0038973698392510414, 0.08118902146816254, 0.09967482835054398]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-1.1774008188970072e-18]
Layer1 ygrads=[-2.3548016377940144e-18]
After fwprop
Layer1 inputs=[1852036.75, -5.271434356799239e-31, 2.0727055878018288e+35, -1.7432854016446833e+37, -934192960.0, 4.167765154727015e-25, -5.219981416652701e-21, -1.0224024376946871e+36]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.888749671759722e-21]
Layer1 ygrads=[-2.3548016377940144e

After backprop
Layer1 inputs=[1852036.75, -5.271434356799239e-31, 2.0727055878018288e+35, -1.7432854016446833e+37, -934192960.0, 4.167765154727015e-25, -5.219981416652701e-21, -1.0224024376946871e+36]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-5.888749671759722e-21]
Layer1 ygrads=[-0.019999666139483452]
After fwprop
Layer1 inputs=[-4.252067845977088e-35, -4.81513121471529e+16, 1.3849635873719072e-16, -4.3393966752428643e-23, 59788349440.0, 6.540371471355271e+32, 2.8024561837813877e-12, -201633.328125]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[-3.28308764919611e+22]
Layer1 ygrads=[-0.019999666139483452]


After fwprop
Layer1 inputs=[-113.03935241699219, 3.7984687687982535e-14, -10715636736.0, 1.2507459359767381e-05, -3826454364160.0, -8863856197632.0, -0.0015045572072267532, -3.97651624368532e-26]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[1.5807492034086712e-12]
Layer1 ygrads=[2.2765971961835034e+21]
After backprop
Layer1 inputs=[-113.03935241699219, 3.7984687687982535e-14, -10715636736.0, 1.2507459359767381e-05, -3826454364160.0, -8863856197632.0, -0.0015045572072267532, -3.97651624368532e-26]
Layer1 weights=[0.04074086993932724, 0.03715186566114426, 0.05415401607751846, 0.04342549294233322, 0.04154830425977707, 0.05684860050678253, 0.029259327799081802, 0.015475516207516193]
Layer1 bias=[0.031595516949892044]
Layer1 outputs=[1.5807492034086712e-12]
Layer1 ygrads=[-0.07997866719961166]
After fw