In [1]:
from pynq import Overlay, GPIO, Register, allocate, MMIO
import os
import numpy as np

import struct


In [2]:
def float_to_int(f):
    return (struct.unpack('<I', struct.pack('<f', f))[0])

def int_to_float(i):
    return (struct.unpack('<f', struct.pack('<I', i))[0])

In [3]:
overlay = Overlay("nn.bit")
fcc1=overlay.forward_fcc_0
fcc1.register_map

bck1=overlay.backward_fcc_0
bck1.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x = Register(x=0),
  w = Register(w=0),
  b = Register(b=0),
  dx = Register(dx=0),
  dy = Register(dy=0),
  xdimension = Register(xdimension=0),
  ydimension = Register(ydimension=0),
  lr = Register(lr=0)
}

In [4]:
class FullyConnectedLayer():
    def __init__(self,xdim,ydim,base_addr):
        self.xdim=xdim
        self.ydim=ydim

        self.base_addr=base_addr

        self.BASE_ADDRESS_X=base_addr
        self.BASE_ADDRESS_DX=self.BASE_ADDRESS_X+xdim*4+4

        self.BASE_ADDRESS_W=self.BASE_ADDRESS_DX+xdim*4+4

        self.BASE_ADDRESS_B=self.BASE_ADDRESS_W+xdim*ydim*4+4

        self.BASE_ADDRESS_Y=self.BASE_ADDRESS_B+ydim*4+4
        self.BASE_ADDRESS_DY=self.BASE_ADDRESS_Y+ydim*4+4  

        self.mmio_x= MMIO(self.BASE_ADDRESS_X,self.xdim*4)
        self.mmio_dx=MMIO(self.BASE_ADDRESS_DX,self.xdim*4)

        self.mmio_w= MMIO(self.BASE_ADDRESS_W,self.xdim*self.ydim*4)

        self.mmio_y= MMIO(self.BASE_ADDRESS_Y,self.ydim*4)
        self.mmio_dy= MMIO(self.BASE_ADDRESS_DY,self.ydim*4)

        self.mmio_b= MMIO(self.BASE_ADDRESS_B,self.ydim*4)

        self.config_dic={'base':base_addr, 'x':self.BASE_ADDRESS_X,'w':self.BASE_ADDRESS_W,'y':self.BASE_ADDRESS_Y,'b':self.BASE_ADDRESS_B, 'dx':self.BASE_ADDRESS_DX, 'dy':self.BASE_ADDRESS_DY,'xdim':self.xdim,'ydim':self.ydim}


    def get_config_dic(self):
        
        return self.config_dic


    def initHardware(self,fwip,bckip,lr):

        self.fwip=fwip
        self.fwip.register_map.x=self.BASE_ADDRESS_X
        self.fwip.register_map.w=self.BASE_ADDRESS_W
        self.fwip.register_map.y=self.BASE_ADDRESS_Y
        self.fwip.register_map.b=self.BASE_ADDRESS_B
        self.fwip.register_map.xdimension=self.xdim
        self.fwip.register_map.ydimension=self.ydim

        self.bckip= bckip
        self.bckip.register_map.x=self.BASE_ADDRESS_X
        self.bckip.register_map.w=self.BASE_ADDRESS_W
        self.bckip.register_map.b=self.BASE_ADDRESS_B
        self.bckip.register_map.xdimension=self.xdim
        self.bckip.register_map.ydimension=self.ydim
        self.bckip.register_map.dx=self.BASE_ADDRESS_DX
        self.bckip.register_map.dy=self.BASE_ADDRESS_DY
        self.bckip.register_map.lr=lr

        
    def set_weights(self,w,b):        
        for i in range(self.xdim*self.ydim):
            self.mmio_w.write(i*4,float_to_int(w[i]))
        for i in range(self.ydim):
            self.mmio_b.write(i*4,float_to_int(b[i]))

    def reset_weights(self):
        for i in range(self.xdim*self.ydim):
            self.mmio_w.write(i*4,float_to_int(np.random.random()*0.1))
        
        for i in range(self.ydim):
            self.mmio_b.write(i*4,float_to_int(np.random.random()*0.1))
            
    def get_weights(self):
        
        w=[]
        b=[]        
        for i in range(self.xdim*self.ydim):
            w.append(int_to_float(self.mmio_w.read(i*4)))
        
        for i in range(self.ydim):
            b.append(int_to_float(self.mmio_b.read(i*4)))
        
        return w,b

    def set_input(self,x):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,float_to_int(x[i]))
    def get_input(self):
        x=[]     
        for i in range(self.xdim):
            x.append(int_to_float(self.mmio_x.read(i*4)))     
        return x
    def reset_input(self):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,0)
            self.mmio_dx.write(i*4,0)

            
    def reset_output(self):
        for i in range(self.ydim):
            self.mmio_y.write(i*4,0)
            self.mmio_dy.write(i*4,0)    
    def get_output(self):
        y=[]
        for i in range(self.ydim):
            y.append(int_to_float(self.mmio_y.read(i*4)))
        return y

    
    
    def set_dy(self,dy):        
        for i in range(self.ydim):
            self.mmio_dy.write(i*4,float_to_int(dy[i]))
    def get_dy(self):
        dy=[]
        for i in range(self.ydim):
            dy.append(int_to_float(self.mmio_dy.read(i*4)))
        return dy

    

    def fwprop(self):

        self.fwip.write(0x00, 1)
        fpga_state = self.fwip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.fwip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.fwip.write(0x00, 4)
                break

        self.fwip.write(0x00, 4)

    def bckprop(self):
        self.bckip.write(0x00, 1)
        fpga_state = self.bckip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.bckip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.bckip.write(0x00, 4)
                break

        self.bckip.write(0x00, 4)
        


In [None]:
import copy
class Neural_Net():

    def __init__(self,mem_base_addr,fcc1,bck1):

        self.mem_base_addr=mem_base_addr
        self.layers=[]
        self.last_layer_out_address=mem_base_addr
        self.layer_base_addresses=[]
        self.layer_configs=[]
        self.nlayers=0
        self.fcc1=fcc1
        self.bck1=bck1

    def add(self,input_shape,output_shape):
        layer=FullyConnectedLayer(input_shape,output_shape,self.last_layer_out_address)
        self.layers.append(layer)
        cfg=layer.get_config_dic()
        self.last_layer_out_address=cfg['y']
        layer.reset_output()
        layer.reset_weights()
        self.layer_configs.append(cfg)
        self.nlayers+=1
       
    def get_input(self,i):
        return self.layers[i].get_input()
    def get_layer_output(self,i):
        return self.layers[i].get_output()
    

    def set_weights(self,weights):        
        for i in range(self.nlayers):
            self.layers[i].set_weights(weights[i])    
    def get_weights(self,i):      
        return (self.layers[i].get_weights())
    
            
    def set_dy(self,i,grad):
        self.layers[i].set_dy(grad)        
    def get_dy(self,i):
        return self.layers[i].get_dy()
    

    
    def calculate_mse_loss(self,x,y):
        preds=np.zeros(len(y))
        for i in range(len(x)):
            preds[i]=self.predict(x[i],0.01)[0]
            
        return np.sum(np.square(np.array(y)-preds))/len(y)

    def calculate_gradient(self,y,ytrue):
        return 2*(y-ytrue)/len(y)
    
    
    def predict(self,x,lr):
        self.runfwprop(x,lr)
        return self.get_layer_output(self.nlayers-1)
    
        
    def runfwprop(self,x,learning_rate):

        self.layers[0].set_input(x)
        for i in range(self.nlayers):
            self.layers[i].initHardware(self.fcc1,self.bck1,learning_rate)
            self.layers[i].fwprop()

    def runbackprop(self,ylabel,learning_rate):

        pred=self.layers[self.nlayers-1].get_output()
        grad=self.calculate_gradient(pred,ylabel)
        self.layers[self.nlayers-1].set_dy(grad)
        
        for i in range(self.nlayers):
            self.layers[self.nlayers-i-1].initHardware(self.fcc1,self.bck1,learning_rate)
            self.layers[self.nlayers-i-1].bckprop()    
    
 
    
    def train(self,x,y,epochs,learning_rate):
        x1=x.copy()
        y1=y.copy()
        learning_rate=float_to_int(learning_rate)
        for i in range(epochs):
            for j in range(len(x)):

                self.runfwprop(x1[j],learning_rate)     
                self.runbackprop(y1[j],learning_rate)
                
            print(self.calculate_mse_loss(x[0:99],y[0:99]))
#                 for k in range(self.nlayers):
#                     w,b=self.get_weights(k)
#                     y=self.get_layer_output(k)
#                     dy=self.get_dy(k)
#                     x=self.get_input(k)
#                     base_addr=self.layer_configs[k]['x']
#                     y_addr=self.layer_configs[k]['y']
#                     dx_addr=self.layer_configs[k]['dx']
#                     dy_addr=self.layer_configs[k]['dy']

#                     print("Layer"+str(k)+" inputs="+str(x))
#                     print("Layer"+str(k)+" weights="+str(w))
#                     print("Layer"+str(k)+" bias="+str(b))
#                     print("Layer"+str(k)+" outputs="+str(y))
#                     print("Layer"+str(k)+" ygrads="+str(dy))
                

                
                
                    
#                     print("Layer"+str(k)+" base address="+str(base_addr))
#                     print("Layer"+str(k)+" dx address="+str(dx_addr))
#                     print("Layer"+str(k)+" dy address="+str(dy_addr))

In [None]:

x=[]
y=[]

for i in range(1000):
    x.append(np.array([0.01*i]))
    y.append(np.array([np.sin(0.01*i)]))

model=Neural_Net(0x4001_0000,fcc1,bck1)
model.add(1,8)
model.add(8,1)

model.train(x,y,100,0.1)



18.013877449592627
13.760083949301684
12.56832669965291
11.68425758044135
10.91960775962628
11.613590987718114
11.25843711140584
10.497327551589855
9.87535865454394
9.148587518026023
8.431214453065666
7.998799487252629
7.945014220800885
7.380116346127481
7.121442684489646
6.969963442024112
6.781694855308838
6.688501576306678
6.6587080257605855
6.496037088695261
6.5867840967795575
6.393335071620271
6.324759215372221
6.290391437535335
6.264417481851676
6.245640594925691
6.275232806500046
6.19007069131819
6.201171050159265
6.159397233732588
6.136467720035809
6.19533185334843
6.15404367126199
6.162127571765463
6.127087062838329
6.11600235865654
6.125757791664297
6.123058146779094
6.134725507768597
6.121024675759877
6.125230776378427
6.127142535303264
6.119670548754807
6.114855112149044
6.114725754216304
6.1177208464135475
6.116921604216609
6.163238088343073
6.112016124283813
6.114036238485295
6.112592713583067
6.111688348883816
6.115068348282334
6.126997093186591
6.119375777701167
6.114956

In [19]:
bck1.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x = Register(x=1073807360),
  w = Register(w=1073807392),
  b = Register(b=1073807408),
  dx = Register(dx=1073807376),
  dy = Register(dy=1073807424),
  xdimension = Register(xdimension=3),
  ydimension = Register(ydimension=1),
  lr = Register(lr=1036831949)
}

In [20]:
fcc1.register_map


RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x = Register(x=1073807360),
  w = Register(w=1073807392),
  y = Register(y=1073807416),
  b = Register(b=1073807408),
  xdimension = Register(xdimension=3),
  ydimension = Register(ydimension=1)
}