In [1]:
from pynq import Overlay, GPIO, Register, allocate, MMIO
import os
from convert import *
import numpy as np

import struct


In [2]:
# Load the "nnv3.bit" overlay and keep one module-level handle per IP core;
# the layer classes and the Neural_Net wrapper below receive these handles.
# (Handles for backward_fcc_0 / activation_fwd_0 / activation_bckwd_0 were
# sketched here in commented-out form and are not used by this notebook.)
overlay = Overlay("nnv3.bit")

fccip = overlay.fcc_combined_0
convip = overlay.conv_combined_0
reluip = overlay.relu_combined_0
inputip = overlay.InputLayer_0
lossip = overlay.loss_derivative_0

In [3]:
# Show the loss-derivative IP's register map (control register plus the
# x_ddr / dx_ddr / y / x_size / N / writetoddr / ddrtobram argument registers).
lossip.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  ap_return = Register(ap_return=0, RESERVED=0),
  x_ddr = Register(x_ddr=0),
  dx_ddr = Register(dx_ddr=0),
  y = Register(y=0),
  x_size = Register(x_size=0),
  N = Register(N=0),
  writetoddr = Register(writetoddr=0, RESERVED=0),
  ddrtobram = Register(ddrtobram=0, RESERVED=0)
}

In [4]:
# Shared converter between Python numbers and the uint16 words stored in the
# DDR buffers: the classes below call converter.encode(...) before writing a
# buffer element and converter.decode(...) after reading one.
converter = Converter()

In [5]:
class FullyConnectedLayer():
    """Host-side wrapper for one fully connected layer run on the FCC IP core.

    The layer owns the DDR buffers for its weights, biases and their gradient
    counterparts, and programs the IP's registers with their physical
    addresses. The IP handle is supplied later through ``initHardware`` so the
    same core can be shared between several layers.
    """

    def __init__(self, xdim, ydim):
        """Allocate the layer buffers and fill them with the default weights.

        Args:
            xdim: value written to the IP's ``xdim`` register; the weight
                buffer holds ``xdim * ydim`` elements.
            ydim: value written to the IP's ``ydim`` register; the bias
                buffer holds ``ydim`` elements.
        """
        self.xdim = xdim
        self.ydim = ydim

        self.wbuff = allocate(shape=(xdim * ydim,), dtype='uint16')
        self.ybuff = allocate(shape=(ydim,), dtype='uint16')
        self.bbuff = allocate(shape=(ydim,), dtype='uint16')

        self.dwbuff = allocate(shape=(xdim * ydim,), dtype='uint16')
        self.dybuff = allocate(shape=(ydim,), dtype='uint16')
        self.dbbuff = allocate(shape=(ydim,), dtype='uint16')

        self.BASE_ADDRESS_W = self.wbuff.physical_address
        self.BASE_ADDRESS_DW = self.dwbuff.physical_address

        self.BASE_ADDRESS_B = self.bbuff.physical_address
        self.BASE_ADDRESS_DB = self.dbbuff.physical_address

        self.reset_weights()

    def get_config_dic(self):
        """Return the layer dimensions as ``{'xdim': ..., 'ydim': ...}``.

        The dictionary is built on demand from the instance attributes; the
        previous implementation returned an attribute that was never assigned.
        """
        return {'xdim': self.xdim, 'ydim': self.ydim}

    def initHardware(self, fccip):
        """Bind this layer to ``fccip`` and program its address/size registers."""
        self.fccip = fccip
        self.fccip.register_map.wt = self.BASE_ADDRESS_W
        self.fccip.register_map.dwt = self.BASE_ADDRESS_DW
        self.fccip.register_map.b = self.BASE_ADDRESS_B
        self.fccip.register_map.db = self.BASE_ADDRESS_DB
        self.fccip.register_map.xdim = self.xdim
        self.fccip.register_map.ydim = self.ydim
        self.fccip.register_map.fwprop = True

    def set_weights(self, w, b):
        """Encode ``w`` and ``b`` into the weight/bias buffers and flush them.

        Args:
            w: iterable of ``xdim * ydim`` weight values.
            b: iterable of ``ydim`` bias values.
        """
        self.wbuff[:] = [int(converter.encode(el)) for el in w]
        self.bbuff[:] = [int(converter.encode(el)) for el in b]
        self.wbuff.flush()
        self.bbuff.flush()

    def reset_weights(self):
        """Fill every weight with encoded 0.2 and every bias with encoded 0."""
        # The encoded constants do not depend on the element index, so they
        # are computed once and broadcast over the whole buffer.
        self.wbuff[:] = int(converter.encode(0.2))
        self.bbuff[:] = int(converter.encode(0))

        self.wbuff.flush()
        self.bbuff.flush()

    def get_weights(self):
        """Return ``(w, b)``: the decoded contents of the weight and bias buffers."""
        w = [converter.decode(self.wbuff[i]) for i in range(self.xdim * self.ydim)]
        b = [converter.decode(self.bbuff[i]) for i in range(self.ydim)]
        return w, b

    def _run(self, forward):
        """Select the direction, start the IP and poll CTRL until it reports 4 or 6.

        Going by the register map shown in this notebook, 4 is presumably
        AP_IDLE and 6 is AP_DONE|AP_IDLE - TODO confirm against the HLS core.
        """
        self.fccip.register_map.fwprop = forward
        self.fccip.write(0x00, 1)
        fpga_state = self.fccip.read(0x00)

        max_try = 1000000
        while fpga_state not in (4, 6):
            fpga_state = self.fccip.read(0x00)
            max_try -= 1
            if max_try == 0:
                # Give up after the polling budget is exhausted.
                print("ERROR: Can't go ahead")
                self.fccip.write(0x00, 4)
                break

        self.fccip.write(0x00, 4)

    def fwprop(self):
        """Run the IP with its ``fwprop`` register set to True."""
        self._run(True)

    def bckprop(self):
        """Run the IP with its ``fwprop`` register set to False."""
        self._run(False)
        


In [6]:
class ConvolutionLayer():
    """Host-side wrapper for one convolution layer run on the conv IP core.

    The layer owns the DDR buffers for its filter weights, biases and their
    gradient counterparts. The IP handle is supplied later through
    ``initHardware`` so the same core can be shared between several layers.
    """

    def __init__(self, F, C, H, W, FH, FW):
        """Allocate the layer buffers and fill them with the default weights.

        Each argument is stored and later written to the IP register of the
        same name. The weight buffer holds ``F * C * FH * FW`` elements and the
        bias buffer holds ``F`` elements.
        """
        self.F = F
        self.C = C
        self.H = H
        self.W = W
        self.FH = FH
        self.FW = FW

        self.wbuff = allocate(shape=(F * C * FH * FW,), dtype='uint16')
        self.bbuff = allocate(shape=(F,), dtype='uint16')

        self.dwbuff = allocate(shape=(F * C * FH * FW,), dtype='uint16')
        self.dbbuff = allocate(shape=(F,), dtype='uint16')

        self.BASE_ADDRESS_W = self.wbuff.physical_address
        self.BASE_ADDRESS_DW = self.dwbuff.physical_address

        self.BASE_ADDRESS_B = self.bbuff.physical_address
        self.BASE_ADDRESS_DB = self.dbbuff.physical_address

        self.reset_weights()

    def initHardware(self, convip):
        """Bind this layer to ``convip`` and program its address/size registers."""
        self.convip = convip
        self.convip.register_map.wt = self.BASE_ADDRESS_W
        self.convip.register_map.dwt = self.BASE_ADDRESS_DW
        self.convip.register_map.b = self.BASE_ADDRESS_B
        self.convip.register_map.db = self.BASE_ADDRESS_DB
        self.convip.register_map.H = self.H
        self.convip.register_map.W = self.W
        self.convip.register_map.FH = self.FH
        self.convip.register_map.FW = self.FW
        self.convip.register_map.F = self.F
        self.convip.register_map.C = self.C
        self.convip.register_map.fwprop = True

    def set_weights(self, w, b):
        """Encode ``w`` and ``b`` into the weight/bias buffers and flush them.

        Args:
            w: iterable of ``F * C * FH * FW`` weight values.
            b: iterable of ``F`` bias values.
        """
        self.wbuff[:] = [int(converter.encode(el)) for el in w]
        self.bbuff[:] = [int(converter.encode(el)) for el in b]
        self.wbuff.flush()
        self.bbuff.flush()

    def reset_weights(self):
        """Fill every weight with encoded 1.0 and every bias with encoded 0.1."""
        # The encoded constants do not depend on the element index, so they
        # are computed once and broadcast over the whole buffer.
        self.wbuff[:] = int(converter.encode(1.0))
        self.bbuff[:] = int(converter.encode(0.1))

        self.wbuff.flush()
        self.bbuff.flush()

    def get_weights(self):
        """Return ``(w, b)``: the decoded contents of the weight and bias buffers."""
        n_weights = self.F * self.C * self.FH * self.FW
        # The buffers are instance attributes; the bare names used previously
        # raised NameError.
        w = [converter.decode(self.wbuff[i]) for i in range(n_weights)]
        b = [converter.decode(self.bbuff[i]) for i in range(self.F)]
        return w, b

    def _run(self, forward):
        """Select the direction, start the IP and poll CTRL until it reports 4 or 6.

        Going by the register map shown in this notebook, 4 is presumably
        AP_IDLE and 6 is AP_DONE|AP_IDLE - TODO confirm against the HLS core.
        """
        self.convip.register_map.fwprop = forward
        self.convip.write(0x00, 1)
        fpga_state = self.convip.read(0x00)

        max_try = 1000000
        while fpga_state not in (4, 6):
            fpga_state = self.convip.read(0x00)
            max_try -= 1
            if max_try == 0:
                # Give up after the polling budget is exhausted.
                print("ERROR: Can't go ahead")
                self.convip.write(0x00, 4)
                break

        # Same closing write as every other run method in this notebook; the
        # forward pass previously skipped it.
        self.convip.write(0x00, 4)

    def fwprop(self):
        """Run the IP with its ``fwprop`` register set to True."""
        self._run(True)

    def bckprop(self):
        """Run the IP with its ``fwprop`` register set to False."""
        self._run(False)

In [7]:
class ReluLayer():
    """Host-side wrapper for one ReLU layer run on the relu IP core."""

    def __init__(self, dim):
        """Remember ``dim``, the value later written to the IP's ``dim`` register."""
        self.dim = dim

    def initHardware(self, reluip):
        """Bind this layer to ``reluip`` and program its ``dim`` register."""
        self.reluip = reluip
        reluip.register_map.dim = self.dim

    def _execute(self, forward):
        """Set the direction flag, start the core and poll CTRL until it reads 4 or 6."""
        core = self.reluip
        core.register_map.fwprop = forward
        core.write(0x00, 1)

        status = core.read(0x00)
        remaining = 1000000
        while status not in (4, 6):
            status = core.read(0x00)
            remaining -= 1
            if remaining == 0:
                # Polling budget exhausted: report and stop waiting.
                print("ERROR: Can't go ahead")
                core.write(0x00, 4)
                break

        core.write(0x00, 4)

    def fwprop(self):
        """Run the core with its ``fwprop`` register set to True."""
        self._execute(True)

    def bckprop(self):
        """Run the core with its ``fwprop`` register set to False."""
        self._execute(False)
        


In [8]:
import copy
class Neural_Net():
    """Sequential network whose layers execute on shared FPGA IP cores.

    The object owns the DDR buffers for the network input (x, dx) and output
    (y, dy), moves them to and from the hardware through the input and loss IP
    cores, and runs the registered layers in order (forward) or in reverse
    order (backward). Each layer is reprogrammed onto its IP core immediately
    before it runs, because one core is shared by all layers of that type.
    """

    def __init__(self, fccip, convip, reluip,inputip,lossip,gradientip,xdim,ydim):
        """Allocate the I/O buffers and program the input and loss IP cores.

        Args:
            fccip, convip, reluip: IP handles passed to the fcc / conv / relu
                layers before they run.
            inputip: IP handle that receives the x/dx buffer addresses.
            lossip: IP handle that receives the y/dy buffer addresses.
            gradientip: stored as-is; no method of this class uses it.
            xdim: number of elements in the network input.
            ydim: number of elements in the network output.
        """
        self.layers = []
        self.layer_names = []
        self.layer_base_addresses = []
        self.nlayers = 0
        self.fccip = fccip
        self.convip = convip
        self.reluip = reluip
        self.inputip = inputip
        self.lossip = lossip
        self.gradientip = gradientip

        self.xbuff = allocate(shape=(xdim,), dtype='uint16')
        self.dxbuff = allocate(shape=(xdim,), dtype='uint16')
        self.ybuff = allocate(shape=(ydim,), dtype='uint16')
        self.dybuff = allocate(shape=(ydim,), dtype='uint16')

        self.xdim = xdim
        self.ydim = ydim

        self.dx_ddr_addr = self.dxbuff.physical_address
        self.x_ddr_addr = self.xbuff.physical_address
        self.dy_ddr_addr = self.dybuff.physical_address
        self.y_ddr_addr = self.ybuff.physical_address

        self.inputip.register_map.x = self.x_ddr_addr
        self.inputip.register_map.dx = self.dx_ddr_addr
        self.inputip.register_map.dim = xdim
        self.inputip.register_map.ddrtobram = 1

        self.lossip.register_map.x_ddr = self.y_ddr_addr
        self.lossip.register_map.dx_ddr = self.dy_ddr_addr
        self.lossip.register_map.x_size = ydim
        self.lossip.register_map.writetoddr = 1
        self.lossip.register_map.ddrtobram = 1
        self.lossip.register_map.y = 0
        self.lossip.register_map.N = 0

    @staticmethod
    def _run_ip(ip):
        """Start ``ip`` and poll its CTRL register until it reads 4 or 6.

        Going by the register map shown in this notebook, 4 is presumably
        AP_IDLE and 6 is AP_DONE|AP_IDLE - TODO confirm against the HLS core.
        """
        ip.write(0x00, 1)
        ip_state = ip.read(0x00)

        max_try = 1000000
        while ip_state not in (4, 6):
            ip_state = ip.read(0x00)
            max_try -= 1
            if max_try == 0:
                # Give up after the polling budget is exhausted.
                print("ERROR: Can't go ahead")
                ip.write(0x00, 4)
                break

        ip.write(0x00, 4)

    def _ip_for(self, index):
        """Return the IP handle for layer ``index`` ("fcc", "conv", otherwise relu)."""
        name = self.layer_names[index]
        if name == "fcc":
            return self.fccip
        if name == "conv":
            return self.convip
        return self.reluip

    def add_fcc(self,xdim,ydim):
        """Append a ``FullyConnectedLayer(xdim, ydim)`` to the network."""
        layer = FullyConnectedLayer(xdim, ydim)
        self.layers.append(layer)
        self.layer_names.append("fcc")
        self.nlayers += 1

    def add_conv(self,F,C,H,W,FH,FW):
        """Append a ``ConvolutionLayer(F, C, H, W, FH, FW)`` to the network."""
        layer = ConvolutionLayer(F, C, H, W, FH, FW)
        self.layers.append(layer)
        self.layer_names.append("conv")
        self.nlayers += 1

    def add_relu(self,dim):
        """Append a ``ReluLayer(dim)`` to the network."""
        layer = ReluLayer(dim)
        self.layers.append(layer)
        self.layer_names.append("relu")
        self.nlayers += 1

    def write_input(self,xvals):
        """Encode the first ``xdim`` values of ``xvals`` into the x buffer and run the input IP.

        The IP runs with ``ddrtobram=1`` (presumably a DDR -> BRAM copy -
        confirm against the HLS source).
        """
        for i in range(self.xdim):
            self.xbuff[i] = int(converter.encode(xvals[i]))
        self.xbuff.flush()

        self.inputip.register_map.ddrtobram = 1
        self._run_ip(self.inputip)

    def fetch_input(self):
        """Run the input IP with ``ddrtobram=0`` and return decoded ``(xvals, dxvals)``."""
        self.inputip.register_map.ddrtobram = 0
        self._run_ip(self.inputip)

        # The hardware may have rewritten these buffers; drop the CPU's cached
        # copy before reading them.
        self.xbuff.invalidate()
        self.dxbuff.invalidate()

        xvals = [converter.decode(int(self.xbuff[i])) for i in range(self.xdim)]
        dxvals = [converter.decode(int(self.dxbuff[i])) for i in range(self.xdim)]
        return xvals, dxvals

    def write_output(self,yvals, dyvals):
        """Encode ``yvals`` / ``dyvals`` into the y / dy buffers and run the loss IP.

        Both arguments are flattened to 1-D float arrays first, so lists of
        scalars and lists of length-1 arrays (as returned by ``fetch_output``
        when the converter decodes to arrays) are both accepted. Only the first
        ``ydim`` values of each are used.
        """
        y_flat = np.asarray(yvals, dtype=float).reshape(-1)
        dy_flat = np.asarray(dyvals, dtype=float).reshape(-1)

        for i in range(self.ydim):
            # The buffers are instance attributes; the bare names used
            # previously raised NameError.
            self.ybuff[i] = int(converter.encode(float(y_flat[i])))
            self.dybuff[i] = int(converter.encode(float(dy_flat[i])))

        self.ybuff.flush()
        self.dybuff.flush()

        self.lossip.register_map.writetoddr = 1
        self.lossip.register_map.ddrtobram = 1
        self._run_ip(self.lossip)

    def fetch_output(self):
        """Run the loss IP with ``writetoddr=1, ddrtobram=0`` and return decoded ``(yvals, dyvals)``."""
        self.lossip.register_map.writetoddr = 1
        self.lossip.register_map.ddrtobram = 0
        self._run_ip(self.lossip)

        # The hardware may have rewritten these buffers; drop the CPU's cached
        # copy before reading them.
        self.ybuff.invalidate()
        self.dybuff.invalidate()

        yvals = [converter.decode(int(self.ybuff[i])) for i in range(self.ydim)]
        dyvals = [converter.decode(int(self.dybuff[i])) for i in range(self.ydim)]
        return yvals, dyvals

    def calculate_mse_loss(self,x,y):
        """Return the summed squared error per sample, averaged over ``len(y)``.

        Args:
            x: sequence of input samples, each accepted by ``predict``.
            y: sequence of target vectors, one per sample.

        Returns:
            ``sum_i sum((predict(x[i]) - y[i]) ** 2) / len(y)`` as a float, or
            0.0 when ``y`` is empty.
        """
        n_targets = len(y)
        if n_targets == 0:
            return 0.0

        loss = 0.0
        for sample, target in zip(x, y):
            # predict() may return a list of length-1 arrays; flatten so the
            # subtraction is element-wise against the target vector.
            pred = np.asarray(self.predict(sample), dtype=float).reshape(-1)
            truth = np.asarray(target, dtype=float).reshape(-1)
            loss += float(np.sum(np.square(pred - truth)))

        return loss / n_targets

    def calculate_gradient(self,y,ytrue):
        """Return ``2 * (y - ytrue)`` element-wise; lists and arrays are accepted."""
        return 2 * (np.asarray(y) - np.asarray(ytrue))

    def predict(self,x):
        """Run a forward pass on ``x`` and return the decoded output values."""
        self.runfwprop(x)
        yvals, dyvals = self.fetch_output()
        return yvals

    def runfwprop(self,x):
        """Write ``x`` to the hardware and run every layer in insertion order."""
        self.write_input(x)

        for i in range(self.nlayers):
            # One core is shared by all layers of a type, so it is reprogrammed
            # for this layer right before it runs.
            self.layers[i].initHardware(self._ip_for(i))
            self.layers[i].fwprop()

    def runbackprop(self):
        """Run every layer's backward pass, last layer first."""
        for j in reversed(range(self.nlayers)):
            # Reprogram the shared core for this layer, mirroring runfwprop;
            # otherwise the core keeps the registers of whichever layer of
            # that type ran last.
            self.layers[j].initHardware(self._ip_for(j))
            self.layers[j].bckprop()

    def train(self,x,y,epochs,learning_rate):
        """Run ``epochs`` passes of forward/backward propagation over ``(x, y)``.

        For each sample this runs a forward pass, reads the output back,
        computes ``2 * (prediction - target)`` on the host, writes prediction
        and gradient to the loss IP and runs the backward pass. The loss over
        the first 99 samples is printed at the start of every epoch.

        NOTE(review): ``learning_rate`` is accepted but not forwarded anywhere;
        no register for it is visible in this notebook - confirm how the
        hardware applies weight updates.
        """
        for _ in range(epochs):
            print(self.calculate_mse_loss(x[0:99], y[0:99]))
            for j in range(len(x)):
                self.runfwprop(x[j])

                raw_pred, _unused_dy = self.fetch_output()
                pred = np.asarray(raw_pred, dtype=float).reshape(-1)
                target = np.asarray(y[j], dtype=float).reshape(-1)

                grads = self.calculate_gradient(pred, target)
                self.write_output(pred, grads)
                self.runbackprop()

In [10]:
# Build a small fcc -> conv -> relu network with a 4-element input and a
# 3-element output, then run one forward pass and read the result back.
# Neural_Net.__init__ takes a `gradientip` argument between `lossip` and
# `xdim`; this notebook has no such IP handle, so None is passed for it.
nn = Neural_Net(fccip, convip, reluip, inputip, lossip, None, 4, 3)
nn.add_fcc(4, 4)
nn.add_conv(1, 1, 4, 1, 2, 1)
nn.add_relu(3)
x = [0.1, 0.2, 0.3, 0.4]

nn.runfwprop(x)

y, dy = nn.fetch_output()


In [23]:
# Show the loss-derivative IP's register map again to inspect the values
# currently programmed into its argument registers.
lossip.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  ap_return = Register(ap_return=10511, RESERVED=0),
  x_ddr = Register(x_ddr=377843712),
  dx_ddr = Register(dx_ddr=377847808),
  y = Register(y=1),
  x_size = Register(x_size=4),
  N = Register(N=1),
  writetoddr = Register(writetoddr=0, RESERVED=0),
  ddrtobram = Register(ddrtobram=0, RESERVED=0)
}

In [None]:
# Scratch cell: allocate two 4-element uint32 buffers and copy a small Python
# list into the first one element by element (x2 is left untouched).
x = [1, 2, 3, 4]
x1 = allocate(shape=(4,), dtype='uint32')
x2 = allocate(shape=(4,), dtype='uint32')

for i in range(len(x)):
    x1[i] = x[i]

In [30]:
# Stand-alone test vectors for the loss-derivative IP: x holds four encoded
# inputs, dx is zeroed so any value written by the hardware is visible.
x = allocate(shape=(4,), dtype='uint16')
dx = allocate(shape=(4,), dtype='uint16')

x[:] = [converter.encode(value) for value in (0.1, 0.4, 0.1, 0.1)]
y = 1
N = 1
dx[:] = [0, 0, 0, 0]

# Push the CPU-side writes out to DDR before the IP is pointed at these
# buffers, as every buffer write in the classes above does.
x.flush()
dx.flush()


In [31]:
# Point the loss-derivative IP at the test buffers and set its scalar
# arguments. NOTE(review): writetoddr and ddrtobram are both 0 here, whereas
# Neural_Net always runs this IP with writetoddr=1 - confirm that dx is
# expected to reach DDR in this mode.
loss_regs = lossip.register_map
loss_regs.x_ddr = x.physical_address
loss_regs.dx_ddr = dx.physical_address
loss_regs.y = y
loss_regs.N = N
loss_regs.writetoddr = 0
loss_regs.ddrtobram = 0
loss_regs.x_size = 4

In [32]:
# Start the loss-derivative IP, then wait for it to finish before the next
# cell reads dx. CTRL values 4 and 6 are the completion states polled for by
# the classes above.
lossip.write(0x00, 1)

for _ in range(1000000):
    if lossip.read(0x00) in (4, 6):
        break
else:
    print("ERROR: Can't go ahead")

# Same closing write the classes above issue after every run.
lossip.write(0x00, 4)

# Drop the CPU's cached copy of dx so the following read sees what the
# hardware wrote to DDR.
dx.invalidate()

In [33]:
# Decode and print the four words of the dx buffer, one per line.
for raw_word in dx[:4]:
    print(converter.decode(int(raw_word)))

[0.]
[0.]
[0.]
[0.]
