In [1]:
from pynq import Overlay, GPIO, Register, allocate, MMIO
import os
from convert import *
import numpy as np

import struct


In [2]:
# Load the bitstream and bind one handle per IP core exposed by the overlay.
# Every layer object created later shares these handles.
overlay = Overlay("mnist.bit")
fccip=overlay.fcc_combined_0
convip=overlay.conv_combined_0
reluip=overlay.relu_combined_0
inputip=overlay.InputLayer_0
lossip=overlay.loss_derivative_0
weightip=overlay.update_weights_0
# Module-level codec used by every class below to translate between Python
# numbers and the 16-bit words stored in the device buffers.
# NOTE(review): Converter presumably comes from `from convert import *` -- confirm.
converter=Converter()

# Leftover exploration of an earlier overlay layout (dead code, kept for reference).
# overlay?

# bck1=overlay.backward_fcc_0
# # bck1.register_map

# actv_fwd1=overlay.activation_fwd_0
# actv_bck1=overlay.activation_bckwd_0

# actv_fwd1.register_map
# Initial fully connected weights, read by FullyConnectedLayer.reset_weights().
# NOTE(review): this file is produced by the np.save("fcc_weights.npy", ...) cell
# at the very end of the notebook, so it must already exist before this cell runs.
fcc_weights=np.load("fcc_weights.npy")

In [3]:
class FullyConnectedLayer():
    """Host-side handle for one fully connected layer executed on a shared IP core.

    The object owns the contiguous buffers (weights, biases, their gradients and
    two debug activation dumps) and knows how to point an IP core at them and
    start it. All buffers hold 16-bit words produced/consumed by the module-level
    ``converter``.

    Parameters
    ----------
    xdim : int
        Number of inputs of the layer.
    ydim : int
        Number of outputs of the layer.
    """

    # Control-register handshake shared by every accelerator call of this class.
    # NOTE(review): 4 and 6 presumably are AP_IDLE and AP_DONE|AP_IDLE of an HLS
    # ap_ctrl register at offset 0 -- confirm against the generated IP.
    _CTRL_OFFSET = 0x00
    _CTRL_START = 1
    _CTRL_CLEAR = 4
    _DONE_STATES = (4, 6)
    _MAX_POLLS = 1000000

    def __init__(self,xdim,ydim):
        self.xdim=xdim
        self.ydim=ydim

        self.wbuff=allocate(shape=(xdim*ydim,), dtype='uint16')
        self.ybuff=allocate(shape=(ydim,), dtype='uint16')
        self.bbuff=allocate(shape=(ydim,), dtype='uint16')

        self.dwbuff=allocate(shape=(xdim*ydim,), dtype='uint16')
        self.dybuff=allocate(shape=(ydim,), dtype='uint16')
        self.dbbuff=allocate(shape=(ydim,), dtype='uint16')

        self.debug_x=allocate(shape=(xdim,), dtype='uint16')
        self.debug_dx=allocate(shape=(xdim,), dtype='uint16')

        self.BASE_ADDRESS_W=self.wbuff.physical_address
        self.BASE_ADDRESS_DW=self.dwbuff.physical_address

        self.BASE_ADDRESS_B=self.bbuff.physical_address
        self.BASE_ADDRESS_DB=self.dbbuff.physical_address

        self.reset_weights()

    def _start_and_wait(self, ip):
        """Start ``ip``, poll its control register until it reports completion.

        Returns True when a done state was observed and False when the poll
        budget (``_MAX_POLLS``) ran out. The control register is written with
        ``_CTRL_CLEAR`` in both cases, and the timeout message is printed exactly
        as before so existing logs keep their shape.
        """
        ip.write(self._CTRL_OFFSET, self._CTRL_START)
        finished = True
        polls_left = self._MAX_POLLS
        while ip.read(self._CTRL_OFFSET) not in self._DONE_STATES:
            polls_left -= 1
            if polls_left <= 0:
                print("ERROR: Can't go ahead")
                finished = False
                break
        ip.write(self._CTRL_OFFSET, self._CTRL_CLEAR)
        return finished

    @staticmethod
    def _decode_buffer(buff):
        """Refresh the CPU view of ``buff`` and decode every word to a Python value."""
        # The buffer may have been written by the programmable logic; writes from
        # the CPU side are always flushed in this class, so invalidating is safe.
        buff.invalidate()
        return [converter.decode(int(word)) for word in buff]

    def get_debug_activations(self):
        """Return ``(x, dx)``: the decoded contents of the two debug buffers."""
        return self._decode_buffer(self.debug_x), self._decode_buffer(self.debug_dx)

    def initHardware(self,fccip):
        """Program ``fccip`` with this layer's buffer addresses and dimensions.

        The IP handle is remembered on the instance and used by ``fwprop`` and
        ``bckprop``. Must be called again whenever another layer has used the
        same IP core in between.
        """
        self.fccip=fccip
        self.fccip.register_map.wt=self.BASE_ADDRESS_W
        self.fccip.register_map.dwt=self.BASE_ADDRESS_DW
        self.fccip.register_map.b=self.BASE_ADDRESS_B
        self.fccip.register_map.db=self.BASE_ADDRESS_DB
        self.fccip.register_map.xdim=self.xdim
        self.fccip.register_map.ydim=self.ydim
        self.fccip.register_map.debug_x= self.debug_x.physical_address
        self.fccip.register_map.debug_dx= self.debug_dx.physical_address
        self.fccip.register_map.debugip= True
        self.fccip.register_map.fwprop=True

    def update_weights(self, weightip, lr):
        """Run the weight-update IP over this layer's weight/gradient buffers.

        Returns True on completion, False if the IP timed out.
        """
        # NOTE(review): only w/dw are handed to the update IP; the bias buffer is
        # never updated from db here -- confirm that this is intended.
        weightip.register_map.w=self.BASE_ADDRESS_W
        weightip.register_map.dw=self.BASE_ADDRESS_DW
        weightip.register_map.lr=converter.encode(lr)
        weightip.register_map.dim=(self.xdim*self.ydim)
        return self._start_and_wait(weightip)

    def set_weights(self,w,b):
        """Encode and store weights ``w`` (xdim*ydim values) and biases ``b`` (ydim values)."""
        self.wbuff[:]=[int(converter.encode(el)) for el in w]
        self.bbuff[:]=[int(converter.encode(el)) for el in b]
        self.wbuff.flush()
        self.bbuff.flush()

    def reset_weights(self):
        """Reload the initial weights from the global ``fcc_weights`` array.

        ``fcc_weights`` is expected to already hold encoded words; biases are
        set to ``encode(0.002 * i)``.

        Raises
        ------
        ValueError
            If ``fcc_weights`` has fewer than ``xdim * ydim`` entries.
        """
        count = self.xdim*self.ydim
        initial = np.asarray(fcc_weights).reshape(-1)
        if initial.size < count:
            raise ValueError(
                "fcc_weights has %d entries but the layer needs %d"
                % (initial.size, count))
        # One bulk copy instead of xdim*ydim single-element stores.
        self.wbuff[:]=initial[:count]

        for i in range(self.ydim):
            self.bbuff[i]=int(converter.encode(0+0.002*i))

        self.wbuff.flush()
        self.bbuff.flush()

    def get_weights(self):
        """Return ``(w, b)``: decoded weights and biases as Python lists."""
        return self._decode_buffer(self.wbuff), self._decode_buffer(self.bbuff)

    def get_weight_grads(self):
        """Return ``(dw, db)``: decoded weight and bias gradients as Python lists."""
        return self._decode_buffer(self.dwbuff), self._decode_buffer(self.dbbuff)

    def fwprop(self):
        """Run the forward pass on the IP set by ``initHardware``; True unless it timed out."""
        self.fccip.register_map.fwprop=True
        return self._start_and_wait(self.fccip)

    def bckprop(self):
        """Run the backward pass on the IP set by ``initHardware``; True unless it timed out."""
        self.fccip.register_map.fwprop=False
        return self._start_and_wait(self.fccip)
        


In [4]:
class ConvolutionLayer():
    """Host-side handle for one convolution layer executed on a shared IP core.

    Owns the contiguous buffers for filter weights, biases, their gradients and
    two debug activation dumps, and programs/starts the convolution IP. All
    buffers hold 16-bit words produced/consumed by the module-level ``converter``.

    Parameters
    ----------
    F, C : int
        Number of filters and number of input channels.
    H, W : int
        Input height and width.
    FH, FW : int
        Filter height and width.
    """

    # Control-register handshake shared by every accelerator call of this class.
    # NOTE(review): 4 and 6 presumably are AP_IDLE and AP_DONE|AP_IDLE of an HLS
    # ap_ctrl register at offset 0 -- confirm against the generated IP.
    _CTRL_OFFSET = 0x00
    _CTRL_START = 1
    _CTRL_CLEAR = 4
    _DONE_STATES = (4, 6)
    _MAX_POLLS = 1000000

    def __init__(self,F,C,H,W,FH,FW):
        self.F=F
        self.C=C
        self.H=H
        self.W=W
        self.FH=FH
        self.FW=FW

        self.wbuff=allocate(shape=(F*C*FH*FW,), dtype='uint16')
        self.bbuff=allocate(shape=(F,), dtype='uint16')

        self.dwbuff=allocate(shape=(F*C*FH*FW,), dtype='uint16')
        self.dbbuff=allocate(shape=(F,), dtype='uint16')

        self.BASE_ADDRESS_W=self.wbuff.physical_address
        self.BASE_ADDRESS_DW=self.dwbuff.physical_address

        self.BASE_ADDRESS_B=self.bbuff.physical_address
        self.BASE_ADDRESS_DB=self.dbbuff.physical_address

        self.debug_x=allocate(shape=(C*H*W,), dtype='uint16')
        self.debug_dx=allocate(shape=(C*H*W,), dtype='uint16')

        self.reset_weights()

    def _start_and_wait(self, ip):
        """Start ``ip``, poll its control register until it reports completion.

        Returns True when a done state was observed and False when the poll
        budget (``_MAX_POLLS``) ran out. The control register is written with
        ``_CTRL_CLEAR`` in both cases, and the timeout message is printed exactly
        as before so existing logs keep their shape.
        """
        ip.write(self._CTRL_OFFSET, self._CTRL_START)
        finished = True
        polls_left = self._MAX_POLLS
        while ip.read(self._CTRL_OFFSET) not in self._DONE_STATES:
            polls_left -= 1
            if polls_left <= 0:
                print("ERROR: Can't go ahead")
                finished = False
                break
        ip.write(self._CTRL_OFFSET, self._CTRL_CLEAR)
        return finished

    @staticmethod
    def _decode_buffer(buff):
        """Refresh the CPU view of ``buff`` and decode every word to a Python value."""
        # The buffer may have been written by the programmable logic; writes from
        # the CPU side are always flushed in this class, so invalidating is safe.
        buff.invalidate()
        return [converter.decode(int(word)) for word in buff]

    def initHardware(self,convip):
        """Program ``convip`` with this layer's buffer addresses and dimensions.

        The IP handle is remembered on the instance and used by ``fwprop`` and
        ``bckprop``. Must be called again whenever another layer has used the
        same IP core in between.
        """
        self.convip=convip

        self.convip.register_map.wt=self.BASE_ADDRESS_W

        self.convip.register_map.dwt=self.BASE_ADDRESS_DW
        self.convip.register_map.b=self.BASE_ADDRESS_B
        self.convip.register_map.db=self.BASE_ADDRESS_DB
        self.convip.register_map.H=self.H
        self.convip.register_map.W=self.W
        self.convip.register_map.FH=self.FH
        self.convip.register_map.FW=self.FW
        self.convip.register_map.F=self.F
        self.convip.register_map.C=self.C
        self.convip.register_map.debugip= True
        self.convip.register_map.debug_x= self.debug_x.physical_address
        self.convip.register_map.debug_dx= self.debug_dx.physical_address
        self.convip.register_map.fwprop=True

    def update_weights(self, weightip, lr):
        """Run the weight-update IP over this layer's weight/gradient buffers.

        Returns True on completion, False if the IP timed out.
        """
        # NOTE(review): only w/dw are handed to the update IP; the bias buffer is
        # never updated from db here -- confirm that this is intended.
        weightip.register_map.w=self.BASE_ADDRESS_W
        weightip.register_map.dw=self.BASE_ADDRESS_DW
        weightip.register_map.lr=converter.encode(lr)
        weightip.register_map.dim=(self.F*self.C*self.FH*self.FW)
        return self._start_and_wait(weightip)

    def get_debug_activations(self):
        """Return ``(x, dx)``: the decoded contents of the two debug buffers."""
        return self._decode_buffer(self.debug_x), self._decode_buffer(self.debug_dx)

    def set_weights(self,w,b):
        """Encode and store weights ``w`` (F*C*FH*FW values) and biases ``b`` (F values)."""
        self.wbuff[:]=[int(converter.encode(el)) for el in w]
        self.bbuff[:]=[int(converter.encode(el)) for el in b]
        self.wbuff.flush()
        self.bbuff.flush()

    def reset_weights(self):
        """Fill weights with ``encode(0.1 + 0.002*i)`` and biases with ``encode(0.002*i)``."""
        for i in range(self.F*self.C*self.FH*self.FW):
            self.wbuff[i]=int(converter.encode(0.1+0.002*i))

        for i in range(self.F):
            self.bbuff[i]=int(converter.encode(0.0+0.002*i))

        self.wbuff.flush()
        self.bbuff.flush()

    def get_weights(self):
        """Return ``(w, b)``: decoded weights and biases as Python lists."""
        return self._decode_buffer(self.wbuff), self._decode_buffer(self.bbuff)

    def get_weight_grads(self):
        """Return ``(dw, db)``: decoded weight and bias gradients as Python lists."""
        return self._decode_buffer(self.dwbuff), self._decode_buffer(self.dbbuff)

    def fwprop(self):
        """Run the forward pass on the IP set by ``initHardware``; True unless it timed out.

        The control register is now cleared after completion, matching
        ``bckprop`` and ``update_weights``.
        """
        self.convip.register_map.fwprop=True
        return self._start_and_wait(self.convip)

    def bckprop(self):
        """Run the backward pass on the IP set by ``initHardware``; True unless it timed out."""
        self.convip.register_map.fwprop=False
        return self._start_and_wait(self.convip)

In [5]:
class ReluLayer():
    """Host-side handle for one ReLU layer executed on a shared IP core.

    The layer has no trainable parameters; it only owns two debug buffers of
    ``dim`` 16-bit words that the IP is pointed at.

    Parameters
    ----------
    dim : int
        Number of activations processed by the layer.
    """

    # Control-register handshake shared by every accelerator call of this class.
    # NOTE(review): 4 and 6 presumably are AP_IDLE and AP_DONE|AP_IDLE of an HLS
    # ap_ctrl register at offset 0 -- confirm against the generated IP.
    _CTRL_OFFSET = 0x00
    _CTRL_START = 1
    _CTRL_CLEAR = 4
    _DONE_STATES = (4, 6)
    _MAX_POLLS = 1000000

    def __init__(self,dim):
        self.dim=dim

        self.debug_x=allocate(shape=(dim,), dtype='uint16')
        self.debug_dx=allocate(shape=(dim,), dtype='uint16')

    def _start_and_wait(self, ip):
        """Start ``ip``, poll its control register until it reports completion.

        Returns True when a done state was observed and False when the poll
        budget (``_MAX_POLLS``) ran out. The control register is written with
        ``_CTRL_CLEAR`` in both cases, and the timeout message is printed exactly
        as before so existing logs keep their shape.
        """
        ip.write(self._CTRL_OFFSET, self._CTRL_START)
        finished = True
        polls_left = self._MAX_POLLS
        while ip.read(self._CTRL_OFFSET) not in self._DONE_STATES:
            polls_left -= 1
            if polls_left <= 0:
                print("ERROR: Can't go ahead")
                finished = False
                break
        ip.write(self._CTRL_OFFSET, self._CTRL_CLEAR)
        return finished

    def initHardware(self,reluip):
        """Program ``reluip`` with this layer's size and debug buffer addresses.

        The IP handle is remembered on the instance and used by ``fwprop`` and
        ``bckprop``.
        """
        self.reluip=reluip
        self.reluip.register_map.dim=self.dim
        self.reluip.register_map.debug_x= self.debug_x.physical_address
        self.reluip.register_map.debug_dx= self.debug_dx.physical_address
        self.reluip.register_map.debugip=True

    def get_debug_activations(self):
        """Return ``(x, dx)``: the decoded contents of the two debug buffers."""
        # The CPU never writes these buffers, so dropping cached lines before
        # reading cannot lose data.
        self.debug_x.invalidate()
        self.debug_dx.invalidate()
        x=[converter.decode(int(word)) for word in self.debug_x]
        dx=[converter.decode(int(word)) for word in self.debug_dx]
        return x,dx

    def update_weights(self,weightip,learning_rate):
        """No-op: kept so every layer type offers the same ``update_weights`` call."""
        return

    def fwprop(self):
        """Run the forward pass on the IP set by ``initHardware``; True unless it timed out."""
        self.reluip.register_map.fwprop=True
        return self._start_and_wait(self.reluip)

    def bckprop(self):
        """Run the backward pass on the IP set by ``initHardware``; True unless it timed out."""
        self.reluip.register_map.fwprop=False
        return self._start_and_wait(self.reluip)
        


In [6]:
import copy
class Neural_Net():
    """Sequential network whose layers are executed one after another on shared IP cores.

    The object owns the input/output buffers (``xbuff``/``dxbuff`` and
    ``ybuff``/``dybuff``), programs the input and loss IP cores with their
    addresses, and keeps the ordered list of layers.

    Parameters
    ----------
    fccip, convip, reluip : IP handles
        Cores used for fully connected, convolution and ReLU layers.
    inputip, lossip, weightip : IP handles
        Cores used for input transfer, loss/gradient computation and weight update.
    xdim, ydim : int
        Number of network inputs and outputs.
    verbose : bool, optional
        When True (default, the historical behaviour) progress messages are
        printed; the timeout error message is printed regardless.
    """

    # Control-register handshake shared by every accelerator call of this class.
    # NOTE(review): 4 and 6 presumably are AP_IDLE and AP_DONE|AP_IDLE of an HLS
    # ap_ctrl register at offset 0 -- confirm against the generated IP.
    _CTRL_OFFSET = 0x00
    _CTRL_START = 1
    _CTRL_CLEAR = 4
    _DONE_STATES = (4, 6)
    _MAX_POLLS = 1000000

    def __init__(self, fccip, convip, reluip,inputip,lossip,weightip,xdim,ydim,verbose=True):

        self.layers=[]
        self.layer_names=[]
        self.layer_base_addresses=[]
        self.nlayers=0
        self.fccip=fccip
        self.convip=convip
        self.reluip=reluip
        self.inputip=inputip
        self.lossip=lossip
        self.weightip=weightip
        self.verbose=verbose

        self.xbuff=allocate(shape=(xdim,), dtype='uint16')
        self.dxbuff=allocate(shape=(xdim,), dtype='uint16')
        self.ybuff=allocate(shape=(ydim,), dtype='uint16')
        self.dybuff=allocate(shape=(ydim,), dtype='uint16')

        self.xdim=xdim
        self.ydim=ydim

        self.dx_ddr_addr=self.dxbuff.physical_address
        self.x_ddr_addr=self.xbuff.physical_address
        self.dy_ddr_addr=self.dybuff.physical_address
        self.y_ddr_addr=self.ybuff.physical_address

        self.inputip.register_map.x=self.x_ddr_addr
        self.inputip.register_map.dx=self.dx_ddr_addr
        self.inputip.register_map.dim=xdim
        self.inputip.register_map.ddrtobram=1

        self.lossip.register_map.x_ddr=self.y_ddr_addr
        self.lossip.register_map.dx_ddr=self.dy_ddr_addr
        self.lossip.register_map.dim=ydim
        self.lossip.register_map.writetoddr=1
        self.lossip.register_map.ddrtobram=1
        self.lossip.register_map.y=0
        self.lossip.register_map.N=0

    # ------------------------------------------------------------------ helpers

    def _log(self, *args):
        """Print a progress message unless the network was created with verbose=False."""
        if self.verbose:
            print(*args)

    def _start_and_wait(self, ip):
        """Start ``ip``, poll its control register until it reports completion.

        Returns True when a done state was observed and False when the poll
        budget (``_MAX_POLLS``) ran out. The control register is written with
        ``_CTRL_CLEAR`` in both cases.
        """
        ip.write(self._CTRL_OFFSET, self._CTRL_START)
        finished = True
        polls_left = self._MAX_POLLS
        while ip.read(self._CTRL_OFFSET) not in self._DONE_STATES:
            polls_left -= 1
            if polls_left <= 0:
                print("ERROR: Can't go ahead")
                finished = False
                break
        ip.write(self._CTRL_OFFSET, self._CTRL_CLEAR)
        return finished

    @staticmethod
    def _decode_buffer(buff):
        """Refresh the CPU view of ``buff`` and decode every word to a Python value."""
        # CPU-side writes are flushed right after they are made, so invalidating
        # here only discards stale cached copies of device-written data.
        buff.invalidate()
        return [converter.decode(int(word)) for word in buff]

    def _ip_for(self, layer_name):
        """Return the IP core that executes layers registered under ``layer_name``."""
        if layer_name == "fcc":
            return self.fccip
        if layer_name == "conv":
            return self.convip
        return self.reluip

    # --------------------------------------------------------- network building

    def add_fcc(self,xdim,ydim):
        """Append a fully connected layer with ``xdim`` inputs and ``ydim`` outputs."""
        layer=FullyConnectedLayer(xdim,ydim)
        self.layers.append(layer)
        self.layer_names.append("fcc")
        self.nlayers+=1

    def add_conv(self,F,C,H,W,FH,FW):
        """Append a convolution layer (F filters, C channels, HxW input, FHxFW kernel)."""
        layer=ConvolutionLayer(F,C,H,W,FH,FW)
        self.layers.append(layer)
        self.layer_names.append("conv")
        self.nlayers+=1

    def add_relu(self,dim):
        """Append a ReLU layer over ``dim`` activations."""
        layer=ReluLayer(dim)
        self.layers.append(layer)
        self.layer_names.append("relu")
        self.nlayers+=1

    # ------------------------------------------------------------- data movement

    def update_weights(self, lr):
        """Ask every layer to apply its gradients with learning rate ``lr``."""
        for layer in self.layers[:self.nlayers]:
            layer.update_weights(self.weightip,lr)

    def write_input(self,xvals):
        """Encode the first ``xdim`` entries of ``xvals`` and push them through the input IP."""
        self._log(xvals)
        for i in range(self.xdim):
            self.xbuff[i]= int(converter.encode(xvals[i]))
        self.xbuff.flush()

        self.inputip.register_map.ddrtobram=1
        self._start_and_wait(self.inputip)

    def fetch_input(self):
        """Run the input IP with ``ddrtobram=0`` and return decoded ``(x, dx)`` lists."""
        self.inputip.register_map.ddrtobram=0
        self._start_and_wait(self.inputip)

        xvals=self._decode_buffer(self.xbuff)
        dxvals=self._decode_buffer(self.dxbuff)
        return xvals,dxvals

    def write_output(self,yvals, dyvals):
        """Encode ``yvals``/``dyvals`` into the output buffers and run the loss IP.

        The loss IP is started with ``writetoddr=1`` and ``ddrtobram=1``.
        """
        # The buffers are instance attributes; referring to them without
        # ``self.`` raised NameError on every call.
        for i in range(self.ydim):
            self.ybuff[i]= int(converter.encode(yvals[i]))
            self.dybuff[i]= int(converter.encode(dyvals[i]))

        self.ybuff.flush()
        self.dybuff.flush()

        self.lossip.register_map.writetoddr=1
        self.lossip.register_map.ddrtobram=1
        self._start_and_wait(self.lossip)

    def fetch_output(self):
        """Run the loss IP with ``writetoddr=1, ddrtobram=0`` and return decoded ``(y, dy)``."""
        self.lossip.register_map.writetoddr=1
        self.lossip.register_map.ddrtobram=0
        self._start_and_wait(self.lossip)

        yvals=self._decode_buffer(self.ybuff)
        dyvals=self._decode_buffer(self.dybuff)
        return yvals,dyvals

    def calculate_loss_gradient(self,label, batch_size):
        """Run the loss IP for ``label`` and return the decoded ``ap_return`` value.

        ``label`` is written to the ``y`` register and ``batch_size`` to ``N``;
        the IP is started with ``writetoddr=0``.
        """
        self.lossip.register_map.writetoddr=0
        self.lossip.register_map.y=label
        self.lossip.register_map.N=batch_size

        self._log("yoyo")
        self._start_and_wait(self.lossip)

        loss= self.lossip.register_map.ap_return
        loss=converter.decode(int(loss))
        return loss

    # ------------------------------------------------------------------ execution

    def predict(self,x):
        """Run a forward pass for ``x`` and return the decoded network output."""
        self.runfwprop(x)
        yvals, dyvals= self.fetch_output()
        return yvals

    def runfwprop(self,x):
        """Load ``x`` and run every layer's forward pass in order.

        Each layer reprograms its (shared) IP core right before it runs.
        """
        self.write_input(x)
        self._log("written x")

        for i in range(self.nlayers):
            name=self.layer_names[i]
            self._log(name if name in ("fcc", "conv") else "relu")
            self.layers[i].initHardware(self._ip_for(name))
            self.layers[i].fwprop()

    def runbackprop(self):
        """Run every layer's backward pass in reverse order.

        The IP core is reprogrammed for each layer first: the cores are shared,
        so after the forward pass their registers describe whichever layer of
        that type ran last, not necessarily the one about to run.
        """
        for j in reversed(range(self.nlayers)):
            self.layers[j].initHardware(self._ip_for(self.layer_names[j]))
            self.layers[j].bckprop()

    def train(self,x,y,epochs,learning_rate, batch_size):
        """Train sample by sample for ``epochs`` passes over ``x``/``y``.

        For every sample: forward pass, loss/gradient, backward pass, weight
        update. ``batch_size`` is only forwarded to the loss IP.

        Returns
        -------
        list
            The loss reported for every processed sample, in order.
        """
        self._log("inside training")
        x1=x.copy()
        y1=y.copy()

        losses=[]
        for i in range(epochs):
            for j in range(x1.shape[0]):
                self._log("yo")
                self.runfwprop(x1[j])
                self._log("done fwprop")
                loss=self.calculate_loss_gradient(y1[j],batch_size)
                self._log(loss)
                losses.append(loss)
                self.runbackprop()
                self.update_weights(learning_rate)
        return losses

In [7]:
# Load the pre-exported train/test splits from the notebook's working directory.
# NOTE(review): array shapes are not visible here; Neural_Net.write_input indexes
# each sample as a flat sequence of xdim values -- confirm the files are flattened.
x_train=np.load('x_train.npy')
y_train=np.load('y_train.npy')
x_test=np.load('x_test.npy')
y_test=np.load('y_test.npy')

In [8]:
# Build the network: 784 inputs -> conv (5 filters, 1 channel, 28x28 input,
# 5x5 kernel) -> ReLU -> fully connected -> 10 outputs.
# NOTE(review): 2880 = 5 * 24 * 24, presumably the conv output size for an
# unpadded stride-1 5x5 kernel -- confirm against the conv IP.
nn=Neural_Net(fccip, convip,reluip,inputip,lossip,weightip,784,10)
nn.add_conv(5,1,28,28,5,5)
nn.add_relu(2880)
nn.add_fcc(2880,10)

In [None]:
# Train for 10 epochs with learning rate 0.1; the trailing 1 is batch_size,
# which train() only forwards to the loss IP.
# NOTE(review): the saved output stops after "yo" and the cell has no execution
# count, so this run apparently never finished.
nn.train(x_train,y_train,10,0.1,1)

inside training
yo


In [24]:
# NOTE(review): hidden kernel state -- `dy` is never assigned in any visible
# cell and `y` is only assigned (as the integer 1) in a later scratch cell, so
# this fails on Restart & Run All. Presumably both came from nn.fetch_output().
print(y)
print(dy)

[array([0.49731445]), array([0.64611816]), array([0.54040527])]
[array([0.31201172]), array([-0.63793945]), array([0.32568359])]


In [25]:
# Dump the debug activations recorded for the layer at index 1.
# NOTE(review): the saved output has 4 entries, while index 1 of the network
# built above is a ReLU layer of size 2880 -- the output appears to come from a
# different, smaller configuration.
x,dx = nn.layers[1].get_debug_activations()
print(x)
print(dx)

[array([0.18041992]), array([0.31689453]), array([0.32922363]), array([0.21118164])]
[array([0.32849121]), array([-0.33886719]), array([-0.34423828]), array([0.35046387])]


In [26]:
# NOTE(review): with the network built above, layers[1] is a ReluLayer, which
# defines no get_weights(); this only works for a configuration where index 1
# is a conv or fcc layer.
w,b=nn.layers[1].get_weights()

In [27]:
# Show the decoded weights and biases fetched in the previous cell.
print(w)
print(b)

[array([0.1138916]), array([0.11694336])]
[array([0.])]


In [14]:
# Scratch cell: allocate two 4-word device buffers and copy raw integers into
# the first one (values are NOT passed through converter.encode here).
# NOTE(review): rebinds the notebook-level name `x` used by earlier cells.
x=[1,2,3,4]
x1=allocate(shape=(4,),dtype='uint16')
x2=allocate(shape=(4,),dtype='uint16')

for i in range(4):
    x1[i]= x[i]

In [15]:
# Display the loss IP's register map (interactive inspection only).
# NOTE(review): the saved output lists no `N` register, although Neural_Net
# writes lossip.register_map.N -- confirm the bitstream matches the class.
lossip.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  ap_return = Register(ap_return=0, RESERVED=0),
  x_ddr = Register(x_ddr=378007552),
  dx_ddr = Register(dx_ddr=378011648),
  y = Register(y=0),
  dim = Register(dim=10),
  writetoddr = Register(writetoddr=1, RESERVED=0),
  ddrtobram = Register(ddrtobram=1, RESERVED=0)
}

In [6]:
# Scratch cell: build a 4-element encoded input (x1) and a zeroed gradient
# buffer (x2) for exercising the loss IP by hand.
# NOTE(review): neither buffer is flushed after being written -- confirm that
# this is fine for the buffer type returned by allocate on this board.
x1=allocate(shape=(4,),dtype='uint16')
x2=allocate(shape=(4,),dtype='uint16')

x1[:]=[converter.encode(0.1),converter.encode(0.4),converter.encode(0.1),converter.encode(0.1)]
# Label and batch size for the manual run (N is not written to any register below).
y=1
N=1
x2[:]=[0,0,0,0]


In [7]:
# Point the loss IP at the scratch buffers instead of the network's buffers.
# NOTE(review): this overwrites the addresses Neural_Net.__init__ programmed
# into the shared lossip and sets dim=4; `nn` must be rebuilt before reuse.
lossip.register_map.x_ddr=x1.physical_address
lossip.register_map.dx_ddr=x2.physical_address
lossip.register_map.y=y
lossip.register_map.writetoddr=1
lossip.register_map.ddrtobram=0
lossip.register_map.dim=4

In [8]:
# Start the loss IP by writing 1 to its control register at offset 0.
# Nothing polls for completion here, unlike the class methods above.
lossip.write(0x00,1)

In [22]:
# Decode and print the four words currently held in x1.
for i in range(4):
    print(converter.decode(int(x1[i])))

[0.49938965]
[0.49938965]
[0.49938965]
[0.]


In [15]:
# Generate the initial fully connected weights as already-encoded words:
# entry i holds encode(0.2 + 0.002*i), stored in a float64 array.
# 28800 = 2880 * 10, the size of the add_fcc(2880, 10) layer built above.
# NOTE(review): values grow up to about 57.8; whether converter.encode can
# represent that range is not visible here -- confirm.
wt=np.zeros(28800)
for i in range(28800):
    wt[i]=int(converter.encode(0.2+0.002*i))

In [16]:
# Persist the generated weights; this is the file loaded into `fcc_weights`
# near the top of the notebook, so it must be produced before that cell runs.
np.save("fcc_weights.npy",wt)