In [1]:
from pynq import Overlay, GPIO, Register, allocate, MMIO
import os
import numpy as np

import struct


In [2]:
def float_to_int(f):
    return (struct.unpack('<I', struct.pack('<f', f))[0])

def int_to_float(i):
    return (struct.unpack('<f', struct.pack('<I', i))[0])

In [3]:
overlay = Overlay("nn.bit")
fcc1=overlay.forward_fcc_0
# fcc1.register_map

bck1=overlay.backward_fcc_0
# bck1.register_map

actv_fwd1=overlay.activation_fwd_0
actv_bck1=overlay.activation_bckwd_0

actv_fwd1.register_map


RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),
  x = Register(x=0),
  y = Register(y=0),
  dimension = Register(dimension=0),
  type_r = Register(type_r=0)
}

In [4]:
class FullyConnectedLayer():
    def __init__(self,xdim,ydim,base_addr):

        self.xdim=xdim
        self.ydim=ydim

        self.base_addr=base_addr

        self.BASE_ADDRESS_X=base_addr
        self.BASE_ADDRESS_DX=self.BASE_ADDRESS_X+xdim*4+4

        self.BASE_ADDRESS_W=self.BASE_ADDRESS_DX+xdim*4+4

        self.BASE_ADDRESS_B=self.BASE_ADDRESS_W+xdim*ydim*4+4

        self.BASE_ADDRESS_Y=self.BASE_ADDRESS_B+ydim*4+4
        self.BASE_ADDRESS_DY=self.BASE_ADDRESS_Y+ydim*4+4  

        self.mmio_x= MMIO(self.BASE_ADDRESS_X,self.xdim*4)
        self.mmio_dx=MMIO(self.BASE_ADDRESS_DX,self.xdim*4)

        self.mmio_w= MMIO(self.BASE_ADDRESS_W,self.xdim*self.ydim*4)

        self.mmio_y= MMIO(self.BASE_ADDRESS_Y,self.ydim*4)
        self.mmio_dy= MMIO(self.BASE_ADDRESS_DY,self.ydim*4)

        self.mmio_b= MMIO(self.BASE_ADDRESS_B,self.ydim*4)

        self.config_dic={'base':base_addr, 'x':self.BASE_ADDRESS_X,'w':self.BASE_ADDRESS_W,'y':self.BASE_ADDRESS_Y,'b':self.BASE_ADDRESS_B, 'dx':self.BASE_ADDRESS_DX, 'dy':self.BASE_ADDRESS_DY,'xdim':self.xdim,'ydim':self.ydim}


    def get_config_dic(self):
        
        return self.config_dic


    def initHardware(self,fwip,bckip,lr):

        self.fwip=fwip
        self.fwip.register_map.x=self.BASE_ADDRESS_X
        self.fwip.register_map.w=self.BASE_ADDRESS_W
        self.fwip.register_map.y=self.BASE_ADDRESS_Y
        self.fwip.register_map.b=self.BASE_ADDRESS_B
        self.fwip.register_map.xdimension=self.xdim
        self.fwip.register_map.ydimension=self.ydim

        self.bckip= bckip
        self.bckip.register_map.x=self.BASE_ADDRESS_X
        self.bckip.register_map.w=self.BASE_ADDRESS_W
        self.bckip.register_map.b=self.BASE_ADDRESS_B
        self.bckip.register_map.xdimension=self.xdim
        self.bckip.register_map.ydimension=self.ydim
        self.bckip.register_map.dx=self.BASE_ADDRESS_DX
        self.bckip.register_map.dy=self.BASE_ADDRESS_DY
        self.bckip.register_map.lr=lr

        
    def set_weights(self,w,b):        
        for i in range(self.xdim*self.ydim):
            self.mmio_w.write(i*4,float_to_int(w[i]))
        for i in range(self.ydim):
            self.mmio_b.write(i*4,float_to_int(b[i]))

    def reset_weights(self):
        for i in range(self.xdim*self.ydim):
            self.mmio_w.write(i*4,float_to_int(0.012))
        
        for i in range(self.ydim):
            self.mmio_b.write(i*4,float_to_int(0.01))
            
    def get_weights(self):
        
        w=[]
        b=[]        
        for i in range(self.xdim*self.ydim):
            w.append(int_to_float(self.mmio_w.read(i*4)))
        
        for i in range(self.ydim):
            b.append(int_to_float(self.mmio_b.read(i*4)))
        
        return w,b

    def set_input(self,x):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,float_to_int(x[i]))
    def get_input(self):
        x=[]     
        for i in range(self.xdim):
            x.append(int_to_float(self.mmio_x.read(i*4)))     
        return x
    def reset_input(self):
        for i in range(self.xdim):
            self.mmio_x.write(i*4,0)
            self.mmio_dx.write(i*4,0)

            
    def reset_output(self):
        for i in range(self.ydim):
            self.mmio_y.write(i*4,0)
            self.mmio_dy.write(i*4,0)    
    def get_output(self):
        y=[]
        for i in range(self.ydim):
            y.append(int_to_float(self.mmio_y.read(i*4)))
        return y

    
    
    def set_dy(self,dy):        
        for i in range(self.ydim):
            self.mmio_dy.write(i*4,float_to_int(dy[i]))
    def get_dy(self):
        dy=[]
        for i in range(self.ydim):
            dy.append(int_to_float(self.mmio_dy.read(i*4)))
        return dy

    

    def fwprop(self):

        self.fwip.write(0x00, 1)
        fpga_state = self.fwip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.fwip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.fwip.write(0x00, 4)
                break

        self.fwip.write(0x00, 4)

    def bckprop(self):
        self.bckip.write(0x00, 1)
        fpga_state = self.bckip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.bckip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.bckip.write(0x00, 4)
                break

        self.bckip.write(0x00, 4)
        


In [5]:
class ActivationLayer():
    def __init__(self,dim,base_addr,type):

        self.type=type
        self.dim=dim

        self.base_addr=base_addr

        self.BASE_ADDRESS_X=base_addr
        self.BASE_ADDRESS_DX=self.BASE_ADDRESS_X+dim*4+4


        self.BASE_ADDRESS_Y=self.BASE_ADDRESS_DX+dim*4+4
        self.BASE_ADDRESS_DY=self.BASE_ADDRESS_Y+dim*4+4  

        self.mmio_x= MMIO(self.BASE_ADDRESS_X,self.dim*4)
        self.mmio_dx=MMIO(self.BASE_ADDRESS_DX,self.dim*4)

        self.mmio_y= MMIO(self.BASE_ADDRESS_Y,self.dim*4)
        self.mmio_dy= MMIO(self.BASE_ADDRESS_DY,self.dim*4)

        self.config_dic={'base':base_addr, 'x':self.BASE_ADDRESS_X,'dx':self.BASE_ADDRESS_DX,'y':self.BASE_ADDRESS_Y,'dy':self.BASE_ADDRESS_DY, 'dim':self.dim}


    def get_config_dic(self):
        
        return self.config_dic


    def initHardware(self,fwip,bckip):

        self.fwip=fwip
        self.fwip.register_map.y=self.BASE_ADDRESS_Y
        self.fwip.register_map.x=self.BASE_ADDRESS_X
        self.fwip.type=self.type
        self.fwip.register_map.dim=self.dim

        self.bckip=bckip
        self.bckip.register_map.x=self.BASE_ADDRESS_X
        self.bckip.register_map.dy=self.BASE_ADDRESS_DY
        self.bckip.register_map.dx=self.BASE_ADDRESS_DX
        self.bckip.type_r=self.type
        self.bckip.register_map.dim=self.dim

    def get_weights(self):
        
        w=[]
        b=[]               
        return w,b

    def set_weights(self,w,b):
        pass

    
    def set_input(self,x):
        for i in range(self.dim):
            self.mmio_x.write(i*4,float_to_int(x[i]))
    def get_input(self):
        x=[]     
        for i in range(self.dim):
            x.append(int_to_float(self.mmio_x.read(i*4)))     
        return x
    def reset_input(self):
        for i in range(self.dim):
            self.mmio_x.write(i*4,0)
            self.mmio_dx.write(i*4,0)

    def set_dy(self,dy):        
        for i in range(self.ydim):
            self.mmio_dy.write(i*4,float_to_int(dy[i]))
    def get_dy(self):
        dy=[]
        for i in range(self.ydim):
            dy.append(int_to_float(self.mmio_dy.read(i*4)))
        return dy
        
    def reset_output(self):
        for i in range(self.dim):
            self.mmio_y.write(i*4,0)
            self.mmio_dy.write(i*4,0)    
    def get_output(self):
        y=[]
        for i in range(self.dim):
            y.append(int_to_float(self.mmio_y.read(i*4)))
        return y

    def fwprop(self):

        self.fwip.write(0x00, 1)
        fpga_state = self.fwip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.fwip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.fwip.write(0x00, 4)
                break

        self.fwip.write(0x00, 4)

    def bckprop(self):
        self.bckip.write(0x00, 1)
        fpga_state = self.bckip.read(0x00)

        max_try = 1000000
        while fpga_state != 6 and fpga_state != 4:
            fpga_state = self.bckip.read(0x00)
            max_try = max_try -1
            if max_try == 0:
                print("ERROR: Can't go ahead")
                self.bckip.write(0x00, 4)
                break

        self.bckip.write(0x00, 4)
        


In [13]:
import copy
class Neural_Net():

    def __init__(self,mem_base_addr,fcc1,bck1,act_fwd, act_bck):

        self.mem_base_addr=mem_base_addr
        self.layers=[]
        self.layer_names=[]
        self.last_layer_out_address=mem_base_addr
        self.layer_base_addresses=[]
        self.layer_configs=[]
        self.nlayers=0
        self.fcc1=fcc1
        self.bck1=bck1
        self.act_fwd=act_fwd
        self.act_bck=act_bck

    def add(self,name,input_shape,output_shape):
        if name == "fcc":
            layer=FullyConnectedLayer(input_shape,output_shape,self.last_layer_out_address)
            self.layers.append(layer)
            cfg=layer.get_config_dic()
            self.last_layer_out_address=cfg['y']
            layer.reset_output()
            layer.reset_weights()
            self.layer_names.append("fcc")
        if name == "relu":
            layer=ActivationLayer(input_shape,self.last_layer_out_address,1)
            self.layers.append(layer)
            cfg=layer.get_config_dic()
            self.last_layer_out_address=cfg['y']
            layer.reset_output()
            self.layer_names.append("relu")
           

        self.layer_configs.append(cfg)
        self.nlayers+=1
       
    def get_input(self,i):
        return self.layers[i].get_input()
    def get_layer_output(self,i):
        return self.layers[i].get_output()
    

    def set_weights(self,weights):        
        for i in range(self.nlayers):
            self.layers[i].set_weights(weights[i])    
    def get_weights(self,i):      
        return (self.layers[i].get_weights())
    
            
    def set_dy(self,i,grad):
        self.layers[i].set_dy(grad)        
    def get_dy(self,i):
        return self.layers[i].get_dy()
    

    
    def calculate_mse_loss(self,x,y):
        if self.layer_names[-1] == "fcc":

            preds=np.zeros((len(y),self.layer_configs[-1]['ydim']))
        else:
            preds=np.zeros((len(y),self.layer_configs[-1]['dim']))
            
        for i in range(len(x)):
            preds[i]=self.predict(x[i],0.01)
            
        loss =0
        for i in range(len(y)):
            loss += np.sum(np.square(preds[i]-y[i]),axis=None)
            
        return loss/len(y)
    def calculate_gradient(self,y,ytrue):
        return 2*(y-ytrue)
    
    
    def predict(self,x,lr):
        self.runfwprop(x,lr)
        return self.get_layer_output(self.nlayers-1)
    
        
    def runfwprop(self,x,learning_rate):

        self.layers[0].set_input(x)
        for i in range(self.nlayers):
            if self.layer_names[i] == "fcc":
                
                self.layers[i].initHardware(self.fcc1,self.bck1,learning_rate)
                self.layers[i].fwprop()
            else:
                self.layers[i].initHardware(self.act_fwd,self.act_bck)
                self.layers[i].fwprop()
                

    def runbackprop(self,ylabel,learning_rate):

        pred=self.layers[self.nlayers-1].get_output()
        grad=self.calculate_gradient(pred,ylabel)
        self.layers[self.nlayers-1].set_dy(grad)
        
        for i in range(self.nlayers):
            if self.layer_names[i] == "fcc":
                self.layers[i].initHardware(self.fcc1,self.bck1,learning_rate)
                self.layers[i].bckprop()
            else:
                self.layers[self.nlayers-i-1].initHardware(self.act_fwd,self.act_bck)
                self.layers[self.nlayers-i-1].bckprop()    
    
 
    
    def train(self,x,y,epochs,learning_rate):
        x1=x.copy()
        y1=y.copy()
        learning_rate=float_to_int(learning_rate)
        for i in range(epochs):
            print(self.calculate_mse_loss(x[0:99],y[0:99]))
            for j in range(len(x)):

                self.runfwprop(x1[j],learning_rate)     
                self.runbackprop(y1[j],learning_rate)
                
            
#                 for k in range(self.nlayers):
#                     w,b=self.get_weights(k)
#                     y=self.get_layer_output(k)
#                     dy=self.get_dy(k)
#                     x=self.get_input(k)
#                     base_addr=self.layer_configs[k]['x']
#                     y_addr=self.layer_configs[k]['y']
#                     dx_addr=self.layer_configs[k]['dx']
#                     dy_addr=self.layer_configs[k]['dy']

#                     print("Layer"+str(k)+" inputs="+str(x))
#                     print("Layer"+str(k)+" weights="+str(w))
#                     print("Layer"+str(k)+" bias="+str(b))
#                     print("Layer"+str(k)+" outputs="+str(y))
#                     print("Layer"+str(k)+" ygrads="+str(dy))
                

                
                
                    
#                     print("Layer"+str(k)+" base address="+str(base_addr))
#                     print("Layer"+str(k)+" dx address="+str(dx_addr))
#                     print("Layer"+str(k)+" dy address="+str(dy_addr))

In [14]:
# import time

# w=np.array([0.57,0.99,0.23,0.11]).reshape(1,4)

# x_data=np.random.uniform(
#     low=0, high=5, size=100).astype(np.float32).reshape(100,1)
# y_data=np.dot(x_data,w)



# model=Neural_Net(0x4001_0000,fcc1,bck1)
# model.add(1,4)

# # print(model.calculate_mse_loss(x_data[0:99],y_data[0:99]))

# t1=time.time()
# model.train(x_data,y_data,10,0.03)
# t2=time.time()


In [15]:
import time


x_data=np.random.uniform(
    low=0, high=5, size=1000).astype(np.float32).reshape(1000,1)
y_data=np.sin(x_data).reshape((-1,1))



model=Neural_Net(0x4001_0000,fcc1,bck1,actv_fwd1,actv_bck1)
model.add("fcc",1,16)
model.add("relu",16,16)
model.add("fcc",16,16)
model.add("relu",16,16)
model.add("fcc",16,1)

# print(model.calculate_mse_loss(x_data[0:99],y_data[0:99]))

t1=time.time()
model.train(x_data,y_data,10,0.3)
t2=time.time()

0.5303323270992805
0.5150387778161507
0.5150387778161507


KeyboardInterrupt: 