We add a loss function.

# Adding  Backpropagation

In [0]:
import numpy as np 
from sklearn import metrics

import numpy as np 
from sklearn import metrics

def loss(y_true, y_predicted, loss_function='mse'):
    if loss_function == 'mse':       
        return metrics.mean_squared_error( y_true, y_predicted)
    else:
        raise Exception('Loss metric is not defined.')

def get_dz_from_loss(y, y_predicted, metric):
    if metric == 'mse':
        return y_predicted - y
    else:
        raise Exception('Loss metric is not defined.')

def sigma(z, act_func):
    global _activation
    if act_func == 'relu':
       return np.maximum(z, np.zeros(z.shape))
    
    elif act_func == 'sigmoid':
      return 1.0/(1.0 + np.exp( -z ))

    elif act_func == 'linear':
        return z
    else:
        raise Exception('Activation function is not defined.')

def sigma_prime(z, act_func):
    if act_func == 'relu':
        return np.maximum(np.sign(z), np.zeros(z.shape)) # 1 if backward input >0, 0 otherwise; then diaganolize

    elif act_func == 'sigmoid':
        h = sigma(z, 'sigmoid')
        return h *(1-h)

    elif act_func == 'linear':
        return np.ones(z.shape)

    else:
        raise Exception('Activation function is not defined.')

class Layer:
    def __init__(self,input_dim, output_dim, activation_function='linear'):    
        self.activation = activation_function
        self.input_dim = input_dim
        self.output_dim = output_dim 
        if input_dim > 0:
            #self.b = np.random.randn( output_dim, 1 )       
            #self.W = np.random.randn( output_dim, input_dim )
            #self.dW = np.random.randn( output_dim, input_dim )
            #self.db = np.random.randn( output_dim, 1 )
            self.b  = np.ones( (output_dim, 1) )       
            self.W  = np.ones( (output_dim, input_dim) )
            self.dW = np.ones( (output_dim, input_dim) )
            self.db = np.ones( (output_dim, 1) )
        self.a = np.zeros( (output_dim,1) )

    
    def set_weight(self, W ):
        self.W = W
      
    def set_bias(self, b ):
        self.b = b
  
    def compute_activation(self, a ): 
        self.z =  np.add( np.dot(self.W, a), self.b)
        self.a =  sigma(self.z, self.activation)
    
    
    def print( self ):      
        print(f"\n====== Layer Info =======")
        print(f"a    = {self.a}")
        if self.input_dim > 0: 
          print(f"W   =  {self.W}")          
          print(f"b   =  {self.b}")  
    

class Model:
    def __init__(self, input_dim):  
        self.neural_net = []
        self.neural_net.append(Layer(0 , input_dim, 'irrelevant'))    


    def add_layer(self, nr_neurons, activation='relu'):    
        layer_index = len(self.neural_net)
        input_dim = self.neural_net[layer_index - 1].output_dim
        new_layer = Layer( input_dim, nr_neurons, activation)
        self.neural_net.append( new_layer )


    def forward_propagation(self, input_vec ):
        self.neural_net[0].a = input_vec
        for layer_index in range(1,len(self.neural_net)):    
            _A_Prev = self.neural_net[layer_index-1].a                       
            self.neural_net[layer_index].compute_activation( _A_Prev )
        return  self.neural_net[layer_index].a
  
    def backward_propagation(self, y, y_predicted, num_train_datum, metric='mse', verbose=0):   
            nr_layers = len(self.neural_net)
            for layer_index in range(nr_layers-1,0,-1):
                if layer_index+1 == nr_layers: # if output layer

                    dz = np.multiply(get_dz_from_loss(y, y_predicted, metric), 
                                    sigma_prime(
                                        self.neural_net[layer_index].a, 
                                        self.neural_net[layer_index].activation)
                    )        
                else: 
                    dz = np.multiply(
                          np.dot(
                              self.neural_net[layer_index+1].W.T, 
                              dz), 
                          sigma_prime(
                                self.neural_net[layer_index].a, 
                                self.neural_net[layer_index].activation)
                          )         
                dW = np.dot(dz, self.neural_net[layer_index-1].a.T) / num_train_datum
                db = np.sum(dz, axis=1, keepdims=True) / num_train_datum

                # Update gradients
                self.neural_net[layer_index].dW = dW 
                self.neural_net[layer_index].db = db 

                if (verbose > 0):
                  print(f"\n\n====== Backward Propagation Layer {layer_index} =======")
                  print(f"dZ      =  {dz}")          
                  print(f"dW      =  {dW}")
                  print(f"db      =  {db}")
                  print(f"A           = {self.neural_net[layer_index].a}") 
                  print(f"A prev lay  = {self.neural_net[layer_index-1].a}") 
                  

    def update( self, learning_rate ):
        nr_layers = len(self.neural_net)
        for layer_index in range(1,nr_layers):        
            self.neural_net[layer_index].set_weight( self.neural_net[layer_index].W - learning_rate * self.neural_net[layer_index].dW )
            self.neural_net[layer_index].set_bias(  self.neural_net[layer_index].b  - learning_rate * self.neural_net[layer_index].db  )
    

    def summary(self):
        print("MODEL SUMMARY")
        for layer_index in range(len(self.neural_net)):        
          self.neural_net[layer_index].print()
          
        print("FINISHED MODEL SUMMARY")
        

# Test

In [14]:
#Testing        
input_dim = 2
output_dim = 1
model = Model( input_dim )
model.add_layer( 2, 'relu' )
model.add_layer( output_dim, 'linear' )


X  = np.array( [[1,1], [2,2]] ) 
y  =np.array( [[2, 3]] )

y_predicted = model.forward_propagation( X.T )
print(f" Predicted value {y_predicted}")
print(f" Predicted value after one update (=learning) cycle {y_predicted}")
print(f" Starting cost  : { loss(y_predicted, y)}")

model.backward_propagation(y, y_predicted, 1, verbose=True)
model.update( 0.1 )
y_predicted = model.forward_propagation( X )
print(f" Predicted value {y_predicted}")
print(f" Predicted value after one update (=learning) cycle {y_predicted}")
print(f"Cost after first learning cycle : { loss(y_predicted, y)}")

model.backward_propagation(y_true, y_predicted, 1, verbose=True)
model.update( 0.1 )
y_predicted = model.forward_propagation( X )
print(f" Predicted value {y_predicted}")
print(f" Predicted value after one update (=learning) cycle {y_predicted}")
print(f"Cost after second learning cycle : { loss(y_predicted, y)}")



 Predicted value [[ 7. 11.]]
 Predicted value after one update (=learning) cycle [[ 7. 11.]]
 Starting cost  : 44.5


dZ      =  [[5. 8.]]
dW      =  [[55. 55.]]
db      =  [[13.]]
A           = [[ 7. 11.]]
A prev lay  = [[3. 5.]
 [3. 5.]]


dZ      =  [[5. 8.]
 [5. 8.]]
dW      =  [[21. 21.]
 [21. 21.]]
db      =  [[13.]
 [13.]]
A           = [[3. 5.]
 [3. 5.]]
A prev lay  = [[1 2]
 [1 2]]
 Predicted value [[-0.3 -0.3]]
 Predicted value after one update (=learning) cycle [[-0.3 -0.3]]
Cost after first learning cycle : 8.09


dZ      =  [[-2.3 -3.3]]
dW      =  [[0. 0.]]
db      =  [[-5.6]]
A           = [[-0.3 -0.3]]
A prev lay  = [[0. 0.]
 [0. 0.]]


dZ      =  [[0. 0.]
 [0. 0.]]
dW      =  [[0. 0.]
 [0. 0.]]
db      =  [[0.]
 [0.]]
A           = [[0. 0.]
 [0. 0.]]
A prev lay  = [[1 1]
 [2 2]]
 Predicted value [[0.26 0.26]]
 Predicted value after one update (=learning) cycle [[0.26 0.26]]
Cost after second learning cycle : 5.267600000000001
